diff --git a/docs-gen/content/docs/tools/cli.md b/docs-gen/content/docs/tools/cli.md index 72ea15bf..67991dfb 100644 --- a/docs-gen/content/docs/tools/cli.md +++ b/docs-gen/content/docs/tools/cli.md @@ -148,6 +148,304 @@ s2dm compose -s schema.graphql --expanded-instances -o output.graphql ## Export Commands +### JSON Tree + +The `export json` command translates a GraphQL schema into a **hierarchical JSON tree** — a nested structure of branches and leaf signals. It is the primary S2DM output format and serves as the canonical human-readable representation of your domain model. + +#### Key Features + +- **Branch / leaf hierarchy**: Object types become branch nodes with a `children` map; scalar and enum fields become leaf nodes +- **Schema-intrinsic fields by default**: Only information encoded in the GraphQL schema itself is written — `description`, `datatype`, `min`, `max`, `unit`, `deprecated` +- **Raw datatype names**: Scalar and enum names are used as-is (e.g. `Float`, `Boolean`, `UInt8`, `GearPosition`) +- **Instance dimensions from `@instanceTag`**: When a field references an `@instanceTag` type, it is excluded as a child and its enum dimensions are written as an `instances` array on the parent branch instead +- **VSS metadata overlay** (`--vspec-meta`): Activates an optional YAML-based overlay that adds `type`, `comment`, `allowed`, `instances`, and other VSS-compatible keys — keyed by fully-qualified signal name (FQN) +- **VSS struct type handling** (`--vspec-meta`): Fields referencing `@vspec(element: STRUCT)` types are emitted as leaf nodes whose `datatype` is the struct's FQN, not expanded branches. 
Struct type definitions are rendered in a separate top-level `ComplexDataTypes` section built from the YAML
+- **Root Type Filtering**: Use `--root-type` to export only one branch and its descendants
+- **Selection Query**: Use `--selection-query` to narrow the export to a specific set of fields
+- **Naming Configuration**: Use `--naming-config` to rename types and fields during export
+
+#### Usage
+
+```bash
+s2dm export json -s <schema-file> -o <output-file>
+```
+
+#### Options
+
+- `-s, --schema PATH`: GraphQL schema file or directory (required, can be specified multiple times)
+- `-o, --output FILE`: Output JSON file path (required)
+- `-r, --root-type TEXT`: Export only this type and its descendants (optional)
+- `-q, --selection-query PATH`: GraphQL query file to filter exported fields (optional)
+- `-n, --naming-config PATH`: YAML naming configuration for transforming names during export (optional)
+- `-e, --expanded-instances`: Unfold `@instanceTag` arrays into nested types before exporting (optional)
+- `--vspec-meta PATH`: YAML file with FQN-indexed VSS metadata overlay (optional — activates vspec-meta mode)
+
+#### Default Mode
+
+By default, only schema-intrinsic information is included. No `@vspec` annotations are processed and no `"type"` key is added to branch nodes.
+
+Given this schema:
+
+```graphql
+directive @range(min: Float, max: Float) on FIELD_DEFINITION
+directive @instanceTag on OBJECT
+
+enum SideEnum { LEFT RIGHT }
+
+type MirrorTag @instanceTag {
+  side: SideEnum
+}
+
+type Mirror {
+  """Mirror pan as a percent."""
+  pan: Float @range(min: -100, max: 100)
+  instanceTag: MirrorTag
+}
+
+type Body {
+  """All mirrors."""
+  mirrors: Mirror
+}
+
+type Vehicle {
+  body: Body
+}
+```
+
+Running:
+
+```bash
+s2dm export json -s schema.graphql -o output.json -r Vehicle
+```
+
+Produces:
+
+```json
+{
+  "Vehicle": {
+    "children": {
+      "body": {
+        "children": {
+          "mirrors": {
+            "children": {
+              "pan": {
+                "description": "Mirror pan as a percent.",
+                "datatype": "Float",
+                "min": -100,
+                "max": 100
+              }
+            },
+            "description": "All mirrors.",
+            "instances": [
+              ["LEFT", "RIGHT"]
+            ]
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+Key behaviours to note:
+
+- `instanceTag` does **not** appear as a child of `mirrors`
+- Instead, the enum values of `SideEnum` are extracted and written as `"instances"` on `mirrors`
+- Multi-dimensional tags produce one list per enum field (e.g. a `row` + `side` tag produces `[["ROW1","ROW2"], ["LEFT","RIGHT"]]`)
+
+#### VSS Metadata Overlay (`--vspec-meta`)
+
+Passing `--vspec-meta` activates vspec-meta mode. A flat YAML file keyed by dot-separated FQN is merged on top of each matching node. This enables producing VSS-compatible output without encoding VSS-specific information directly in the GraphQL schema.
+
+```bash
+s2dm export json -s schema.graphql -o output.json -r Vehicle --vspec-meta vspec_lookup.yaml
+```
+
+The YAML file maps FQNs to dictionaries of keys to overlay:
+
+```yaml
+Vehicle.Body.Mirrors:
+  type: branch
+  instances: ["DriverSide", "PassengerSide"]
+  comment: Inherited from VSS 5.0
+
+Vehicle.Body.Mirrors.Pan:
+  type: sensor
+  unit: percent
+```
+
+In vspec-meta mode the following additional behaviour is enabled:
+
+- `"type"` is added to every branch node — derived from `@vspec(element: ...)` on the GraphQL type itself (e.g. `branch`, `struct`), falling back to `"branch"` if absent. The YAML overlay can still override it.
+- Fields with `@vspec(fqn: ...)` have their output key renamed to the last segment of the FQN
+- Enum fields annotated with `@vspec` gain an `"allowed"` list from the enum values
+- All YAML keys for a matching FQN entry are merged into the node, with the YAML taking precedence
+- Fields referencing a `@vspec(element: STRUCT)` type are exported as **leaf nodes** whose `datatype` is the struct's FQN (e.g. `VehicleDataTypes.TestBranch1.NestedStruct`) rather than expanded branch nodes
+- If the YAML contains a `ComplexDataTypes` key, its FQN-indexed entries are assembled into a nested `ComplexDataTypes` section in the output (see [VSS Struct Types](#vss-struct-types-vspec-meta-only) below)
+
+#### Output Structure
+
+Both modes share the same structural convention:
+
+```json
+{
+  "<RootType>": {
+    "description": "...",
+    "children": {
+      "<leafField>": {
+        "description": "...",
+        "datatype": "<scalar-or-enum-name>",
+        "unit": "...",
+        "min": 0,
+        "max": 100,
+        "deprecated": "..."
+      },
+      "<branchField>": {
+        "children": { ... },
+        "description": "...",
+        "instances": [["VAL1", "VAL2"], ...]
+      }
+    }
+  }
+}
+```
+
+Leaf node keys (written when the corresponding information is present in the schema):
+
+| Key | Source |
+|-----|--------|
+| `description` | GraphQL field docstring |
+| `datatype` | Scalar or enum type name (`Float`, `UInt8`, `GearPosition`, `String[]`, …) |
+| `min` | `@range(min: …)` directive |
+| `max` | `@range(max: …)` directive |
+| `unit` | Field argument `unit` default value |
+| `deprecated` | GraphQL `@deprecated` reason |
+
+Branch node keys:
+
+| Key | Source |
+|-----|--------|
+| `children` | Object type fields |
+| `description` | GraphQL field docstring |
+| `instances` | Enum dimensions of the `@instanceTag` type (default mode) or YAML overlay (vspec-meta mode) |
+| `type` | vspec-meta mode only — from `@vspec(element: ...)` on the type (e.g. `branch`, `struct`), or YAML overlay |
+| `comment` | vspec-meta mode only — from YAML entry |
+
+#### VSS Struct Types (vspec-meta only)
+
+When a GraphQL schema contains `@vspec(element: STRUCT)` types (representing VSS struct datatypes), vspec-meta mode handles them differently from regular branch types:
+
+- **Fields referencing a struct type become leaf nodes** — the field is not expanded into a branch; instead it gets a `datatype` equal to the struct's FQN as recorded in `@vspec(fqn: ...)` on the struct type. A `[]` suffix is appended for list fields.
+- **Struct types are excluded from the main export** — they do not appear as top-level keys alongside signal branches.
+- **A `ComplexDataTypes` section is built from the YAML** — when the `--vspec-meta` YAML contains a `ComplexDataTypes` key, its FQN-indexed entries are assembled into a nested hierarchy and added to the output as `result["ComplexDataTypes"]`.
+
+Example YAML with a `ComplexDataTypes` section:
+
+```yaml
+A:
+  description: Branch A.
+  type: branch
+
+A.NestedStructSensor:
+  datatype: VehicleDataTypes.TestBranch1.NestedStruct
+  description: A rich sensor with user-defined data type.
+ type: sensor + +ComplexDataTypes: + VehicleDataTypes: + description: Top-level branch for vehicle data types. + type: branch + VehicleDataTypes.TestBranch1: + description: Test branch with structs and properties definitions + type: branch + VehicleDataTypes.TestBranch1.NestedStruct: + description: A struct - Nested + type: struct + VehicleDataTypes.TestBranch1.NestedStruct.x: + datatype: double + description: x property + min: -10 + type: property +``` + +Produces: + +```json +{ + "A": { + "children": { + "NestedStructSensor": { + "description": "A rich sensor with user-defined data type.", + "datatype": "VehicleDataTypes.TestBranch1.NestedStruct", + "type": "sensor" + } + }, + "description": "Branch A.", + "type": "branch" + }, + "ComplexDataTypes": { + "VehicleDataTypes": { + "description": "Top-level branch for vehicle data types.", + "type": "branch", + "children": { + "TestBranch1": { + "description": "Test branch with structs and properties definitions", + "type": "branch", + "children": { + "NestedStruct": { + "description": "A struct - Nested", + "type": "struct", + "children": { + "x": { + "datatype": "double", + "description": "x property", + "min": -10, + "type": "property" + } + } + } + } + } + } + } + } +} +``` + +> **Default mode (no `--vspec-meta`)**: `@vspec(element: STRUCT)` types are treated as plain branches and expanded inline. Fields referencing them are expanded as nested `children` rather than leaf nodes with a struct FQN datatype. Field names reflect the camelCase names in the GraphQL schema rather than the original VSS snake_case names. 
+ +#### Examples + +##### Export a Root Type + +```bash +s2dm export json -s schema.graphql -o vehicle.json -r Vehicle +``` + +##### Export with VSS Metadata Overlay + +```bash +s2dm export json \ + -s schema.graphql \ + -o vehicle_vss.json \ + -r Vehicle \ + --vspec-meta vspec_lookup.yaml +``` + +##### Filter to a Subset of Fields + +```bash +s2dm export json -s schema.graphql -o adas.json -q adas_query.graphql +``` + +For help: + +```bash +s2dm export json --help +``` + ### JSON Schema This exporter translates the given GraphQL schema to [JSON Schema](https://json-schema.org/) format. @@ -1540,6 +1838,169 @@ s2dm export avro schema --help s2dm export avro protocol --help ``` +### MongoDB BSON Validators + +This exporter generates [MongoDB collection validators](https://www.mongodb.com/docs/manual/core/schema-validation/) from your S2DM schema. Each object type in your schema becomes a separate JSON file that MongoDB can use to validate documents as they are inserted or updated. + +> In other words: once you export and register the validator, MongoDB will reject any document that does not match the shape defined in your GraphQL schema. + +#### Usage + +```bash +s2dm export mongodb \ + --schema tests/data/spec/common.graphql \ + --schema my_schema.graphql \ + --output ./validators +``` + +This writes one file per object type into `./validators/`. For example, a schema with `ChargingStation` and `Address` types produces `ChargingStation.json` and `Address.json`. + +#### Example + +Given this schema: + +```graphql +type ChargingStation { + id: ID! + name: String! + maxPowerKw: Float! + connectors: [ConnectorKind!]! 
+ address: Address +} + +type Address { + street: String + city: String +} + +enum ConnectorKind { TYPE_A TYPE_B TYPE_C } +``` + +The exporter produces `ChargingStation.json`: + +```json +{ + "bsonType": "object", + "additionalProperties": true, + "properties": { + "id": { "bsonType": "objectId" }, + "name": { "bsonType": "string" }, + "maxPowerKw": { "bsonType": "double" }, + "connectors": { + "bsonType": "array", + "items": { "bsonType": "string", "enum": ["TYPE_A", "TYPE_B", "TYPE_C"] } + }, + "address": { + "bsonType": ["object", "null"], + "additionalProperties": true, + "properties": { + "street": { "bsonType": ["string", "null"] }, + "city": { "bsonType": ["string", "null"] } + } + } + }, + "required": ["id", "name", "maxPowerKw", "connectors"] +} +``` + +Key things to notice: + +- `Address` is embedded directly inside `ChargingStation` — MongoDB validators work this way by design, there are no cross-collection references +- `ConnectorKind` is embedded as an allowed-values list +- Optional fields (no `!`) do not appear in `required` and accept `null` +- `additionalProperties: true` means extra fields beyond those in the schema are allowed by default + +#### Restricting Extra Fields (`--properties-config`) + +By default, documents may contain fields not defined in your schema. Use `--properties-config` to nominate specific types or embedded objects that should reject unknown fields. + +Create a YAML file listing which objects should be strict: + +```yaml +# strict.yaml +- ChargingStation # the top-level ChargingStation collection +- ChargingStation.address # only the embedded address inside ChargingStation +``` + +Then pass it to the exporter: + +```bash +s2dm export mongodb \ + --schema tests/data/spec/common.graphql \ + --schema my_schema.graphql \ + --output ./validators \ + --properties-config strict.yaml +``` + +Each listed entry will have `additionalProperties: false` in its output. 
The two forms can be mixed freely: + +- `TypeName` — applies to the top-level validator for that type +- `TypeName.fieldName` — applies only to the embedded object at that field; the standalone `TypeName` validator is not affected + +If a name in the config does not match any type or field in your schema, the command exits with an error before writing anything. + +#### Registering with MongoDB (`--validator`) + +By default the output files contain the bare schema content. To produce files ready for direct use with `db.createCollection()`, add `--validator`: + +```bash +s2dm export mongodb \ + --schema tests/data/spec/common.graphql \ + --schema my_schema.graphql \ + --output ./validators \ + --validator +``` + +Each file is then wrapped so you can pass it directly: + +```js +const schema = require("./validators/ChargingStation.json"); +db.createCollection("ChargingStation", { validator: schema }); +``` + +Without `--validator`, the files contain the inner schema only — useful for version control, schema registries, or tooling that adds the envelope itself. + +#### Filtering to a Single Type (`--root-type`) + +Use `--root-type` to export only one type and have all dependent types inlined into it. This is useful when you need a single self-contained validator file: + +```bash +s2dm export mongodb \ + --schema tests/data/spec/common.graphql \ + --schema my_schema.graphql \ + --output ./validators \ + --root-type ChargingStation +``` + +Only `ChargingStation.json` is written; `Address.json` is not created separately. + +#### GeoJSON Fields + +If your schema uses geographic coordinates, include the GeoJSON spec file and annotate fields with `@geoType`: + +```bash +s2dm export mongodb \ + --schema tests/data/spec/common.graphql \ + --schema tests/data/spec/geojson.graphql \ + --schema my_schema.graphql \ + --output ./validators +``` + +```graphql +type ChargingStation { + """Geographical location of the station""" + location: GeoJSON! 
@geoType(shape: POINT) +} +``` + +The validator for `location` will enforce that the stored value is a valid GeoJSON Point object. Supported shapes: `POINT`, `MULTIPOINT`, `LINESTRING`, `MULTILINESTRING`, `POLYGON`, `MULTIPOLYGON`. + +#### All Options + +```bash +s2dm export mongodb --help +``` + ## Common Features ### Selection Query Filtering diff --git a/src/s2dm/cli.py b/src/s2dm/cli.py index 7438926e..a140052c 100644 --- a/src/s2dm/cli.py +++ b/src/s2dm/cli.py @@ -14,7 +14,10 @@ from s2dm.concept.services import iter_all_concepts from s2dm.exporters.avro import translate_to_avro_protocol, translate_to_avro_schema from s2dm.exporters.id import IDExporter +from s2dm.exporters.json import export_to_json_tree from s2dm.exporters.jsonschema import translate_to_jsonschema +from s2dm.exporters.mongodb import translate_to_mongodb +from s2dm.exporters.mongodb.mongodb import load_properties_config, wrap_validator from s2dm.exporters.protobuf import translate_to_protobuf from s2dm.exporters.shacl import translate_to_shacl from s2dm.exporters.spec_history import SpecHistoryExporter @@ -592,6 +595,135 @@ def jsonschema( _ = output.write_text(result) +# Export -> json tree +# ---------- +@export.command(name="json") +@schema_option +@selection_query_option() +@output_option +@root_type_option +@naming_config_option +@expanded_instances_option +@click.option( + "--vspec-meta", + type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path), + default=None, + help="YAML file with FQN-indexed VSS metadata overlay (unit, type, comment, etc.).", +) +def json_tree( + schemas: list[Path], + selection_query: Path | None, + output: Path, + root_type: str | None, + naming_config: Path | None, + expanded_instances: bool, + vspec_meta: Path | None, +) -> None: + """Export GraphQL schema to a hierarchical JSON tree structure.""" + try: + annotated_schema, _, _ = load_and_process_schema( + schema_paths=schemas, + naming_config_path=naming_config, + 
selection_query_path=selection_query, + root_type=root_type, + expanded_instances=expanded_instances, + ) + assert_correct_schema(annotated_schema.schema) + + result = export_to_json_tree( + annotated_schema=annotated_schema, + root_type=root_type, + vspec_lookup_path=vspec_meta, + ) + except ValueError as exc: + raise click.ClickException(str(exc)) from exc + + output.parent.mkdir(parents=True, exist_ok=True) + _ = output.write_text(json.dumps(result, indent=2)) + log.info(f"Exported JSON tree to {output}") + + +# Export -> mongodb +# ---------- +@export.command +@schema_option +@selection_query_option() +@click.option( + "--output", + "-o", + type=click.Path(file_okay=False, writable=True, path_type=Path), + required=True, + help="Output directory. Writes one JSON file per type.", +) +@root_type_option +@naming_config_option +@expanded_instances_option +@click.option( + "--validator", + "-v", + is_flag=True, + default=False, + help='Wrap each schema in a MongoDB validator envelope: {"$jsonSchema": ...}.', +) +@click.option( + "--properties-config", + "-pc", + type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path), + default=None, + help=( + "YAML file listing object keys that should have additionalProperties: false. " + "Each entry is either a bare type name (e.g. 'Address') or a 'Parent.field' path " + "(e.g. 'ChargingStation.address')." + ), +) +def mongodb( + schemas: list[Path], + selection_query: Path | None, + output: Path, + root_type: str | None, + naming_config: Path | None, + expanded_instances: bool, + validator: bool, + properties_config: Path | None, +) -> None: + """Generate MongoDB BSON Schema validators from a given GraphQL schema. + + Writes one JSON file per type. Use --root-type to emit a single type only. + Pass --validator to wrap each schema in {"$jsonSchema": ...} for use with db.createCollection(). + Pass --properties-config to restrict which objects disallow extra fields. 
+ """ + annotated_schema, _, _ = load_and_process_schema( + schema_paths=schemas, + naming_config_path=naming_config, + selection_query_path=selection_query, + root_type=root_type, + expanded_instances=expanded_instances, + ) + assert_correct_schema(annotated_schema.schema) + + add_props_false = load_properties_config(properties_config) if properties_config else None + try: + result = translate_to_mongodb(annotated_schema, add_props_false) + except ValueError as exc: + raise click.ClickException(str(exc)) from exc + output.mkdir(parents=True, exist_ok=True) + + if root_type: + # Root-type mode: emit only the root type as a bare schema in {RootType}.json + if root_type not in result: + raise click.ClickException(f"Root type '{root_type}' not found in exported types.") + schema = wrap_validator(result[root_type]) if validator else result[root_type] + out_file = output / f"{root_type}.json" + _ = out_file.write_text(json.dumps(schema, indent=2)) + log.info(f"Wrote root type '{root_type}' validator to {out_file}") + else: + for type_name, bare in result.items(): + schema = wrap_validator(bare) if validator else bare + file_path = output / f"{type_name}.json" + _ = file_path.write_text(json.dumps(schema, indent=2)) + log.info(f"Wrote {len(result)} validator file(s) to {output}") + + # Export -> avro # ---------- @click.group() diff --git a/src/s2dm/exporters/json.py b/src/s2dm/exporters/json.py new file mode 100644 index 00000000..4289e760 --- /dev/null +++ b/src/s2dm/exporters/json.py @@ -0,0 +1,559 @@ +"""JSON Tree Exporter for S2DM. + +This exporter generates hierarchical JSON tree structures from a GraphQL schema. 
+ +Schema-intrinsic information is extracted directly from the GraphQL schema: + - description, datatype, min/max (from @range), unit (raw field arg), deprecated + +Optional VSS meta overlay (activated by passing a YAML file via `vspec_lookup_path`): + When a YAML lookup is provided, @vspec annotations are also processed: + - type from @vspec(element:...), output key renaming via @vspec(fqn:...) + - allowed values from enum fields annotated with @vspec + - all YAML entry keys are merged into the leaf node (unit, comment, default, etc.) + +Output structure example: + +{ + "Vehicle": { + "children": { + "Speed": { + "datatype": "float", + "description": "Vehicle speed", + "unit": "km/h", + "min": 0, + "max": 250 + }, + ... + }, + "description": ... + } +} +""" + +import json +from pathlib import Path +from typing import Any, cast + +import yaml +from graphql import ( + GraphQLEnumType, + GraphQLField, + GraphQLList, + GraphQLNonNull, + GraphQLObjectType, + GraphQLScalarType, + GraphQLSchema, + Undefined, + get_named_type, + is_enum_type, + is_list_type, + is_non_null_type, + is_object_type, + is_scalar_type, +) + +from s2dm import log +from s2dm.exporters.utils.annotated_schema import AnnotatedSchema +from s2dm.exporters.utils.directive import get_directive_arguments, has_given_directive +from s2dm.exporters.utils.extraction import get_all_object_types +from s2dm.exporters.utils.graphql_type import is_introspection_type + + +def load_vspec_lookup(path: Path) -> dict[str, Any]: + """Load a YAML file containing FQN-indexed VSS metadata for overlay. + + Args: + path: Path to the YAML file with FQN dot-path keys and metadata dicts as values. + + Returns: + Dictionary mapping FQN strings to metadata dicts. 
+ """ + return yaml.safe_load(path.read_text()) or {} + + +class JsonExporter: + """Exports GraphQL schema to JSON tree structure.""" + + def __init__( + self, + schema: GraphQLSchema, + annotated_schema: AnnotatedSchema, + expanded_instances: bool = False, + vspec_lookup: dict[str, Any] | None = None, + ): + """Initialize JSON exporter. + + Args: + schema: The GraphQL schema to export + annotated_schema: Annotated schema with field metadata + expanded_instances: Whether instance tags have been expanded + vspec_lookup: Optional dict from load_vspec_lookup() for FQN-indexed overlay + """ + self.schema = schema + self.annotated_schema = annotated_schema + self.expanded_instances = expanded_instances + self.vspec_lookup = vspec_lookup + # Flat lookup: merges top-level FQN entries AND ComplexDataTypes sub-entries so that + # _apply_vspec_lookup can resolve any FQN regardless of where it lives in the YAML. + self._flat_lookup: dict[str, Any] = {} + if vspec_lookup is not None: + for _key, _val in vspec_lookup.items(): + if _key == "ComplexDataTypes" and isinstance(_val, dict): + self._flat_lookup.update(_val) + elif isinstance(_val, dict): + self._flat_lookup[_key] = _val + + def export(self, root_type: str | None = None) -> dict[str, Any]: + """Export schema to JSON tree structure. + + Args: + root_type: Optional root type name to export. If None, exports all top-level types. 
+ + Returns: + Dictionary with type names as keys and tree structures as values + """ + if root_type: + # Single root export + if root_type not in self.schema.type_map: + raise ValueError(f"Root type '{root_type}' not found in schema") + + gql_type = self.schema.type_map[root_type] + if not is_object_type(gql_type): + raise ValueError(f"Root type '{root_type}' is not an object type") + + visited: set[str] = set() + return {root_type: self._build_branch_node(cast(GraphQLObjectType, gql_type), visited)} + else: + # Multi-root export - all non-introspection, non-operation object types + operation_type_names = { + t.name + for t in [self.schema.query_type, self.schema.mutation_type, self.schema.subscription_type] + if t is not None + } + user_types = [ + t + for t in get_all_object_types(self.schema) + if not is_introspection_type(t.name) + and t.name not in operation_type_names + # In vspec-meta mode: STRUCT types are emitted via the ComplexDataTypes section + and not (self.vspec_lookup is not None and self._is_vspec_struct_type(t)) + ] + result: dict[str, Any] = {} + for obj_type in user_types: + visited = set() + result[obj_type.name] = self._build_branch_node(obj_type, visited) + # In vspec-meta mode: build ComplexDataTypes section from YAML if present + if self.vspec_lookup is not None and "ComplexDataTypes" in self.vspec_lookup: + complex_entries = self.vspec_lookup["ComplexDataTypes"] + if isinstance(complex_entries, dict): + result["ComplexDataTypes"] = self._build_fqn_tree(complex_entries) + return result + + def _build_branch_node(self, gql_type: GraphQLObjectType, visited: set[str]) -> dict[str, Any]: + """Build a branch node (ObjectType) with children. 
+ + Args: + gql_type: The GraphQL ObjectType to process + visited: Set of visited type names for cycle detection + + Returns: + Dictionary representing a branch node with children + """ + # Check for cycles + if gql_type.name in visited: + log.warning(f"Cycle detected for type '{gql_type.name}', using $ref") + return {"$ref": gql_type.name} + + visited.add(gql_type.name) + + # Build base branch node + node: dict[str, Any] = {"children": {}} + # In vspec-meta mode, derive "type" from the type's own @vspec(element:...) if present, + # falling back to "branch". + if self.vspec_lookup is not None: + if has_given_directive(gql_type, "vspec"): + type_vspec_args = get_directive_arguments(gql_type, "vspec") + element = type_vspec_args.get("element") + node["type"] = str(element).lower() if element else "branch" + else: + node["type"] = "branch" + + # Add description if present + if gql_type.description: + node["description"] = gql_type.description + + # Check for @metadata directive on the type itself + if has_given_directive(gql_type, "metadata"): + metadata_args = get_directive_arguments(gql_type, "metadata") + if "comment" in metadata_args: + node["comment"] = metadata_args["comment"] + + # Process all fields + for field_name, field in gql_type.fields.items(): + # Handle @instanceTag fields — they encode instance structure, not data children + field_named_type = get_named_type(field.type) + if is_object_type(field_named_type) and has_given_directive( + cast(GraphQLObjectType, field_named_type), "instanceTag" + ): + # In default mode: derive instances from enum dimensions inside the tag + if self.vspec_lookup is None: + dimensions = self._extract_instances_from_instance_tag(cast(GraphQLObjectType, field_named_type)) + if dimensions: + node["instances"] = dimensions + continue + + # Resolve output key and FQN from @vspec(fqn:...) 
— only in vspec-meta mode + output_key = field_name + fqn: str | None = None + if self.vspec_lookup is not None and has_given_directive(field, "vspec"): + vspec_args = get_directive_arguments(field, "vspec") + raw_fqn = vspec_args.get("fqn") + if raw_fqn and isinstance(raw_fqn, str): + fqn = raw_fqn + if "." in fqn: + output_key = fqn.rsplit(".", 1)[-1] + child_node = self._process_field_to_node(field_name, field, gql_type, visited.copy(), fqn=fqn) + node["children"][output_key] = child_node + + return node + + def _build_struct_field_leaf_node( + self, + field: GraphQLField, + struct_type: GraphQLObjectType, + ) -> dict[str, Any]: + """Build a leaf node for a field whose underlying type is a @vspec STRUCT. + + In VSS, a sensor/property with a struct datatype is a leaf — the struct + definition lives separately in the types tree, and the signal only carries + the FQN of the struct as its datatype. + + Args: + field: The GraphQL field referencing the struct type. + struct_type: The @vspec(element: STRUCT) ObjectType being referenced. + + Returns: + Leaf node dict with description, datatype (struct FQN), optional + min/max/unit/deprecated, and type from the field's @vspec element. + """ + node: dict[str, Any] = {} + + if field.description: + node["description"] = field.description.strip() + + # Detect list wrapper (NonNull → List) + ft = field.type + if is_non_null_type(ft): + ft = cast("GraphQLNonNull[Any]", ft).of_type + is_list = is_list_type(ft) + + # datatype = FQN of the struct type from @vspec(fqn:...) 
on the struct + struct_vspec_args = get_directive_arguments(struct_type, "vspec") + struct_fqn = struct_vspec_args.get("fqn") + if struct_fqn and isinstance(struct_fqn, str): + node["datatype"] = struct_fqn + ("[]" if is_list else "") + + # @range from this field + if has_given_directive(field, "range"): + range_args = get_directive_arguments(field, "range") + if "min" in range_args and range_args["min"] is not None: + node["min"] = range_args["min"] + if "max" in range_args and range_args["max"] is not None: + node["max"] = range_args["max"] + + # unit from field argument + if "unit" in field.args: + unit_arg = field.args["unit"] + if unit_arg.default_value is not None and unit_arg.default_value is not Undefined: + node["unit"] = str(unit_arg.default_value) + + # deprecated + if field.deprecation_reason: + node["deprecated"] = field.deprecation_reason + + # type from the field's own @vspec(element:...) (sensor, struct_property, etc.) + if has_given_directive(field, "vspec"): + vspec_args = get_directive_arguments(field, "vspec") + element = vspec_args.get("element") + if element: + node["type"] = str(element).lower() + + return node + + def _is_vspec_struct_type(self, obj_type: GraphQLObjectType) -> bool: + """Return True if this type carries @vspec(element: STRUCT).""" + if not has_given_directive(obj_type, "vspec"): + return False + return str(get_directive_arguments(obj_type, "vspec").get("element", "")).upper() == "STRUCT" + + def _build_fqn_tree(self, fqn_entries: dict[str, Any]) -> dict[str, Any]: + """Build a nested tree from a flat dict of FQN-keyed metadata entries. + + Used to produce the ComplexDataTypes output section entirely from YAML data + without consulting the GraphQL schema. + + Args: + fqn_entries: Flat dict keyed by dot-separated FQN strings; values are + metadata dicts (type, datatype, description, min, max, …). + + Returns: + Nested dict representing the hierarchy implied by the FQN paths. 
+ """ + nodes: dict[str, dict[str, Any]] = {} + + for fqn in sorted(fqn_entries.keys(), key=lambda k: k.count(".")): + metadata = fqn_entries[fqn] + node: dict[str, Any] = {k: v for k, v in metadata.items() if v is not None} + + # Add children dict when any direct child exists in the entries + prefix = fqn + "." + if any(k.startswith(prefix) and k.count(".") == fqn.count(".") + 1 for k in fqn_entries): + node["children"] = {} + + nodes[fqn] = node + + # Attach to parent + parts = fqn.split(".") + if len(parts) > 1: + parent_fqn = ".".join(parts[:-1]) + if parent_fqn in nodes: + parent_node = nodes[parent_fqn] + if "children" not in parent_node: + parent_node["children"] = {} + parent_node["children"][parts[-1]] = node + + # Return only root-level nodes (FQNs with no dots) + return {fqn: nodes[fqn] for fqn in nodes if "." not in fqn} + + def _extract_instances_from_instance_tag(self, instance_tag_type: GraphQLObjectType) -> list[list[str]]: + """Extract instance dimensions from an @instanceTag type's enum fields. + + Each enum-typed field in the instanceTag type represents one dimension; + its values (in definition order) form the list for that dimension. + + Args: + instance_tag_type: The GraphQL ObjectType carrying the @instanceTag directive. + + Returns: + List of dimensions, each a list of enum value name strings. + """ + dimensions: list[list[str]] = [] + for _, field in instance_tag_type.fields.items(): + field_type = get_named_type(field.type) + if is_enum_type(field_type): + enum_type = cast(GraphQLEnumType, field_type) + dimensions.append(list(enum_type.values.keys())) + return dimensions + + def _apply_vspec_lookup(self, node: dict[str, Any], fqn: str) -> None: + """Overlay node properties from the vspec_lookup YAML using the given FQN. + + Merges all keys from the YAML entry into `node`, overwriting existing values. + Supported keys: type, datatype, description, unit, min, max, allowed, comment, + default, deprecated. 
+ + Args: + node: The leaf node dict to update in-place. + fqn: The fully-qualified name to look up in self.vspec_lookup. + """ + if self.vspec_lookup is None: + return + entry = self._flat_lookup.get(fqn) + if not entry or not isinstance(entry, dict): + return + for key, value in entry.items(): + if value is not None: + node[key] = value + + def _process_field_to_node( + self, + field_name: str, + field: GraphQLField, + parent_type: GraphQLObjectType, + visited: set[str], + fqn: str | None = None, + ) -> dict[str, Any]: + """Process a GraphQL field and return its node representation. + + Args: + field_name: The name of the field + field: The GraphQL field to process + parent_type: The parent ObjectType containing this field + visited: Set of visited type names for cycle detection + fqn: Optional fully-qualified name from @vspec for vspec_lookup overlay + + Returns: + Node dictionary + """ + field_type = get_named_type(field.type) + + # Check if field is an ObjectType (branch node) + if is_object_type(field_type): + obj_type = cast(GraphQLObjectType, field_type) + + # Skip @instanceTag types — guarded upstream, but kept as safety net + if has_given_directive(obj_type, "instanceTag"): + log.warning(f"Skipping @instanceTag type '{obj_type.name}' as field") + return {"children": {}} + + # In vspec-meta mode: fields referencing a @vspec(element: STRUCT) type are leaf + # nodes whose datatype is the struct's FQN, not expanded branch nodes. 
+ if self.vspec_lookup is not None and has_given_directive(obj_type, "vspec"): + struct_vspec = get_directive_arguments(obj_type, "vspec") + if str(struct_vspec.get("element", "")).upper() == "STRUCT": + leaf_node = self._build_struct_field_leaf_node(field, obj_type) + if fqn: + self._apply_vspec_lookup(leaf_node, fqn) + return leaf_node + + # Build branch node recursively + branch_node = self._build_branch_node(obj_type, visited) + + # Add description from field's docstring + if field.description: + branch_node["description"] = field.description.strip() + + # Check for instances metadata (from annotated schema — default mode) + field_meta = self.annotated_schema.field_metadata.get((parent_type.name, field_name)) + if field_meta and field_meta.instances: + branch_node["instances"] = field_meta.instances + + # Apply YAML overlay for branch node (sets instances, type, comment, etc.) + if fqn and self.vspec_lookup is not None: + self._apply_vspec_lookup(branch_node, fqn) + + return branch_node + + # Leaf node (scalar or enum) + leaf_node = self._extract_leaf_properties(field) + + # Apply YAML overlay if a vspec_lookup is configured + if fqn and self.vspec_lookup is not None: + self._apply_vspec_lookup(leaf_node, fqn) + + return leaf_node + + def _extract_leaf_properties(self, field: GraphQLField) -> dict[str, Any]: + """Extract properties from a leaf field (scalar or enum). + + Default mode (no vspec_lookup): extracts only schema-intrinsic fields: + description, datatype, min/max (from @range), unit (from field arg), deprecated. + + Vspec-meta mode (vspec_lookup is set): also processes @vspec to add + type (from element) and allowed (for enum fields). The YAML overlay is + applied afterwards in _process_field_to_node. 
+ + Args: + field: The GraphQL field to extract properties from + + Returns: + Dictionary with leaf node properties + """ + node: dict[str, Any] = {} + + # Get the actual field type (unwrap NonNull/List wrappers) + field_type = field.type + is_non_null = is_non_null_type(field_type) + is_list = False + + if is_non_null: + field_type = cast("GraphQLNonNull[Any]", field_type).of_type + + if is_list_type(field_type): + is_list = True + field_type = cast("GraphQLList[Any]", field_type).of_type + # Unwrap NonNull inside list if present + if is_non_null_type(field_type): + field_type = cast("GraphQLNonNull[Any]", field_type).of_type + + named_type = get_named_type(field_type) + + # Add description (always included if present) + if field.description: + node["description"] = field.description + + # Determine datatype (always included) + if is_scalar_type(named_type): + scalar = cast(GraphQLScalarType, named_type) + datatype = scalar.name + if is_list: + datatype += "[]" + node["datatype"] = datatype + elif is_enum_type(named_type): + datatype = named_type.name + if is_list: + datatype += "[]" + node["datatype"] = datatype + + # Extract from @range directive + if has_given_directive(field, "range"): + range_args = get_directive_arguments(field, "range") + if "min" in range_args and range_args["min"] is not None: + node["min"] = range_args["min"] + if "max" in range_args and range_args["max"] is not None: + node["max"] = range_args["max"] + + # Extract unit from field arguments. 
+ if "unit" in field.args: + unit_arg = field.args["unit"] + if unit_arg.default_value is not None and unit_arg.default_value is not Undefined: + node["unit"] = str(unit_arg.default_value) + + # Add deprecated if present + if field.deprecation_reason: + node["deprecated"] = field.deprecation_reason + + # Handle @vspec directive — only in vspec-meta mode (vspec_lookup is configured) + if self.vspec_lookup is not None and has_given_directive(field, "vspec"): + vspec_args = get_directive_arguments(field, "vspec") + + # Extract element type (maps to "type" in output) + if "element" in vspec_args and vspec_args["element"]: + # Convert from VSPEC element enum to lowercase string + element = str(vspec_args["element"]).lower() + node["type"] = element + + # For enum types with @vspec, add allowed values + if is_enum_type(named_type): + enum_type = cast(GraphQLEnumType, named_type) + node["allowed"] = list(enum_type.values.keys()) + + return node + + +def export_to_json_tree( + annotated_schema: AnnotatedSchema, + root_type: str | None = None, + output_file: Path | None = None, + vspec_lookup_path: Path | None = None, +) -> dict[str, Any]: + """Export GraphQL schema to JSON tree format. + + Args: + annotated_schema: The annotated schema to export + root_type: Optional root type name. If None, exports all top-level types. + output_file: Optional output file path. If provided, writes JSON to file. + vspec_lookup_path: Optional path to a YAML file with FQN-indexed metadata overlay. + When provided, leaf node properties are overwritten by matching YAML entries. 
+ + Returns: + Dictionary representing the JSON tree + """ + vspec_lookup: dict[str, Any] | None = None + if vspec_lookup_path is not None: + vspec_lookup = load_vspec_lookup(vspec_lookup_path) + + exporter = JsonExporter( + annotated_schema.schema, + annotated_schema, + expanded_instances=False, # Determined by schema loading + vspec_lookup=vspec_lookup, + ) + + result = exporter.export(root_type=root_type) + + if output_file: + output_file.write_text(json.dumps(result, indent=2)) + log.info(f"Exported JSON tree to {output_file}") + + return result diff --git a/src/s2dm/exporters/mongodb/README.md b/src/s2dm/exporters/mongodb/README.md new file mode 100644 index 00000000..e1e9a185 --- /dev/null +++ b/src/s2dm/exporters/mongodb/README.md @@ -0,0 +1,137 @@ +# MongoDB BSON Schema Exporter + +Translates an S2DM GraphQL schema into MongoDB BSON validator schemas — one schema per exportable GraphQL type. + +## Output + +Each schema is a bare BSON object (no `$jsonSchema` wrapper) intended to be used as: + +```js +db.createCollection("MyType", { + validator: { $jsonSchema: } +}) +``` + +**Default mode** writes one `TypeName.json` per type. +**Root-type mode** (`--root-type Name`) writes only `Name.json` with no outer key wrapper. +**Validator envelope** (`--validator / -v`) wraps every output schema in `{"$jsonSchema": ...}` for direct use with `db.createCollection()`. + +## BSON Type Mapping + +### Built-in scalars + +| GraphQL | BSON | +|---------|------| +| `String` | `string` | +| `Int` | `int` | +| `Float` | `double` | +| `Boolean` | `bool` | +| `ID` | `objectId` | + +### S2DM extended scalars (`common.graphql`) + +| GraphQL | BSON | +|---------|------| +| `Int8`, `UInt8`, `Int16`, `UInt16`, `UInt32` | `int` | +| `Int64`, `UInt64` | `long` | + +Unknown scalars fall back to `string`. + +### GeoJSON scalar (`geojson.graphql`) + +The `GeoJSON` scalar with `@geoType(shape: ...)` emits a hardcoded BSON geometry schema. 
Without `@geoType`, a permissive object requiring only `type` and `coordinates` is emitted.
+
+Supported shapes: `POINT`, `MULTIPOINT`, `LINESTRING`, `MULTILINESTRING`, `POLYGON`, `MULTIPOLYGON`.
+
+> MongoDB `$jsonSchema` does not support `oneOf`, so only `POINT` can be fully validated at all nesting levels. Other shapes are validated at the first level only.
+
+## Nullability
+
+| GraphQL | `bsonType` | In `required`? |
+|---------|-----------|----------------|
+| `field: T!` | `"<bsonType>"` | yes |
+| `field: T` | `["<bsonType>", "null"]` | no |
+
+## `additionalProperties`
+
+Every emitted object schema always includes `additionalProperties`. By default it is `true`. Use `--properties-config` to set it to `false` for specific types or fields.
+
+### `--properties-config / -pc`
+
+Accepts a YAML file listing the object schemas that should have `additionalProperties: false`. Each entry is either:
+
+- A **bare type name** — applies to the top-level exported schema for that type:
+  ```yaml
+  - Address
+  ```
+- A **`Parent.field` path** — applies only to the inline occurrence of that field inside `Parent`, leaving the `Address` top-level schema unaffected:
+  ```yaml
+  - ChargingStation.address
+  ```
+
+Both forms can be combined freely:
+
+```yaml
+- Address
+- Address.street
+- ChargingStation.address
+```
+
+If any entry does not correspond to a type or field that exists in the schema, the exporter raises an error before writing any output.
+ +## Directive Mapping + +| Directive | BSON output | +|-----------|------------| +| `@range(min, max)` on scalar field | `minimum` / `maximum` on the field | +| `@range(min, max)` on list field | `minimum` / `maximum` inside `items` | +| `@noDuplicates` | `uniqueItems: true` | +| `@cardinality(min, max)` | `minItems` / `maxItems` | +| `@instanceTag` | type and its reference field on parent types both excluded | + +GraphQL docstrings (`field.description`, `type.description`) are emitted as `description` automatically — `@metadata(comment)` is not used here. MongoDB does not support `$comment`; `description` is included in validation error messages since MongoDB 5.1. + +### `@instanceTag` behaviour + +Without `--expanded-instances` the exporter has no way to represent instance tag structures: +- `@instanceTag` types are excluded as top-level entries +- The `instanceTag` field on parent types (the reference pointing to the tag type) is also dropped + +With `--expanded-instances` the schema loader unfolds the tag structure into concrete fields *before* the transformer runs, so neither the `@instanceTag` type nor the reference field appear in the transformer's input at all. + +## Exclusions + +Never exported as top-level entries: +- Root types (`Query`, `Mutation`, `Subscription`) +- `@instanceTag` types +- Scalars and enums (always inlined at usage site) +- GraphQL introspection types + +## Circular References + +MongoDB does not support `$ref`, so circular type references cannot be represented. The transformer raises a `ValueError` identifying the cycle before any output is written. + +## Architecture + +``` +mongodb/ +├── __init__.py # Public API: translate_to_mongodb() +├── mongodb.py # Entry points: transform(), translate_to_mongodb(), +│ # wrap_validator(), load_properties_config(), to_json_string() +└── transformer.py # MongoDBTransformer — all GraphQL → BSON logic +``` + +`MongoDBTransformer.transform()` returns `dict[str, dict]`. 
Each value is a bare BSON schema built by recursive inlining — no `$ref`, no `$defs`. Circular reference detection is done via a `frozenset[str]` of type names currently being resolved. + +`load_properties_config(path)` parses the `--properties-config` YAML file and returns a `frozenset[str]` of keys. Validation against the schema happens inside `MongoDBTransformer._validate_properties_config()`, called at the start of `transform()`. + +## MongoDB `$jsonSchema` restrictions + +This exporter deliberately avoids these unsupported keywords: + +- `$ref`, `$schema`, `definitions`, `$defs` +- `default`, `format`, `id` +- `integer` type (use `int` or `long`) +- `$comment` (use `description` — supported since MongoDB 5.1) + +Reference: [MongoDB JSON Schema omissions](https://www.mongodb.com/docs/manual/reference/operator/query/jsonSchema/#json-schema-omissions) diff --git a/src/s2dm/exporters/mongodb/__init__.py b/src/s2dm/exporters/mongodb/__init__.py new file mode 100644 index 00000000..fdd39238 --- /dev/null +++ b/src/s2dm/exporters/mongodb/__init__.py @@ -0,0 +1,5 @@ +"""MongoDB BSON Schema exporter module for S2DM.""" + +from .mongodb import translate_to_mongodb + +__all__ = ["translate_to_mongodb"] diff --git a/src/s2dm/exporters/mongodb/mongodb.py b/src/s2dm/exporters/mongodb/mongodb.py new file mode 100644 index 00000000..b5f3944a --- /dev/null +++ b/src/s2dm/exporters/mongodb/mongodb.py @@ -0,0 +1,84 @@ +import json +from pathlib import Path +from typing import Any + +import yaml +from graphql import GraphQLSchema + +from s2dm import log +from s2dm.exporters.utils.annotated_schema import AnnotatedSchema + +from .transformer import MongoDBTransformer + + +def load_properties_config(config_path: Path) -> frozenset[str]: + """Load a YAML properties-config file and return a frozenset of keys. + + The file is a YAML sequence of strings. Each entry is either a bare type + name (``"Address"``) or a ``Parent.field`` path + (``"ChargingStation.address"``). 
Only entries of those two forms are + accepted; anything else raises ``ValueError``. + + Example file:: + + - Address + - Address.street + - ChargingStation.address + + """ + raw = yaml.safe_load(config_path.read_text()) + if not isinstance(raw, list): + raise ValueError(f"Properties config '{config_path}' must be a YAML sequence, got {type(raw).__name__}.") + entries: set[str] = set() + for item in raw: + if not isinstance(item, str): + raise ValueError(f"Properties config entries must be strings, got {type(item).__name__!r}: {item!r}") + parts = item.split(".") + if len(parts) not in (1, 2) or any(p == "" for p in parts): + raise ValueError( + f"Invalid properties-config entry {item!r}. " "Expected 'TypeName' or 'TypeName.fieldName'." + ) + entries.add(item) + return frozenset(entries) + + +def transform( + graphql_schema: GraphQLSchema, + additional_props_false: frozenset[str] | None = None, +) -> dict[str, dict[str, Any]]: + """Return a bare BSON schema dict for every exportable type in the schema. + + Parameters + ---------- + graphql_schema: + The compiled GraphQL schema. + additional_props_false: + Keys for which ``additionalProperties: false`` should be emitted. + See :class:`~.transformer.MongoDBTransformer` for the key format. + """ + log.info(f"Transforming GraphQL schema to MongoDB BSON validators ({len(graphql_schema.type_map)} types)") + result = MongoDBTransformer(graphql_schema, additional_props_false).transform() + log.info(f"Generated {len(result)} MongoDB BSON schema(s)") + return result + + +def translate_to_mongodb( + annotated_schema: AnnotatedSchema, + additional_props_false: frozenset[str] | None = None, +) -> dict[str, dict[str, Any]]: + """Unwrap an ``AnnotatedSchema`` and delegate to :func:`transform`.""" + return transform(annotated_schema.schema, additional_props_false) + + +def wrap_validator(schema: dict[str, Any]) -> dict[str, Any]: + """Wrap a bare BSON schema in the MongoDB collection validator envelope. 
+ + Produces ``{"$jsonSchema": schema}`` for direct use with + ``db.createCollection(name, {validator: {$jsonSchema: ...}})``. + """ + return {"$jsonSchema": schema} + + +def to_json_string(schemas: dict[str, dict[str, Any]]) -> str: + """Serialize the full validator map to a pretty-printed JSON string.""" + return json.dumps(schemas, indent=2) diff --git a/src/s2dm/exporters/mongodb/transformer.py b/src/s2dm/exporters/mongodb/transformer.py new file mode 100644 index 00000000..3a6e54a0 --- /dev/null +++ b/src/s2dm/exporters/mongodb/transformer.py @@ -0,0 +1,550 @@ +from typing import Any, cast + +from graphql import ( + GraphQLEnumType, + GraphQLField, + GraphQLInterfaceType, + GraphQLList, + GraphQLNonNull, + GraphQLObjectType, + GraphQLScalarType, + GraphQLSchema, + GraphQLType, + GraphQLUnionType, + is_enum_type, + is_interface_type, + is_list_type, + is_non_null_type, + is_object_type, + is_scalar_type, + is_union_type, +) + +from s2dm import log +from s2dm.exporters.utils.directive import get_directive_arguments, has_given_directive +from s2dm.exporters.utils.extraction import get_all_named_types +from s2dm.exporters.utils.field import get_cardinality +from s2dm.exporters.utils.graphql_type import is_root_type +from s2dm.exporters.utils.instance_tag import ( + is_instance_tag_field, + is_valid_instance_tag_field, +) + +# MongoDB supports draft 4 of JSON Schema with BSON type extensions. +# Unsupported keywords: $ref, $schema, definitions, default, format, id, integer type. +# Use bsonType instead of type; use int/long instead of integer. + +# --------------------------------------------------------------------------- +# GeoJSON BSON schemas (RFC 7946 — https://www.rfc-editor.org/rfc/rfc7946) +# Used when a field is typed as the GeoJSON scalar with @geoType directive. +# Note: MongoDB $jsonSchema enforces the first level precisely; deeper nesting +# is expressed best-effort since oneOf/anyOf are not supported. 
+# --------------------------------------------------------------------------- + +_GEOJSON_POINT_SCHEMA: dict[str, Any] = { + "bsonType": "object", + "required": ["type", "coordinates"], + "properties": { + "type": {"bsonType": "string", "enum": ["Point"]}, + "coordinates": { + "bsonType": "array", + "items": {"bsonType": "double"}, + "minItems": 2, + "maxItems": 2, + }, + }, +} + +_GEOJSON_MULTIPOINT_SCHEMA: dict[str, Any] = { + "bsonType": "object", + "required": ["type", "coordinates"], + "properties": { + "type": {"bsonType": "string", "enum": ["MultiPoint"]}, + "coordinates": { + "bsonType": "array", + "items": { + "bsonType": "array", + "items": {"bsonType": "double"}, + "minItems": 2, + }, + }, + }, +} + +_GEOJSON_LINESTRING_SCHEMA: dict[str, Any] = { + "bsonType": "object", + "required": ["type", "coordinates"], + "properties": { + "type": {"bsonType": "string", "enum": ["LineString"]}, + "coordinates": { + "bsonType": "array", + "items": { + "bsonType": "array", + "items": {"bsonType": "double"}, + "minItems": 2, + }, + "minItems": 2, + }, + }, +} + +_GEOJSON_MULTILINESTRING_SCHEMA: dict[str, Any] = { + "bsonType": "object", + "required": ["type", "coordinates"], + "properties": { + "type": {"bsonType": "string", "enum": ["MultiLineString"]}, + "coordinates": { + "bsonType": "array", + "items": { + "bsonType": "array", + "items": { + "bsonType": "array", + "items": {"bsonType": "double"}, + "minItems": 2, + }, + "minItems": 2, + }, + }, + }, +} + +_GEOJSON_POLYGON_SCHEMA: dict[str, Any] = { + "bsonType": "object", + "required": ["type", "coordinates"], + "properties": { + "type": {"bsonType": "string", "enum": ["Polygon"]}, + "coordinates": { + "bsonType": "array", + "items": { + # Each linear ring: array of positions, first == last, minItems 4 + "bsonType": "array", + "items": { + "bsonType": "array", + "items": {"bsonType": "double"}, + "minItems": 2, + }, + "minItems": 4, + }, + }, + }, +} + +_GEOJSON_MULTIPOLYGON_SCHEMA: dict[str, Any] = { + 
"bsonType": "object", + "required": ["type", "coordinates"], + "properties": { + "type": {"bsonType": "string", "enum": ["MultiPolygon"]}, + "coordinates": { + "bsonType": "array", + "items": { + "bsonType": "array", + "items": { + "bsonType": "array", + "items": { + "bsonType": "array", + "items": {"bsonType": "double"}, + "minItems": 2, + }, + "minItems": 4, + }, + }, + }, + }, +} + +# Permissive fallback: GeoJSON scalar without @geoType. +# Requires 'type' (string) and 'coordinates' (array) but does not constrain geometry shape. +_GEOJSON_GENERIC_SCHEMA: dict[str, Any] = { + "bsonType": "object", + "required": ["type", "coordinates"], + "properties": { + "type": {"bsonType": "string"}, + "coordinates": {"bsonType": "array"}, + }, +} + +_GEOJSON_SCHEMAS: dict[str, dict[str, Any]] = { + "POINT": _GEOJSON_POINT_SCHEMA, + "MULTIPOINT": _GEOJSON_MULTIPOINT_SCHEMA, + "LINESTRING": _GEOJSON_LINESTRING_SCHEMA, + "MULTILINESTRING": _GEOJSON_MULTILINESTRING_SCHEMA, + "POLYGON": _GEOJSON_POLYGON_SCHEMA, + "MULTIPOLYGON": _GEOJSON_MULTIPOLYGON_SCHEMA, +} + +GRAPHQL_SCALAR_TO_BSON: dict[str, str] = { + "String": "string", + "Int": "int", + "Float": "double", + "Boolean": "bool", + "ID": "objectId", + "Int8": "int", + "UInt8": "int", + "Int16": "int", + "UInt16": "int", + "UInt32": "int", + "Int64": "long", + "UInt64": "long", +} + + +class MongoDBTransformer: + """ + Converts a GraphQL schema to per-type MongoDB BSON validator schemas. + + Uses ``bsonType`` (not ``type``). All nested types are inlined — no ``$ref``. + Enums inlined at usage. Nullable fields: ``bsonType: [, "null"]``. + Root types and ``@instanceTag`` types are excluded as top-level entries. + The ``instanceTag`` field on parent types (the reference to the tag structure) + is also dropped — without ``--expanded-instances`` the tag structure has no + representation in the output. + Circular references raise ``ValueError``. + + Parameters + ---------- + graphql_schema: + The compiled GraphQL schema to export. 
+    additional_props_false:
+        Set of keys for which ``additionalProperties: false`` should be emitted.
+        Each entry is either a bare type name (``"Address"``) or a
+        ``Parent.field`` path (``"ChargingStation.address"``).
+        When empty (the default) every object schema still gets ``additionalProperties: true``.
+    """
+
+    def __init__(
+        self,
+        graphql_schema: GraphQLSchema,
+        additional_props_false: frozenset[str] | None = None,
+    ) -> None:
+        self.graphql_schema = graphql_schema
+        self._additional_props_false: frozenset[str] = additional_props_false or frozenset()
+
+    def transform(self) -> dict[str, dict[str, Any]]:
+        """Return ``{type_name: bson_schema}`` for every exportable object/interface/union type."""
+        if self._additional_props_false:
+            self._validate_properties_config()
+
+        result: dict[str, dict[str, Any]] = {}
+
+        for type_def in get_all_named_types(self.graphql_schema):
+            if is_scalar_type(type_def) or is_enum_type(type_def):
+                continue  # inlined at usage; no top-level entry
+
+            if is_object_type(type_def):
+                obj = cast(GraphQLObjectType, type_def)
+                if has_given_directive(obj, "instanceTag"):
+                    log.debug(f"Skipping @instanceTag type: {obj.name}")
+                    continue
+                if is_root_type(type_def.name):
+                    log.debug(f"Skipping root type: {type_def.name}")
+                    continue
+                result[type_def.name] = self._build_object_schema(obj, frozenset(), type_key=type_def.name)
+
+            elif is_interface_type(type_def):
+                iface = cast(GraphQLInterfaceType, type_def)
+                result[type_def.name] = self._build_interface_schema(iface, frozenset(), type_key=type_def.name)
+
+            elif is_union_type(type_def):
+                union = cast(GraphQLUnionType, type_def)
+                result[type_def.name] = self._build_union_schema(union, frozenset())
+
+        return result
+
+    def _validate_properties_config(self) -> None:
+        """Raise ``ValueError`` listing every entry in the properties config that does not
+        correspond to a known object/interface type (or field on one) in the schema.
+ + Rules: + - ``"TypeName"`` → ``TypeName`` must be an object or interface type in the schema. + - ``"TypeName.fieldName"`` → ``TypeName`` must be an object/interface AND must have + a field named ``fieldName``. + """ + errors: list[str] = [] + for key in sorted(self._additional_props_false): + parts = key.split(".") + type_name = parts[0] + type_def = self.graphql_schema.type_map.get(type_name) + + if type_def is None: + errors.append(f" '{key}': type '{type_name}' does not exist in the schema.") + continue + + if not (is_object_type(type_def) or is_interface_type(type_def)): + errors.append( + f" '{key}': '{type_name}' is not an object or interface type " f"(got {type(type_def).__name__})." + ) + continue + + if len(parts) == 2: + field_name = parts[1] + fields = ( + cast(GraphQLObjectType, type_def).fields + if is_object_type(type_def) + else cast(GraphQLInterfaceType, type_def).fields + ) + if field_name not in fields: + errors.append(f" '{key}': type '{type_name}' has no field '{field_name}'.") + + if errors: + log.error("Invalid properties-config entries:\n" + "\n".join(errors)) + raise ValueError("Fix the file passed to '--properties-config' and try again.") + + # ------------------------------------------------------------------ + # Schema builders (return bare dicts, no $jsonSchema wrapper) + # ------------------------------------------------------------------ + + def _build_object_schema( + self, + object_type: GraphQLObjectType, + resolving: frozenset[str], + type_key: str | None = None, + ) -> dict[str, Any]: + if object_type.name in resolving: + chain = " -> ".join(sorted(resolving)) + raise ValueError( + f"Circular reference detected: '{chain}' -> '{object_type.name}'. " + "MongoDB validators do not support $ref; circular types cannot be inlined." 
+ ) + resolving = resolving | {object_type.name} + + schema: dict[str, Any] = {"bsonType": "object", "properties": {}} + if object_type.description: + schema["description"] = object_type.description + + # Emit additionalProperties: false when listed in config, true otherwise. + # type_key is either the bare type name (top-level) or "Parent.field" (inline). + schema["additionalProperties"] = not (type_key is not None and type_key in self._additional_props_false) + + required: list[str] = [] + for field_name, field in object_type.fields.items(): + if is_valid_instance_tag_field(field, self.graphql_schema): + if is_instance_tag_field(field_name): + continue + raise ValueError(f"Invalid schema: @instanceTag object found on non-instanceTag field '{field_name}'") + if is_non_null_type(field.type): + required.append(field_name) + # Compute the inline key for this field: "ParentType.fieldName" + inline_key = f"{object_type.name}.{field_name}" if type_key is not None else None + schema["properties"][field_name] = self._build_field_schema(field, resolving, inline_key=inline_key) + + if required: + schema["required"] = required + return schema + + def _build_interface_schema( + self, + interface_type: GraphQLInterfaceType, + resolving: frozenset[str], + type_key: str | None = None, + ) -> dict[str, Any]: + if interface_type.name in resolving: + raise ValueError(f"Circular reference detected involving interface '{interface_type.name}'.") + resolving = resolving | {interface_type.name} + + schema: dict[str, Any] = {"bsonType": "object", "properties": {}} + if interface_type.description: + schema["description"] = interface_type.description + + schema["additionalProperties"] = not (type_key is not None and type_key in self._additional_props_false) + + required: list[str] = [] + for field_name, field in interface_type.fields.items(): + if is_non_null_type(field.type): + required.append(field_name) + inline_key = f"{interface_type.name}.{field_name}" if type_key is not None else 
None + schema["properties"][field_name] = self._build_field_schema(field, resolving, inline_key=inline_key) + + if required: + schema["required"] = required + return schema + + def _build_union_schema( + self, + union_type: GraphQLUnionType, + resolving: frozenset[str], + ) -> dict[str, Any]: + members: list[dict[str, Any]] = [self._build_object_schema(member, resolving) for member in union_type.types] + schema: dict[str, Any] = {"oneOf": members} + if union_type.description: + schema["description"] = union_type.description + return schema + + # ------------------------------------------------------------------ + # Field / type resolution + # ------------------------------------------------------------------ + + @staticmethod + def _unwrapped_scalar_name(field_type: GraphQLType) -> str | None: + """Return the scalar name if field_type (unwrapping a single NonNull) is a scalar.""" + unwrapped = cast(GraphQLNonNull[Any], field_type).of_type if is_non_null_type(field_type) else field_type + return cast(GraphQLScalarType, unwrapped).name if is_scalar_type(unwrapped) else None + + def _build_geojson_schema(self, shape: str, nullable: bool) -> dict[str, Any]: + """Return the BSON schema for a GeoJSON field, with optional null union.""" + base = dict(_GEOJSON_SCHEMAS.get(shape, _GEOJSON_GENERIC_SCHEMA)) + if nullable: + base["bsonType"] = [cast(str, base.get("bsonType", "object")), "null"] + return base + + def _build_field_schema( + self, + field: GraphQLField, + resolving: frozenset[str], + inline_key: str | None = None, + ) -> dict[str, Any]: + # --- GeoJSON scalar with @geoType directive → precise BSON shape --- + # Must be checked before _get_type_schema because the shape context + # lives on the field directive, not on the type itself. 
+ if self._unwrapped_scalar_name(field.type) == "GeoJSON" and has_given_directive(field, "geoType"): + shape = str(get_directive_arguments(field, "geoType").get("shape", "")) + nullable = not is_non_null_type(field.type) + schema = self._build_geojson_schema(shape, nullable) + if field.description: + schema["description"] = field.description + return schema + + schema = self._get_type_schema(field.type, nullable=True, resolving=resolving, inline_key=inline_key) + + if field.description: + schema["description"] = field.description + + if hasattr(field, "ast_node") and field.ast_node and field.ast_node.directives: + directive_result = self._process_directives(field, field.type) + schema.update(directive_result["field"]) + # range on list-item types goes into items + if directive_result["contained_type"]: + bson_t = schema.get("bsonType") + is_array = bson_t == "array" or (isinstance(bson_t, list) and "array" in bson_t) + if is_array and "items" in schema: + schema["items"].update(directive_result["contained_type"]) + + return schema + + def _get_type_schema( + self, + field_type: GraphQLType, + nullable: bool, + resolving: frozenset[str], + inline_key: str | None = None, + ) -> dict[str, Any]: + # --- NonNull wrapper --- + if is_non_null_type(field_type): + return self._get_type_schema( + cast(GraphQLNonNull[Any], field_type).of_type, + nullable=False, + resolving=resolving, + inline_key=inline_key, + ) + + # --- List --- + if is_list_type(field_type): + list_type = cast(GraphQLList[Any], field_type) + # Items inherit their own nullability from their own wrapping + items_schema = self._get_type_schema(list_type.of_type, nullable=True, resolving=resolving) + bson_t: str | list[str] = ["array", "null"] if nullable else "array" + return {"bsonType": bson_t, "items": items_schema} + + # --- Scalar --- + if is_scalar_type(field_type): + scalar = cast(GraphQLScalarType, field_type) + # GeoJSON without @geoType → permissive object (shape unknown at type level) + if 
scalar.name == "GeoJSON": + schema = dict(_GEOJSON_GENERIC_SCHEMA) + if nullable: + schema["bsonType"] = [cast(str, schema.get("bsonType", "object")), "null"] + return schema + bson_scalar = GRAPHQL_SCALAR_TO_BSON.get(scalar.name, "string") + if nullable: + return {"bsonType": [bson_scalar, "null"]} + return {"bsonType": bson_scalar} + + # --- Enum — always inlined, never $ref --- + if is_enum_type(field_type): + enum_type = cast(GraphQLEnumType, field_type) + values = list(enum_type.values.keys()) + if nullable: + return {"bsonType": ["string", "null"], "enum": values} + return {"bsonType": "string", "enum": values} + + # --- Object — inline recursively --- + if is_object_type(field_type): + obj = cast(GraphQLObjectType, field_type) + inner = self._build_object_schema(obj, resolving, type_key=inline_key) + if nullable: + inner = dict(inner) + inner["bsonType"] = [cast(str, inner.get("bsonType", "object")), "null"] + return inner + + # --- Interface — inline recursively --- + if is_interface_type(field_type): + iface = cast(GraphQLInterfaceType, field_type) + inner = self._build_interface_schema(iface, resolving, type_key=inline_key) + if nullable: + inner = dict(inner) + inner["bsonType"] = [cast(str, inner.get("bsonType", "object")), "null"] + return inner + + # --- Union --- oneOf inline members --- + if is_union_type(field_type): + union = cast(GraphQLUnionType, field_type) + inner = self._build_union_schema(union, resolving) + if nullable: + inner = dict(inner) + inner["oneOf"] = list(inner.get("oneOf", [])) + [{"bsonType": "null"}] + return inner + + log.warning(f"Unknown GraphQL type {type(field_type)}, defaulting to bsonType: string") + return {"bsonType": "string"} + + # ------------------------------------------------------------------ + # Directive processing + # ------------------------------------------------------------------ + + def _process_directives( + self, + element: GraphQLField | GraphQLObjectType, + field_type: GraphQLType | None = None, + ) 
-> dict[str, Any]: + """ + Map S2DM directives to BSON validator keywords. + + Returns ``{"field": {...}, "contained_type": {...}}`` where ``field`` applies to + the field itself and ``contained_type`` applies to array items (e.g. ``@range``). + Descriptions are read from GraphQL docstrings (``field.description``) in + ``_build_field_schema``, not from ``@metadata`` — MongoDB does not support ``$comment``. + """ + field_exts: dict[str, Any] = {} + contained_exts: dict[str, Any] = {} + + # @noDuplicates → uniqueItems (supported by MongoDB) + if has_given_directive(element, "noDuplicates"): + field_exts["uniqueItems"] = True + + # @cardinality → minItems / maxItems + if isinstance(element, GraphQLField): + cardinality = get_cardinality(element) + if cardinality: + if cardinality.min is not None: + field_exts["minItems"] = cardinality.min + if cardinality.max is not None: + field_exts["maxItems"] = cardinality.max + + # @range → minimum / maximum + # On list fields these belong in items, not the array wrapper + if has_given_directive(element, "range"): + args = get_directive_arguments(element, "range") + range_exts: dict[str, Any] = {} + if "min" in args: + range_exts["minimum"] = args["min"] + if "max" in args: + range_exts["maximum"] = args["max"] + + unwrapped = field_type + if unwrapped and is_non_null_type(unwrapped): + unwrapped = cast(GraphQLNonNull[Any], unwrapped).of_type + if unwrapped and is_list_type(unwrapped): + contained_exts.update(range_exts) + else: + field_exts.update(range_exts) + + return {"field": field_exts, "contained_type": contained_exts} diff --git a/src/s2dm/exporters/utils/directive.py b/src/s2dm/exporters/utils/directive.py index f03089e2..8107be3e 100644 --- a/src/s2dm/exporters/utils/directive.py +++ b/src/s2dm/exporters/utils/directive.py @@ -14,6 +14,8 @@ GraphQLType, GraphQLUnionType, IntValueNode, + ListValueNode, + ObjectValueNode, ) from graphql.language.printer import print_ast @@ -37,6 +39,21 @@ def 
def _parse_value_node(node: Any) -> Any:
    """Recursively convert a GraphQL AST value node to a plain Python value.

    Int and float nodes become ``int``/``float``; list and object nodes are
    converted recursively into plain ``list``/``dict`` values. Any other node
    carrying a ``value`` attribute (string, boolean, enum) yields that value
    unchanged.
    """
    if isinstance(node, IntValueNode):
        return int(node.value)
    if isinstance(node, FloatValueNode):
        return float(node.value)
    if isinstance(node, ListValueNode):
        return [_parse_value_node(item) for item in node.values]
    if isinstance(node, ObjectValueNode):
        return {entry.name.value: _parse_value_node(entry.value) for entry in node.fields}
    # String/Boolean/Enum value nodes all expose a plain ``value`` attribute.
    if hasattr(node, "value"):
        return node.value
    # NOTE(review): NullValueNode has no ``value`` attribute and falls through
    # to here, so a ``null`` argument yields the AST node itself — confirm
    # whether it should map to Python ``None`` instead.
    return node


def get_directive_arguments(element: GraphQLField | GraphQLObjectType, directive_name: str) -> dict[str, Any]:
    """
    Extracts the arguments of a specified directive from a GraphQL element.

    Args:
        element: The GraphQL field or object type to inspect.
        directive_name: The name of the directive whose arguments are to be extracted.
    Returns:
        dict[str, Any]: A dictionary containing the directive arguments with proper type conversion.
            List and object argument values are recursively converted to plain Python
            lists and dicts. Empty when the directive is absent or the element
            has no AST node.
    """
    if not has_given_directive(element, directive_name) or not element.ast_node:
        return {}

    for applied in element.ast_node.directives:
        if applied.name.value == directive_name:
            return {arg.name.value: _parse_value_node(arg.value) for arg in applied.arguments}
    return {}  # defensive: has_given_directive already guaranteed a match
@geoType(shape: POINT) + testFieldGeoPolygon: GeoJSON @geoType(shape: POLYGON) + testFieldGeoNoShape: GeoJSON +} + +"""A type with S2DM directive annotations.""" +type TestTypeB { + testFieldRange: Float @range(min: 0.0, max: 100.0) + testFieldNoDups: [String] @noDuplicates + testFieldCardinality: [String] @cardinality(min: 1, max: 5) + """A descriptive comment""" + testFieldMeta: String + testFieldEnumB: AnEnumB +} + +"""A first test enum.""" +enum AnEnumA { + VALUE_1 + VALUE_2 + VALUE_3 +} + +"""A second test enum.""" +enum AnEnumB { + OPTION_X + OPTION_Y +} diff --git a/tests/data/spec/common.graphql b/tests/data/spec/common.graphql index bef9ba59..d7e008c2 100644 --- a/tests/data/spec/common.graphql +++ b/tests/data/spec/common.graphql @@ -76,6 +76,7 @@ input InCabinArea3x3Input @reference(source: "common_types.graphql") { column: ThreeColumnsInCabinEnum } + type Query { ping: String } diff --git a/tests/data/spec/geojson.graphql b/tests/data/spec/geojson.graphql new file mode 100644 index 00000000..4576e77f --- /dev/null +++ b/tests/data/spec/geojson.graphql @@ -0,0 +1,21 @@ +"""GeoJSON geometry scalar (RFC 7946 — https://www.rfc-editor.org/rfc/rfc7946). +Use with @geoType to constrain the geometry type in MongoDB BSON validators. +Without @geoType, exports as a permissive object requiring 'type' and 'coordinates' fields. +""" +scalar GeoJSON + +"""GeoJSON geometry types (RFC 7946 §1.4).""" +enum GeoJSONShape { + POINT + MULTIPOINT + LINESTRING + MULTILINESTRING + POLYGON + MULTIPOLYGON +} + +"""Specifies the GeoJSON geometry type for a GeoJSON scalar field. +Enables precise BSON schema generation in the MongoDB exporter. +If omitted on a GeoJSON field, a permissive object schema is emitted. +""" +directive @geoType(shape: GeoJSONShape!) 
on FIELD_DEFINITION diff --git a/tests/test_e2e_json.py b/tests/test_e2e_json.py new file mode 100644 index 00000000..ae39684b --- /dev/null +++ b/tests/test_e2e_json.py @@ -0,0 +1,392 @@ +"""End-to-end tests for JSON tree exporter CLI.""" + +import json +from pathlib import Path + +from click.testing import CliRunner + +from s2dm.cli import cli + + +def test_json_export_cli_basic(tmp_path: Path, spec_directory: Path) -> None: + """Test basic JSON export via CLI.""" + output_file = tmp_path / "output.json" + runner = CliRunner() + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(spec_directory / "common.graphql"), + "-o", + str(output_file), + ], + ) + + assert result.exit_code == 0, f"CLI failed with: {result.output}" + assert output_file.exists() + + # Validate JSON structure + data = json.loads(output_file.read_text()) + assert isinstance(data, dict) + assert len(data) > 0 + + # Check that at least one type was exported (no 'type' key in default mode) + for _type_name, type_data in data.items(): + assert "type" not in type_data # No type in default mode + if "children" in type_data: + assert isinstance(type_data["children"], dict) + break + + +def test_json_export_with_root_type(tmp_path: Path) -> None: + """Test JSON export with specific root type.""" + output_file = tmp_path / "output.json" + runner = CliRunner() + + # Create a simple test schema + schema_file = tmp_path / "test_schema.graphql" + schema_file.write_text( + """ + type Vehicle { + speed: Float + doors: Int + } + + type Building { + floors: Int + } + """ + ) + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(schema_file), + "--root-type", + "Vehicle", + "-o", + str(output_file), + ], + ) + + assert result.exit_code == 0 + assert output_file.exists() + + data = json.loads(output_file.read_text()) + assert "Vehicle" in data + assert "Building" not in data # Should only export Vehicle + + +def test_json_export_with_directives(tmp_path: Path) -> None: + 
"""Test JSON export with @range directive (no @vspec).""" + runner = CliRunner() + schema_file = tmp_path / "test_schema.graphql" + schema_file.write_text( + """ + directive @range(min: Float, max: Float) on FIELD_DEFINITION + directive @metadata(comment: String, vssType: String) on FIELD_DEFINITION + + type Vehicle { + \"\"\"Vehicle speed in km/h\"\"\" + speed: Float @range(min: 0.0, max: 250.0) @metadata(vssType: "sensor", comment: "Test comment") + + \"\"\"Number of doors\"\"\" + doorCount: Int @metadata(vssType: "attribute") + } + """ + ) + + output_file = tmp_path / "output.json" + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(schema_file), + "-o", + str(output_file), + ], + ) + + assert result.exit_code == 0 + data = json.loads(output_file.read_text()) + + vehicle = data["Vehicle"] + assert "children" in vehicle + + speed = vehicle["children"]["speed"] + assert speed["datatype"] == "Float" + assert speed["min"] == 0.0 + assert speed["max"] == 250.0 + assert speed["description"] == "Vehicle speed in km/h" + # @metadata is ignored without @vspec + assert "type" not in speed + assert "comment" not in speed + + door_count = vehicle["children"]["doorCount"] + assert "type" not in door_count + + +def test_json_export_with_enum(tmp_path: Path) -> None: + """Test JSON export with enum types (no @vspec means no allowed field).""" + runner = CliRunner() + schema_file = tmp_path / "test_schema.graphql" + schema_file.write_text( + """ + enum GearPosition { + PARK + REVERSE + NEUTRAL + DRIVE + } + + type Vehicle { + gear: GearPosition + } + """ + ) + + output_file = tmp_path / "output.json" + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(schema_file), + "-o", + str(output_file), + ], + ) + + assert result.exit_code == 0 + data = json.loads(output_file.read_text()) + + gear = data["Vehicle"]["children"]["gear"] + assert gear["datatype"] == "GearPosition" + # No allowed field without @vspec + assert "allowed" not in gear 
def test_json_export_nested_types(tmp_path: Path) -> None:
    """Nested object types become branch nodes with their own children."""
    schema_file = tmp_path / "test_schema.graphql"
    schema_file.write_text(
        """
        type Door {
            \"\"\"Is door open\"\"\"
            isOpen: Boolean

            \"\"\"Is door locked\"\"\"
            isLocked: Boolean
        }

        type Vehicle {
            \"\"\"Vehicle doors\"\"\"
            door: Door

            \"\"\"Vehicle speed\"\"\"
            speed: Float
        }
        """
    )
    output_file = tmp_path / "output.json"

    outcome = CliRunner().invoke(
        cli,
        ["export", "json", "-s", str(schema_file), "-o", str(output_file)],
    )
    assert outcome.exit_code == 0

    exported = json.loads(output_file.read_text())
    vehicle = exported["Vehicle"]
    assert "door" in vehicle["children"]

    door = vehicle["children"]["door"]
    assert "type" not in door  # no 'type' key in default (non-vspec) mode
    assert door["description"] == "Vehicle doors"
    assert "children" in door
    assert "isOpen" in door["children"]
    assert "isLocked" in door["children"]

    is_open = door["children"]["isOpen"]
    assert is_open["datatype"] == "Boolean"
    assert is_open["description"] == "Is door open"


def test_json_export_array_types(tmp_path: Path) -> None:
    """List fields get a '[]' suffix appended to the element datatype."""
    schema_file = tmp_path / "test_schema.graphql"
    schema_file.write_text(
        """
        scalar UInt8

        type Vehicle {
            seatPosCount: [UInt8]
        }
        """
    )
    output_file = tmp_path / "output.json"

    outcome = CliRunner().invoke(
        cli,
        ["export", "json", "-s", str(schema_file), "-o", str(output_file)],
    )
    assert outcome.exit_code == 0

    exported = json.loads(output_file.read_text())
    assert exported["Vehicle"]["children"]["seatPosCount"]["datatype"] == "UInt8[]"
"output.json" + + schema_file = tmp_path / "test_schema.graphql" + schema_file.write_text( + """ + type Vehicle { + speed: Float + } + """ + ) + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(schema_file), + "-o", + str(output_file), + ], + ) + + assert result.exit_code == 0 + assert output_file.exists() + assert output_file.parent.exists() + + +def test_json_export_invalid_root_type(tmp_path: Path) -> None: + """Test that invalid root type produces an error.""" + runner = CliRunner() + schema_file = tmp_path / "test_schema.graphql" + schema_file.write_text( + """ + type Vehicle { + speed: Float + } + """ + ) + + output_file = tmp_path / "output.json" + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(schema_file), + "--root-type", + "NonExistent", + "-o", + str(output_file), + ], + ) + + assert result.exit_code != 0 + assert "not found" in result.output.lower() + + +def test_json_export_with_selection_query(tmp_path: Path) -> None: + """Test JSON export with selection query.""" + runner = CliRunner() + schema_file = tmp_path / "test_schema.graphql" + schema_file.write_text( + """ + type Vehicle { + speed: Float + doors: Int + engine: Engine + } + + type Engine { + power: Float + } + + type Query { + vehicle(id: ID!): Vehicle + } + """ + ) + + query_file = tmp_path / "query.graphql" + query_file.write_text( + """ + { + vehicle(id: "test") { + speed + doors + } + } + """ + ) + + output_file = tmp_path / "output.json" + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(schema_file), + "--selection-query", + str(query_file), + "-o", + str(output_file), + ], + ) + + assert result.exit_code == 0 + data = json.loads(output_file.read_text()) + + # After selection query, should only have the selected fields + vehicle = data["Vehicle"] + assert "speed" in vehicle["children"] + assert "doors" in vehicle["children"] + # Engine is not selected, so it should not be in the output + assert "engine" not in 
vehicle["children"] diff --git a/tests/test_e2e_mongodb.py b/tests/test_e2e_mongodb.py new file mode 100644 index 00000000..0e05f8d8 --- /dev/null +++ b/tests/test_e2e_mongodb.py @@ -0,0 +1,562 @@ +"""End-to-end tests for the MongoDB BSON Schema exporter.""" + +import json +from pathlib import Path +from typing import Any + +import pytest +from click.testing import CliRunner + +from s2dm.cli import cli +from s2dm.exporters.mongodb import translate_to_mongodb +from s2dm.exporters.utils.schema_loader import load_and_process_schema + +MONGODB_TEST_SCHEMA = Path(__file__).parent / "data" / "mongodb" / "test_schema.graphql" + + +class TestMongoDBE2E: + @pytest.fixture + def schema_paths(self, spec_directory: Path) -> list[Path]: + """Combined spec directives + mongodb test schema.""" + return [spec_directory, MONGODB_TEST_SCHEMA] + + @pytest.fixture + def validators(self, schema_paths: list[Path]) -> dict[str, dict[str, Any]]: + """Load and transform to validators dict.""" + annotated_schema, _, _ = load_and_process_schema( + schema_paths=schema_paths, + naming_config_path=None, + selection_query_path=None, + root_type=None, + expanded_instances=False, + ) + return translate_to_mongodb(annotated_schema) + + # ------------------------------------------------------------------ + # Structure checks + # ------------------------------------------------------------------ + + def test_top_level_types_exported(self, validators: dict[str, dict[str, Any]]) -> None: + assert "TestTypeA" in validators + assert "TestTypeB" in validators + + def test_enums_not_top_level(self, validators: dict[str, dict[str, Any]]) -> None: + assert "AnEnumA" not in validators + assert "AnEnumB" not in validators + + def test_query_type_not_exported(self, validators: dict[str, dict[str, Any]]) -> None: + assert "Query" not in validators + + def test_each_type_has_bsontype_object(self, validators: dict[str, dict[str, Any]]) -> None: + for name in ("TestTypeA", "TestTypeB"): + assert 
def test_no_json_schema_wrapper(self, validators: dict[str, dict[str, Any]]) -> None:
    """Exported validators are bare BSON schemas, never wrapped in $jsonSchema."""
    assert "$jsonSchema" not in json.dumps(validators)

def test_no_ref_anywhere(self, validators: dict[str, dict[str, Any]]) -> None:
    """$ref must never appear — it is unsupported in MongoDB $jsonSchema."""
    assert "$ref" not in json.dumps(validators), "Found $ref — not supported in MongoDB $jsonSchema"

def test_no_schema_keyword(self, validators: dict[str, dict[str, Any]]) -> None:
    """Neither the '$schema' marker nor a 'definitions' section may appear."""
    serialized = json.dumps(validators)
    assert '"$schema"' not in serialized
    assert '"definitions"' not in serialized

def test_no_integer_type(self, validators: dict[str, dict[str, Any]]) -> None:
    """MongoDB does not support JSON Schema 'integer' type."""
    serialized = json.dumps(validators)
    for spelling in ('"type": "integer"', '"type":"integer"'):
        assert spelling not in serialized

# ------------------------------------------------------------------
# Scalar / BSON type assertions
# ------------------------------------------------------------------

def test_id_required_maps_to_objectid(self, validators: dict[str, dict[str, Any]]) -> None:
    """A non-null ID field maps to BSON objectId."""
    field = validators["TestTypeA"]["properties"]["testFieldIdRequired"]
    assert field["bsonType"] == "objectId"

def test_nullable_id_includes_null(self, validators: dict[str, dict[str, Any]]) -> None:
    """A nullable ID field allows both objectId and null."""
    bson_t = validators["TestTypeA"]["properties"]["testFieldId"]["bsonType"]
    assert isinstance(bson_t, list)
    assert {"objectId", "null"} <= set(bson_t)

def test_int8_maps_to_int(self, validators: dict[str, dict[str, Any]]) -> None:
    """Int8 maps to BSON 'int' (possibly alongside 'null' when nullable)."""
    bson_t = validators["TestTypeA"]["properties"]["testFieldInt8"]["bsonType"]
    candidates = bson_t if isinstance(bson_t, list) else [bson_t]
    assert "int" in candidates
assert "long" in bson_t + else: + assert bson_t == "long" + + # ------------------------------------------------------------------ + # Required fields + # ------------------------------------------------------------------ + + def test_required_fields_collected(self, validators: dict[str, dict[str, Any]]) -> None: + required = validators["TestTypeA"].get("required", []) + for field in ( + "testFieldIdRequired", + "testFieldStringRequired", + "testFieldIntRequired", + "testFieldNestedRequired", + "testFieldListRequired", + "testFieldEnumRequired", + ): + assert field in required, f"Expected '{field}' in required" + + def test_nullable_fields_not_in_required(self, validators: dict[str, dict[str, Any]]) -> None: + required = validators["TestTypeA"].get("required", []) + for field in ("testFieldId", "testFieldString", "testFieldNested", "testFieldEnum"): + assert field not in required, f"Did not expect '{field}' in required" + + # ------------------------------------------------------------------ + # Enum inlining + # ------------------------------------------------------------------ + + def test_enum_required_field_inlined_with_values(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeA"]["properties"] + f = props["testFieldEnumRequired"] + assert f["bsonType"] == "string" + assert set(f["enum"]) == {"VALUE_1", "VALUE_2", "VALUE_3"} + + def test_nullable_enum_inlined_with_null(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeA"]["properties"] + bson_t = props["testFieldEnum"]["bsonType"] + assert isinstance(bson_t, list) + assert "null" in bson_t + + # ------------------------------------------------------------------ + # Nested type inlining + # ------------------------------------------------------------------ + + def test_nested_type_inlined_no_ref(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeA"]["properties"] + nested = props["testFieldNestedRequired"] + 
assert nested["bsonType"] == "object" + assert "properties" in nested + assert "testFieldRange" in nested["properties"] + + def test_nullable_nested_type_includes_null(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeA"]["properties"] + bson_t = props["testFieldNested"]["bsonType"] + assert isinstance(bson_t, list) + assert "object" in bson_t + assert "null" in bson_t + + # ------------------------------------------------------------------ + # Directives + # ------------------------------------------------------------------ + + def test_range_directive(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeB"]["properties"] + f = props["testFieldRange"] + assert f["minimum"] == 0.0 + assert f["maximum"] == 100.0 + + def test_no_duplicates_directive(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeB"]["properties"] + assert props["testFieldNoDups"]["uniqueItems"] is True + + def test_cardinality_directive(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeB"]["properties"] + f = props["testFieldCardinality"] + assert f["minItems"] == 1 + assert f["maxItems"] == 5 + + def test_graphql_docstring_emitted_as_description(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeB"]["properties"] + assert props["testFieldMeta"]["description"] == "A descriptive comment" + + def test_metadata_does_not_use_dollar_comment(self, validators: dict[str, dict[str, Any]]) -> None: + """$comment is not supported by MongoDB $jsonSchema.""" + assert '"$comment"' not in json.dumps(validators) + + # ------------------------------------------------------------------ + # CLI — default mode (one file per type) + # ------------------------------------------------------------------ + + def test_cli_default_mode_creates_per_type_files(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + result = runner.invoke( + 
cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + ], + ) + assert result.exit_code == 0, result.output + assert (tmp_path / "out" / "TestTypeA.json").exists() + assert (tmp_path / "out" / "TestTypeB.json").exists() + + def test_cli_default_mode_no_output_json(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + ], + ) + assert not (tmp_path / "out" / "output.json").exists() + + def test_cli_default_mode_each_file_has_bare_bson_schema(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + ], + ) + for type_name in ("TestTypeA", "TestTypeB"): + data = json.loads((tmp_path / "out" / f"{type_name}.json").read_text()) + assert "$jsonSchema" not in data, f"Unexpected $jsonSchema wrapper in {type_name}.json" + assert data["bsonType"] == "object" + + def test_cli_default_mode_no_ref(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + ], + ) + for type_name in ("TestTypeA", "TestTypeB"): + assert "$ref" not in (tmp_path / "out" / f"{type_name}.json").read_text() + + def test_cli_default_mode_no_enum_files(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + ], + ) + assert not (tmp_path / "out" / 
"AnEnumA.json").exists() + assert not (tmp_path / "out" / "AnEnumB.json").exists() + + # ------------------------------------------------------------------ + # CLI — root-type mode (-r) + # ------------------------------------------------------------------ + + def test_cli_root_type_creates_named_file(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + result = runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--root-type", + "TestTypeA", + ], + ) + assert result.exit_code == 0, result.output + assert (tmp_path / "out" / "TestTypeA.json").exists() + assert not (tmp_path / "out" / "output.json").exists() + + def test_cli_root_type_content_is_bare_schema(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--root-type", + "TestTypeA", + ], + ) + data = json.loads((tmp_path / "out" / "TestTypeA.json").read_text()) + # bare schema — no {"TestTypeA": {...}} wrapper + assert "TestTypeA" not in data + assert data["bsonType"] == "object" + + def test_cli_root_type_dependent_types_not_written(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--root-type", + "TestTypeA", + ], + ) + # TestTypeB is a dependency that should only be inlined, not a standalone file + assert not (tmp_path / "out" / "TestTypeB.json").exists() + + def test_cli_root_type_invalid_raises_error(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + result = runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + 
"--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--root-type", + "NonExistentType", + ], + ) + assert result.exit_code != 0 + + # ------------------------------------------------------------------ + # CLI — --validator flag (adds $jsonSchema envelope) + # ------------------------------------------------------------------ + + def test_cli_validator_flag_default_mode(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + result = runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--validator", + ], + ) + assert result.exit_code == 0, result.output + for type_name in ("TestTypeA", "TestTypeB"): + data = json.loads((tmp_path / "out" / f"{type_name}.json").read_text()) + assert "$jsonSchema" in data + assert data["$jsonSchema"]["bsonType"] == "object" + + def test_cli_validator_flag_root_type(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--root-type", + "TestTypeA", + "--validator", + ], + ) + data = json.loads((tmp_path / "out" / "TestTypeA.json").read_text()) + assert "$jsonSchema" in data + assert data["$jsonSchema"]["bsonType"] == "object" + + def test_cli_no_validator_flag_default_is_bare(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + ], + ) + for type_name in ("TestTypeA", "TestTypeB"): + data = json.loads((tmp_path / "out" / f"{type_name}.json").read_text()) + assert "$jsonSchema" not in json.dumps(data) + + # ------------------------------------------------------------------ + # GeoJSON + # 
------------------------------------------------------------------ + + def test_geo_point_has_precise_bson_schema(self, validators: dict[str, dict[str, Any]]) -> None: + props = validators["TestTypeA"]["properties"] + f = props["testFieldGeoPointRequired"] + assert f["bsonType"] == "object" + assert set(f["required"]) == {"type", "coordinates"} + assert f["properties"]["type"]["enum"] == ["Point"] + coords = f["properties"]["coordinates"] + assert coords["bsonType"] == "array" + assert coords["items"]["bsonType"] == "double" + assert coords["minItems"] == 2 + assert coords["maxItems"] == 2 + + def test_geo_point_nullable_includes_null(self, validators: dict[str, dict[str, Any]]) -> None: + bson_t = validators["TestTypeA"]["properties"]["testFieldGeoPoint"]["bsonType"] + assert isinstance(bson_t, list) + assert "object" in bson_t + assert "null" in bson_t + + def test_geo_polygon_has_polygon_type_enum(self, validators: dict[str, dict[str, Any]]) -> None: + f = validators["TestTypeA"]["properties"]["testFieldGeoPolygon"] + assert f["properties"]["type"]["enum"] == ["Polygon"] + + def test_geo_no_shape_is_permissive(self, validators: dict[str, dict[str, Any]]) -> None: + f = validators["TestTypeA"]["properties"]["testFieldGeoNoShape"] + assert "object" in (f["bsonType"] if isinstance(f["bsonType"], list) else [f["bsonType"]]) + # No enum constraint on type property + assert "enum" not in f["properties"]["type"] + + def test_geo_fields_have_no_ref(self, validators: dict[str, dict[str, Any]]) -> None: + geo_props = {k: v for k, v in validators["TestTypeA"]["properties"].items() if "geo" in k.lower() or "Geo" in k} + assert "$ref" not in json.dumps(geo_props) + + # ------------------------------------------------------------------ + # CLI — --properties-config flag + # ------------------------------------------------------------------ + + def test_cli_properties_config_top_level(self, tmp_path: Path, spec_directory: Path) -> None: + cfg = tmp_path / "props.yaml" + 
cfg.write_text("- TestTypeA\n") + runner = CliRunner() + result = runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--properties-config", + str(cfg), + ], + ) + assert result.exit_code == 0, result.output + data = json.loads((tmp_path / "out" / "TestTypeA.json").read_text()) + assert data["additionalProperties"] is False + + def test_cli_properties_config_inline_path(self, tmp_path: Path, spec_directory: Path) -> None: + cfg = tmp_path / "props.yaml" + cfg.write_text("- TestTypeA.testFieldNested\n") + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--properties-config", + str(cfg), + ], + ) + data = json.loads((tmp_path / "out" / "TestTypeA.json").read_text()) + nested = data["properties"]["testFieldNested"] + assert nested["additionalProperties"] is False + # Inline TestTypeB top-level should default to true + data_b = json.loads((tmp_path / "out" / "TestTypeB.json").read_text()) + assert data_b["additionalProperties"] is True + + def test_cli_properties_config_unknown_type_exits_nonzero(self, tmp_path: Path, spec_directory: Path) -> None: + cfg = tmp_path / "props.yaml" + cfg.write_text("- NonExistentType\n") + runner = CliRunner() + result = runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + "--output", + str(tmp_path / "out"), + "--properties-config", + str(cfg), + ], + ) + assert result.exit_code != 0 + assert "NonExistentType" in result.output + + def test_cli_no_properties_config_no_additional_properties(self, tmp_path: Path, spec_directory: Path) -> None: + runner = CliRunner() + runner.invoke( + cli, + [ + "export", + "mongodb", + "--schema", + str(spec_directory), + "--schema", + str(MONGODB_TEST_SCHEMA), + 
"--output", + str(tmp_path / "out"), + ], + ) + for type_name in ("TestTypeA", "TestTypeB"): + data = json.loads((tmp_path / "out" / f"{type_name}.json").read_text()) + assert data["additionalProperties"] is True diff --git a/tests/test_json.py b/tests/test_json.py new file mode 100644 index 00000000..ead2dcfd --- /dev/null +++ b/tests/test_json.py @@ -0,0 +1,241 @@ +"""Unit tests for JSON tree exporter.""" + +from pathlib import Path + +import pytest +from graphql import build_schema + +from s2dm.exporters.json import JsonExporter +from s2dm.exporters.utils.annotated_schema import AnnotatedSchema +from s2dm.exporters.utils.schema_loader import load_and_process_schema + + +def test_simple_scalar_field() -> None: + """Test extraction of simple scalar field properties.""" + schema_str = """ + type Vehicle { + \"\"\"Vehicle speed in km/h\"\"\" + speed: Float + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + + assert "Vehicle" in result + vehicle = result["Vehicle"] + assert "type" not in vehicle # No type in default mode + assert "children" in vehicle + assert "speed" in vehicle["children"] + + speed = vehicle["children"]["speed"] + assert speed["datatype"] == "Float" + assert speed["description"] == "Vehicle speed in km/h" + # No 'type' field without @vspec + + +def test_range_directive() -> None: + """Test extraction from @range directive.""" + schema_str = """ + directive @range(min: Float, max: Float) on FIELD_DEFINITION + + type Vehicle { + speed: Float @range(min: 0.0, max: 250.0) + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + speed = result["Vehicle"]["children"]["speed"] + + assert speed["min"] == 0.0 + assert 
speed["max"] == 250.0 + + +def test_metadata_directive() -> None: + """Test that @metadata directive is ignored without @vspec.""" + schema_str = """ + directive @metadata(comment: String, vssType: String) on FIELD_DEFINITION + + type Vehicle { + speed: Float @metadata(comment: "Test comment", vssType: "actuator") + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + speed = result["Vehicle"]["children"]["speed"] + + # @metadata is ignored without @vspec + assert "comment" not in speed + assert "type" not in speed + assert speed["datatype"] == "Float" + + +def test_enum_field() -> None: + """Test enum field without @vspec has no allowed values.""" + schema_str = """ + enum GearPosition { + PARK + REVERSE + NEUTRAL + DRIVE + } + + type Vehicle { + gear: GearPosition + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + gear = result["Vehicle"]["children"]["gear"] + + assert gear["datatype"] == "GearPosition" + # No 'allowed' field without @vspec + assert "allowed" not in gear + + +def test_array_type() -> None: + """Test that array types get [] suffix on datatype.""" + schema_str = """ + type Vehicle { + seatPosCount: [Int] + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + seat_pos = result["Vehicle"]["children"]["seatPosCount"] + + assert seat_pos["datatype"] == "Int[]" + + +def test_nested_object_types() -> None: + """Test nested object types create branch nodes.""" + schema_str = """ + type Door { + isOpen: Boolean + isLocked: Boolean + } + + type Vehicle 
{ + door: Door + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + door = result["Vehicle"]["children"]["door"] + + assert "type" not in door # No type in default mode + assert "children" in door + assert "isOpen" in door["children"] + assert "isLocked" in door["children"] + + +def test_multi_root_export() -> None: + """Test exporting multiple root types.""" + schema_str = """ + type Vehicle { + speed: Float + } + + type Building { + floors: Int + } + + type Query { + ping: String + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type=None) + + # Should export Vehicle and Building, but exclude operation types (Query) + assert "Vehicle" in result + assert "Building" in result + assert "Query" not in result + + +def test_cycle_detection() -> None: + """Test that circular references are handled with $ref.""" + schema_str = """ + type Node { + value: String + parent: Node + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Node") + parent_field = result["Node"]["children"]["parent"] + + # Should have $ref to prevent infinite recursion + assert "$ref" in parent_field + + +def test_combined_directives() -> None: + """Test that @range works but @metadata is ignored without @vspec.""" + schema_str = """ + directive @range(min: Float, max: Float) on FIELD_DEFINITION + directive @metadata(comment: String, vssType: String) on FIELD_DEFINITION + + type Vehicle { + \"\"\"Current speed\"\"\" + speed: Float @range(min: 0.0, max: 250.0) @metadata(comment: "In kilometers per hour", vssType: "sensor") 
+ } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + speed = result["Vehicle"]["children"]["speed"] + + assert speed["datatype"] == "Float" + assert speed["description"] == "Current speed" + assert speed["min"] == 0.0 + assert speed["max"] == 250.0 + # @metadata is ignored without @vspec + assert "comment" not in speed + assert "type" not in speed + + +def test_integration_with_test_schema(spec_directory: Path) -> None: + """Test with actual test schema from tests/data/spec.""" + schema_path = spec_directory / "common.graphql" + if not schema_path.exists(): + pytest.skip("Test schema not found") + + annotated_schema, _, _ = load_and_process_schema( + schema_paths=[schema_path], + naming_config_path=None, + selection_query_path=None, + root_type=None, + expanded_instances=False, + ) + + exporter = JsonExporter(annotated_schema.schema, annotated_schema) + result = exporter.export(root_type=None) + + # Should successfully export without errors + assert isinstance(result, dict) + assert len(result) > 0 diff --git a/tests/test_json_no_vspec.py b/tests/test_json_no_vspec.py new file mode 100644 index 00000000..02a328d5 --- /dev/null +++ b/tests/test_json_no_vspec.py @@ -0,0 +1,320 @@ +"""Test JSON exporter with schemas that have NO @vspec annotations. 
+ +These tests ensure that only default fields are included when @vspec is not present: +- description (from GraphQL docstring) +- datatype (from scalar type) +- min (from @range directive) +- max (from @range directive) +- unit (from field argument) +""" + +from graphql import build_schema + +from s2dm.exporters.json import JsonExporter +from s2dm.exporters.utils.annotated_schema import AnnotatedSchema + + +def test_no_vspec_basic_fields() -> None: + """Test that only default fields appear without @vspec directive.""" + schema_str = """ + type Vehicle { + \"\"\"Current vehicle speed\"\"\" + speed: Float + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + speed = result["Vehicle"]["children"]["speed"] + + # Should have: description, datatype + assert "description" in speed + assert speed["description"] == "Current vehicle speed" + assert "datatype" in speed + assert speed["datatype"] == "Float" + + # Should NOT have: type, comment, allowed, default + assert "type" not in speed + assert "comment" not in speed + assert "allowed" not in speed + assert "default" not in speed + + +def test_no_vspec_with_range() -> None: + """Test that @range directive values are included without @vspec.""" + schema_str = """ + directive @range(min: Float, max: Float) on FIELD_DEFINITION + + type Vehicle { + \"\"\"Vehicle speed in km/h\"\"\" + speed: Float @range(min: 0.0, max: 250.0) + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + speed = result["Vehicle"]["children"]["speed"] + + # Should have: description, datatype, min, max + assert speed["description"] == "Vehicle speed in km/h" + assert speed["datatype"] == "Float" + assert speed["min"] 
== 0.0 + assert speed["max"] == 250.0 + + # Should NOT have: type, comment + assert "type" not in speed + assert "comment" not in speed + + +def test_no_vspec_with_unit() -> None: + """Test that unit field argument is included without @vspec.""" + schema_str = """ + enum SpeedUnit { + KILOMETER_PER_HOUR + METER_PER_SECOND + } + + type Vehicle { + \"\"\"Vehicle speed\"\"\" + speed(unit: SpeedUnit = KILOMETER_PER_HOUR): Float + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + speed = result["Vehicle"]["children"]["speed"] + + # Should have: description, datatype, unit + assert speed["description"] == "Vehicle speed" + assert speed["datatype"] == "Float" + assert speed["unit"] == "KILOMETER_PER_HOUR" + + # Should NOT have: type, comment + assert "type" not in speed + assert "comment" not in speed + + +def test_no_vspec_enum_field() -> None: + """Test that enum fields don't include 'allowed' without @vspec.""" + schema_str = """ + enum GearPosition { + PARK + REVERSE + NEUTRAL + DRIVE + } + + type Vehicle { + \"\"\"Current gear position\"\"\" + gear: GearPosition + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + gear = result["Vehicle"]["children"]["gear"] + + # Should have: description, datatype + assert gear["description"] == "Current gear position" + assert gear["datatype"] == "GearPosition" + + # Should NOT have: allowed, type + assert "allowed" not in gear + assert "type" not in gear + + +def test_no_vspec_with_metadata_directive() -> None: + """Test that @metadata directive is ignored without @vspec.""" + schema_str = """ + directive @metadata(comment: String, vssType: String) on FIELD_DEFINITION + + type Vehicle { + 
\"\"\"Vehicle speed\"\"\" + speed: Float @metadata(comment: "This is a comment", vssType: "sensor") + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + speed = result["Vehicle"]["children"]["speed"] + + # Should have: description, datatype + assert speed["description"] == "Vehicle speed" + assert speed["datatype"] == "Float" + + # Should NOT have: comment, type (even though @metadata provides them) + assert "comment" not in speed + assert "type" not in speed + + +def test_no_vspec_complete_example() -> None: + """Test complete example with all default fields but no @vspec.""" + schema_str = """ + directive @range(min: Float, max: Float) on FIELD_DEFINITION + + enum TemperatureUnit { + CELSIUS + FAHRENHEIT + } + + type Vehicle { + \"\"\"Exterior temperature\"\"\" + temperature(unit: TemperatureUnit = CELSIUS): Float @range(min: -50.0, max: 50.0) + + \"\"\"Vehicle identification number\"\"\" + vin: String + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + + # Check temperature field + temperature = result["Vehicle"]["children"]["temperature"] + assert temperature == { + "description": "Exterior temperature", + "datatype": "Float", + "min": -50.0, + "max": 50.0, + "unit": "CELSIUS", + } + + # Check vin field + vin = result["Vehicle"]["children"]["vin"] + assert vin == {"description": "Vehicle identification number", "datatype": "String"} + + +def test_no_vspec_branch_nodes() -> None: + """Test that branch nodes (nested types) work without @vspec.""" + schema_str = """ + type Door { + \"\"\"Is door open\"\"\" + isOpen: Boolean + } + + type Vehicle { + \"\"\"Vehicle door\"\"\" + door: Door + } + """ + schema = 
build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + + # Check branch node + door = result["Vehicle"]["children"]["door"] + assert "type" not in door # No type in default mode + assert door["description"] == "Vehicle door" + assert "children" in door + + # Check leaf node within branch + is_open = door["children"]["isOpen"] + assert is_open == {"description": "Is door open", "datatype": "Boolean"} + + +def test_instance_tag_produces_instances_in_default_mode() -> None: + """Test that @instanceTag fields are excluded as children but produce 'instances' on the parent.""" + schema_str = """ + directive @instanceTag on OBJECT + directive @range(min: Float, max: Float) on FIELD_DEFINITION + + enum RowEnum { + ROW1 + ROW2 + } + + enum SideEnum { + DRIVER_SIDE + PASSENGER_SIDE + } + + type DoorTag @instanceTag { + row: RowEnum + side: SideEnum + } + + type Door { + \"\"\"Is door open\"\"\" + isOpen: Boolean + } + + type Cabin { + \"\"\"All doors\"\"\" + doors: Door + instanceTag: DoorTag + } + + type Vehicle { + cabin: Cabin + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + + cabin = result["Vehicle"]["children"]["cabin"] + + # @instanceTag field must NOT appear as a child + assert "instanceTag" not in cabin["children"] + + # instances derived from enum dimensions in the @instanceTag type + assert cabin["instances"] == [ + ["ROW1", "ROW2"], + ["DRIVER_SIDE", "PASSENGER_SIDE"], + ] + + +def test_instance_tag_single_dimension() -> None: + """Test that a single-dimension @instanceTag produces a one-element instances list.""" + schema_str = """ + directive @instanceTag on OBJECT + + enum SideEnum { + LEFT + RIGHT + } + + type MirrorTag @instanceTag { + 
side: SideEnum + } + + type Mirror { + \"\"\"Is folded\"\"\" + isFolded: Boolean + } + + type Body { + \"\"\"All mirrors\"\"\" + mirrors: Mirror + instanceTag: MirrorTag + } + + type Vehicle { + body: Body + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + + result = exporter.export(root_type="Vehicle") + + body = result["Vehicle"]["children"]["body"] + assert "instanceTag" not in body["children"] + assert body["instances"] == [["LEFT", "RIGHT"]] diff --git a/tests/test_json_vspec_meta.py b/tests/test_json_vspec_meta.py new file mode 100644 index 00000000..3fa56369 --- /dev/null +++ b/tests/test_json_vspec_meta.py @@ -0,0 +1,347 @@ +"""Unit and e2e tests for the JSON exporter --vspec-meta YAML overlay feature.""" + +import json +import textwrap +from pathlib import Path +from typing import Any + +from click.testing import CliRunner +from graphql import build_schema + +from s2dm.cli import cli +from s2dm.exporters.json import JsonExporter, export_to_json_tree, load_vspec_lookup +from s2dm.exporters.utils.annotated_schema import AnnotatedSchema + +# --------------------------------------------------------------------------- +# load_vspec_lookup +# --------------------------------------------------------------------------- + + +def test_load_vspec_lookup_basic(tmp_path: Path) -> None: + """load_vspec_lookup returns a dict keyed by FQN.""" + yaml_content = textwrap.dedent( + """\ + Vehicle.Speed: + type: sensor + unit: km/h + min: 0 + max: 250 + """ + ) + yaml_file = tmp_path / "lookup.yaml" + yaml_file.write_text(yaml_content) + + result = load_vspec_lookup(yaml_file) + + assert isinstance(result, dict) + assert "Vehicle.Speed" in result + assert result["Vehicle.Speed"]["unit"] == "km/h" + assert result["Vehicle.Speed"]["min"] == 0 + assert result["Vehicle.Speed"]["max"] == 250 + + +def test_load_vspec_lookup_empty_file(tmp_path: Path) -> None: + 
"""load_vspec_lookup returns empty dict for empty file.""" + yaml_file = tmp_path / "empty.yaml" + yaml_file.write_text("") + + result = load_vspec_lookup(yaml_file) + + assert result == {} + + +def test_load_vspec_lookup_partial_keys(tmp_path: Path) -> None: + """load_vspec_lookup handles entries with only some keys present.""" + yaml_content = textwrap.dedent( + """\ + Vehicle.ADAS.ABS.IsEnabled: + type: actuator + datatype: boolean + description: Indicates if ABS is enabled. + """ + ) + yaml_file = tmp_path / "lookup.yaml" + yaml_file.write_text(yaml_content) + + result = load_vspec_lookup(yaml_file) + + entry = result["Vehicle.ADAS.ABS.IsEnabled"] + assert entry["type"] == "actuator" + assert "unit" not in entry + assert "allowed" not in entry + + +# --------------------------------------------------------------------------- +# _apply_vspec_lookup +# --------------------------------------------------------------------------- + + +def test_apply_vspec_lookup_overwrites_unit() -> None: + """_apply_vspec_lookup overwrites unit with YAML value.""" + schema_str = """ + type Vehicle { + speed: Float + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + lookup = {"Vehicle.Speed": {"unit": "km/h", "type": "sensor"}} + exporter = JsonExporter(schema, annotated, vspec_lookup=lookup) + + node: dict[str, Any] = {"datatype": "float", "unit": "KILOMETER_PER_HOUR"} + exporter._apply_vspec_lookup(node, "Vehicle.Speed") + + assert node["unit"] == "km/h" + assert node["type"] == "sensor" + + +def test_apply_vspec_lookup_unknown_fqn_leaves_node_unchanged() -> None: + """_apply_vspec_lookup does nothing for unrecognised FQN.""" + schema_str = """ + type Vehicle { + speed: Float + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + lookup: dict[str, Any] = {} + exporter = JsonExporter(schema, annotated, vspec_lookup=lookup) + + 
node: dict[str, Any] = {"datatype": "float", "unit": "KILOMETER_PER_HOUR"} + exporter._apply_vspec_lookup(node, "Vehicle.Speed") + + assert node["unit"] == "KILOMETER_PER_HOUR" + + +def test_apply_vspec_lookup_none_lookup_is_noop() -> None: + """_apply_vspec_lookup does nothing when vspec_lookup is None.""" + schema_str = """ + type Vehicle { + speed: Float + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated, vspec_lookup=None) + + node: dict[str, Any] = {"datatype": "float"} + exporter._apply_vspec_lookup(node, "Vehicle.Speed") + + # Unchanged + assert node == {"datatype": "float"} + + +def test_apply_vspec_lookup_adds_new_keys() -> None: + """_apply_vspec_lookup adds keys not originally in the node.""" + schema_str = """ + type Vehicle { + speed: Float + } + """ + schema = build_schema(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + lookup = { + "Vehicle.Speed": { + "comment": "Instantaneous speed", + "default": 0, + "allowed": None, + } + } + exporter = JsonExporter(schema, annotated, vspec_lookup=lookup) + + node: dict[str, Any] = {"datatype": "float"} + exporter._apply_vspec_lookup(node, "Vehicle.Speed") + + assert node["comment"] == "Instantaneous speed" + assert node["default"] == 0 + # None values are skipped + assert "allowed" not in node + + +# --------------------------------------------------------------------------- +# export_to_json_tree with vspec_lookup_path +# --------------------------------------------------------------------------- + + +_SIMPLE_SCHEMA = """\ +directive @vspec(element: String, fqn: String) on FIELD_DEFINITION + +type Vehicle { + \"\"\"Vehicle speed.\"\"\" + averageSpeed: Float @vspec(element: "sensor", fqn: "Vehicle.AverageSpeed") +} +""" + + +def test_export_without_vspec_meta_keeps_raw_unit(tmp_path: Path) -> None: + """Without vspec_lookup_path, unit stays as 
raw string from field arg.""" + schema_str = """\ +directive @unit(unit: String) on FIELD_DEFINITION + +type Vehicle { + speed: Float @unit(unit: "KILOMETER_PER_HOUR") +} +""" + from graphql import build_schema as _build + + from s2dm.exporters.utils.annotated_schema import AnnotatedSchema + + schema = _build(schema_str) + annotated = AnnotatedSchema(schema=schema, field_metadata={}, type_metadata={}) + exporter = JsonExporter(schema, annotated) + result = exporter.export(root_type="Vehicle") + # raw arg default logic doesn't apply here (no default_value set via build_schema), + # but make sure no KeyError / crash + assert "speed" in result["Vehicle"]["children"] + + +def test_export_with_vspec_meta_overrides_unit(tmp_path: Path) -> None: + """With vspec_lookup_path, unit in leaf node is replaced by YAML value.""" + from s2dm.exporters.utils.schema_loader import load_and_process_schema + + schema_file = tmp_path / "schema.graphql" + schema_file.write_text(_SIMPLE_SCHEMA) + + lookup_yaml = tmp_path / "lookup.yaml" + lookup_yaml.write_text( + textwrap.dedent( + """\ + Vehicle.AverageSpeed: + type: sensor + unit: km/h + min: 0 + max: 250 + """ + ) + ) + + annotated_schema, _, _ = load_and_process_schema( + schema_paths=[schema_file], + naming_config_path=None, + selection_query_path=None, + root_type=None, + expanded_instances=False, + ) + + result = export_to_json_tree(annotated_schema, vspec_lookup_path=lookup_yaml) + + speed = result["Vehicle"]["children"]["AverageSpeed"] + assert speed["unit"] == "km/h" + assert speed["min"] == 0 + assert speed["max"] == 250 + assert speed["type"] == "sensor" + + +def test_export_with_vspec_meta_partial_override(tmp_path: Path) -> None: + """YAML overlay only sets specified keys; schema-derived keys are preserved.""" + from s2dm.exporters.utils.schema_loader import load_and_process_schema + + schema_file = tmp_path / "schema.graphql" + schema_file.write_text(_SIMPLE_SCHEMA) + + lookup_yaml = tmp_path / "lookup.yaml" + 
lookup_yaml.write_text( + textwrap.dedent( + """\ + Vehicle.AverageSpeed: + unit: km/h + """ + ) + ) + + annotated_schema, _, _ = load_and_process_schema( + schema_paths=[schema_file], + naming_config_path=None, + selection_query_path=None, + root_type=None, + expanded_instances=False, + ) + + result = export_to_json_tree(annotated_schema, vspec_lookup_path=lookup_yaml) + + speed = result["Vehicle"]["children"]["AverageSpeed"] + # YAML overrides unit + assert speed["unit"] == "km/h" + # @vspec(element:...) is still processed in vspec-meta mode + assert speed["type"] == "sensor" + # Datatype is preserved (raw scalar name, may be overwritten by YAML overlay) + assert speed["datatype"] == "Float" + + +# --------------------------------------------------------------------------- +# CLI --vspec-meta option (e2e) +# --------------------------------------------------------------------------- + + +def test_cli_json_vspec_meta_option(tmp_path: Path) -> None: + """CLI --vspec-meta flag passes YAML overlay to exporter.""" + schema_file = tmp_path / "schema.graphql" + schema_file.write_text(_SIMPLE_SCHEMA) + + lookup_yaml = tmp_path / "lookup.yaml" + lookup_yaml.write_text( + textwrap.dedent( + """\ + Vehicle.AverageSpeed: + unit: km/h + comment: Instantaneous speed of the vehicle. + """ + ) + ) + + output_file = tmp_path / "output.json" + runner = CliRunner() + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(schema_file), + "--vspec-meta", + str(lookup_yaml), + "-o", + str(output_file), + ], + ) + + assert result.exit_code == 0, f"CLI failed: {result.output}" + assert output_file.exists() + + data = json.loads(output_file.read_text()) + speed = data["Vehicle"]["children"]["AverageSpeed"] + assert speed["unit"] == "km/h" + assert speed["comment"] == "Instantaneous speed of the vehicle." 
+ + +def test_cli_json_without_vspec_meta_no_error(tmp_path: Path) -> None: + """CLI without --vspec-meta still works correctly.""" + schema_file = tmp_path / "schema.graphql" + schema_file.write_text( + """\ + type Vehicle { + speed: Float + } + """ + ) + + output_file = tmp_path / "output.json" + runner = CliRunner() + + result = runner.invoke( + cli, + [ + "export", + "json", + "-s", + str(schema_file), + "-o", + str(output_file), + ], + ) + + assert result.exit_code == 0, f"CLI failed: {result.output}" + data = json.loads(output_file.read_text()) + assert "Vehicle" in data + assert "speed" in data["Vehicle"]["children"] diff --git a/tests/test_mongodb.py b/tests/test_mongodb.py new file mode 100644 index 00000000..5b5d7556 --- /dev/null +++ b/tests/test_mongodb.py @@ -0,0 +1,675 @@ +"""Unit tests for the MongoDB BSON Schema exporter.""" + +import json +from pathlib import Path +from typing import Any, cast + +import pytest +from graphql import build_schema + +from s2dm.exporters.mongodb.mongodb import transform +from s2dm.exporters.mongodb.transformer import MongoDBTransformer + + +def make_transformer(schema_str: str) -> MongoDBTransformer: + return MongoDBTransformer(build_schema(schema_str)) + + +def make_transform(schema_str: str) -> dict[str, dict[str, Any]]: + return transform(build_schema(schema_str)) + + +# --------------------------------------------------------------------------- +# BSON scalar type mapping +# --------------------------------------------------------------------------- + + +class TestBSONTypeMapping: + BASE = "type Query {{ ping: String }}\ntype TestTypeA {{ {field} }}" + + def _field_schema(self, field_decl: str) -> dict[str, Any]: + schema_str = self.BASE.format(field=field_decl) + t = make_transformer(schema_str) + result = t.transform() + return cast(dict[str, Any], result["TestTypeA"]["properties"]["testField"]) + + def test_string_maps_to_string(self) -> None: + s = self._field_schema("testField: String!") + assert s["bsonType"] 
== "string" + + def test_int_maps_to_int(self) -> None: + s = self._field_schema("testField: Int!") + assert s["bsonType"] == "int" + + def test_float_maps_to_double(self) -> None: + s = self._field_schema("testField: Float!") + assert s["bsonType"] == "double" + + def test_boolean_maps_to_bool(self) -> None: + s = self._field_schema("testField: Boolean!") + assert s["bsonType"] == "bool" + + def test_id_maps_to_objectid(self) -> None: + s = self._field_schema("testField: ID!") + assert s["bsonType"] == "objectId" + + def test_int8_maps_to_int(self) -> None: + schema_str = "scalar Int8\n" "type Query { ping: String }\n" "type TestTypeA { testField: Int8! }" + t = make_transformer(schema_str) + result = t.transform() + assert result["TestTypeA"]["properties"]["testField"]["bsonType"] == "int" + + def test_int64_maps_to_long(self) -> None: + schema_str = "scalar Int64\n" "type Query { ping: String }\n" "type TestTypeA { testField: Int64! }" + t = make_transformer(schema_str) + result = t.transform() + assert result["TestTypeA"]["properties"]["testField"]["bsonType"] == "long" + + def test_uint64_maps_to_long(self) -> None: + schema_str = "scalar UInt64\n" "type Query { ping: String }\n" "type TestTypeA { testField: UInt64! 
}" + t = make_transformer(schema_str) + result = t.transform() + assert result["TestTypeA"]["properties"]["testField"]["bsonType"] == "long" + + +# --------------------------------------------------------------------------- +# Nullable / non-null handling +# --------------------------------------------------------------------------- + + +class TestNullableHandling: + BASE = "type Query {{ ping: String }}\ntype TestTypeA {{ {field} }}" + + def _field(self, decl: str) -> dict[str, Any]: + t = make_transformer(self.BASE.format(field=decl)) + return cast(dict[str, Any], t.transform()["TestTypeA"]["properties"]["testField"]) + + def test_nullable_scalar_uses_list_bsontype(self) -> None: + s = self._field("testField: String") + assert isinstance(s["bsonType"], list) + assert "string" in s["bsonType"] + assert "null" in s["bsonType"] + + def test_non_null_scalar_uses_string_bsontype(self) -> None: + s = self._field("testField: String!") + assert s["bsonType"] == "string" + + def test_nullable_field_not_in_required(self) -> None: + t = make_transformer(self.BASE.format(field="testField: String")) + schema = t.transform()["TestTypeA"] + assert "testField" not in schema.get("required", []) + + def test_non_null_field_in_required(self) -> None: + t = make_transformer(self.BASE.format(field="testField: String!")) + schema = t.transform()["TestTypeA"] + assert "testField" in schema["required"] + + def test_nullable_object_bsontype_includes_null(self) -> None: + schema_str = ( + "type Query { ping: String }\n" + "type TestTypeA { testFieldNested: TestTypeB }\n" + "type TestTypeB { testFieldA: String }" + ) + t = make_transformer(schema_str) + nested = t.transform()["TestTypeA"]["properties"]["testFieldNested"] + assert isinstance(nested["bsonType"], list) + assert "object" in nested["bsonType"] + assert "null" in nested["bsonType"] + + def test_non_null_object_bsontype_is_string(self) -> None: + schema_str = ( + "type Query { ping: String }\n" + "type TestTypeA { 
testFieldNested: TestTypeB! }\n" + "type TestTypeB { testFieldA: String }" + ) + t = make_transformer(schema_str) + nested = t.transform()["TestTypeA"]["properties"]["testFieldNested"] + assert nested["bsonType"] == "object" + + +# --------------------------------------------------------------------------- +# Enum inlining (no $ref) +# --------------------------------------------------------------------------- + + +class TestEnumInlining: + SCHEMA = """ + enum AnEnumA { VALUE_1 VALUE_2 VALUE_3 } + type Query { ping: String } + type TestTypeA { testFieldEnum: AnEnumA testFieldEnumRequired: AnEnumA! } + """ + + def test_enum_field_has_bsontype_string_or_list(self) -> None: + t = make_transformer(self.SCHEMA) + bson_t = t.transform()["TestTypeA"]["properties"]["testFieldEnum"]["bsonType"] + if isinstance(bson_t, list): + assert "string" in bson_t + else: + assert bson_t == "string" + + def test_enum_required_field_has_values(self) -> None: + t = make_transformer(self.SCHEMA) + f = t.transform()["TestTypeA"]["properties"]["testFieldEnumRequired"] + assert f["enum"] == ["VALUE_1", "VALUE_2", "VALUE_3"] + + def test_no_ref_in_enum_field(self) -> None: + t = make_transformer(self.SCHEMA) + field_str = json.dumps(t.transform()["TestTypeA"]["properties"]["testFieldEnum"]) + assert "$ref" not in field_str + + def test_enum_type_not_a_top_level_entry(self) -> None: + t = make_transformer(self.SCHEMA) + assert "AnEnumA" not in t.transform() + + def test_nullable_enum_bsontype_includes_null(self) -> None: + t = make_transformer(self.SCHEMA) + bson_t = t.transform()["TestTypeA"]["properties"]["testFieldEnum"]["bsonType"] + assert isinstance(bson_t, list) + assert "null" in bson_t + + def test_non_null_enum_bsontype_is_string(self) -> None: + t = make_transformer(self.SCHEMA) + assert t.transform()["TestTypeA"]["properties"]["testFieldEnumRequired"]["bsonType"] == "string" + + +# --------------------------------------------------------------------------- +# Nested object inlining 
(no $ref) +# --------------------------------------------------------------------------- + + +class TestNestedTypeInlining: + SCHEMA = """ + type Query { ping: String } + type TestTypeA { testFieldNested: TestTypeB! } + type TestTypeB { testFieldA: String! testFieldB: Int } + """ + + def test_nested_type_inlined(self) -> None: + t = make_transformer(self.SCHEMA) + nested = t.transform()["TestTypeA"]["properties"]["testFieldNested"] + assert nested["bsonType"] == "object" + assert "testFieldA" in nested["properties"] + assert "testFieldB" in nested["properties"] + + def test_no_ref_in_output(self) -> None: + t = make_transformer(self.SCHEMA) + assert "$ref" not in json.dumps(t.transform()) + + def test_nested_required_preserved(self) -> None: + t = make_transformer(self.SCHEMA) + nested = t.transform()["TestTypeA"]["properties"]["testFieldNested"] + assert nested.get("required") == ["testFieldA"] + + +# --------------------------------------------------------------------------- +# List fields +# --------------------------------------------------------------------------- + + +class TestListFields: + SCHEMA = """ + type Query { ping: String } + type TestTypeA { + testFieldList: [TestTypeB] + testFieldListRequired: [TestTypeB]! 
+ } + type TestTypeB { testFieldA: String } + """ + + def test_nullable_list_bsontype_is_array_null(self) -> None: + t = make_transformer(self.SCHEMA) + f = t.transform()["TestTypeA"]["properties"]["testFieldList"] + assert isinstance(f["bsonType"], list) + assert "array" in f["bsonType"] + assert "null" in f["bsonType"] + + def test_non_null_list_bsontype_is_array(self) -> None: + t = make_transformer(self.SCHEMA) + f = t.transform()["TestTypeA"]["properties"]["testFieldListRequired"] + assert f["bsonType"] == "array" + + def test_list_has_items(self) -> None: + t = make_transformer(self.SCHEMA) + f = t.transform()["TestTypeA"]["properties"]["testFieldList"] + assert "items" in f + + def test_scalar_list_items_bsontype(self) -> None: + schema_str = "type Query { ping: String }\ntype TestTypeA { testFieldList: [String]! }" + t = make_transformer(schema_str) + f = t.transform()["TestTypeA"]["properties"]["testFieldList"] + assert f["bsonType"] == "array" + # items are nullable list-item (wrapping is nullable by default in GraphQL) + items_bson = f["items"]["bsonType"] + assert "string" in items_bson if isinstance(items_bson, list) else items_bson == "string" + + +# --------------------------------------------------------------------------- +# Directive support +# --------------------------------------------------------------------------- + + +class TestDirectives: + DIRECTIVES = ( + "directive @range(min: Float, max: Float) on FIELD_DEFINITION\n" + "directive @noDuplicates on FIELD_DEFINITION\n" + "directive @cardinality(min: Int, max: Int) on FIELD_DEFINITION\n" + "directive @metadata(comment: String, vssType: String) on FIELD_DEFINITION | OBJECT\n" + "type Query { ping: String }\n" + ) + + def _transform(self, body: str) -> dict[str, dict[str, Any]]: + return make_transformer(self.DIRECTIVES + body).transform() + + def test_range_adds_minimum_maximum(self) -> None: + result = self._transform("type TestTypeB { testFieldRange: Float @range(min: 0.0, max: 100.0) }") 
+ f = result["TestTypeB"]["properties"]["testFieldRange"] + assert f["minimum"] == 0.0 + assert f["maximum"] == 100.0 + + def test_no_duplicates_adds_unique_items(self) -> None: + result = self._transform("type TestTypeB { testFieldNoDups: [String] @noDuplicates }") + assert result["TestTypeB"]["properties"]["testFieldNoDups"]["uniqueItems"] is True + + def test_cardinality_adds_min_max_items(self) -> None: + result = self._transform("type TestTypeB { testFieldCardinality: [String] @cardinality(min: 1, max: 5) }") + f = result["TestTypeB"]["properties"]["testFieldCardinality"] + assert f["minItems"] == 1 + assert f["maxItems"] == 5 + + def test_graphql_docstring_emitted_as_description(self) -> None: + result = self._transform('type TestTypeB { """A descriptive comment""" testFieldMeta: String }') + f = result["TestTypeB"]["properties"]["testFieldMeta"] + assert f["description"] == "A descriptive comment" + + def test_range_on_list_field_goes_into_items(self) -> None: + result = self._transform("type TestTypeB { testFieldList: [Float] @range(min: 1.0, max: 9.0) }") + f = result["TestTypeB"]["properties"]["testFieldList"] + assert "minimum" not in f + assert f["items"]["minimum"] == 1.0 + assert f["items"]["maximum"] == 9.0 + + def test_no_dollar_comment_in_output(self) -> None: + """$comment is not supported in MongoDB $jsonSchema.""" + result = self._transform('type TestTypeB { """note""" testFieldMeta: String }') + assert '"$comment"' not in json.dumps(result) + + +# --------------------------------------------------------------------------- +# Exclusions +# --------------------------------------------------------------------------- + + +class TestExclusions: + def test_query_type_excluded(self) -> None: + schema_str = "type Query { ping: String }\ntype TestTypeA { testFieldA: String }" + assert "Query" not in make_transformer(schema_str).transform() + + def test_mutation_type_excluded(self) -> None: + schema_str = ( + "type Query { ping: String }\n" + "type 
Mutation { doSomething: String }\n" + "type TestTypeA { testFieldA: String }" + ) + assert "Mutation" not in make_transformer(schema_str).transform() + + def test_instance_tag_type_excluded(self) -> None: + schema_str = ( + "directive @instanceTag on OBJECT\n" + "type Query { ping: String }\n" + "type AnInstanceTag @instanceTag { row: String }\n" + "type TestTypeA { testFieldA: String }" + ) + assert "AnInstanceTag" not in make_transformer(schema_str).transform() + + def test_scalar_type_excluded_as_top_level(self) -> None: + schema_str = "scalar Int64\n" "type Query { ping: String }\n" "type TestTypeA { testField: Int64 }" + assert "Int64" not in make_transformer(schema_str).transform() + + def test_enum_type_excluded_as_top_level(self) -> None: + schema_str = ( + "enum AnEnumA { VALUE_1 VALUE_2 }\n" "type Query { ping: String }\n" "type TestTypeA { testField: AnEnumA }" + ) + assert "AnEnumA" not in make_transformer(schema_str).transform() + + +# --------------------------------------------------------------------------- +# Top-level schema structure +# --------------------------------------------------------------------------- + + +class TestJsonSchemaWrapping: + def test_each_type_has_bsontype_object(self) -> None: + schema_str = ( + "type Query { ping: String }\n" + "type TestTypeA { testFieldA: String }\n" + "type TestTypeB { testFieldB: Int }" + ) + result = make_transform(schema_str) + for type_name in ("TestTypeA", "TestTypeB"): + assert type_name in result + assert result[type_name]["bsonType"] == "object" + + def test_no_json_schema_wrapper(self) -> None: + schema_str = "type Query { ping: String }\ntype TestTypeA { testFieldA: String }" + result = make_transform(schema_str) + assert "$jsonSchema" not in json.dumps(result) + + def test_no_ref_anywhere_in_output(self) -> None: + schema_str = ( + "type Query { ping: String }\n" + "type TestTypeA { testFieldNested: TestTypeB }\n" + "type TestTypeB { testFieldA: String }" + ) + assert "$ref" not in 
json.dumps(make_transform(schema_str)) + + def test_no_dollar_schema_or_definitions_in_output(self) -> None: + schema_str = "type Query { ping: String }\ntype TestTypeA { testFieldA: String }" + dumped = json.dumps(make_transform(schema_str)) + assert '"$schema"' not in dumped + assert '"definitions"' not in dumped + + def test_no_integer_type_in_output(self) -> None: + """MongoDB does not support the JSON Schema 'integer' type; must use bsonType int/long.""" + schema_str = "type Query { ping: String }\ntype TestTypeA { testFieldA: Int! }" + dumped = json.dumps(make_transform(schema_str)) + assert '"type": "integer"' not in dumped + assert '"type":"integer"' not in dumped + + +# --------------------------------------------------------------------------- +# Circular reference detection +# --------------------------------------------------------------------------- + + +class TestCircularReference: + def test_circular_reference_raises_value_error(self) -> None: + schema_str = ( + "type Query { ping: String }\n" + "type TestTypeA { testFieldB: TestTypeB }\n" + "type TestTypeB { testFieldA: TestTypeA }\n" + ) + with pytest.raises(ValueError, match="Circular reference"): + make_transformer(schema_str).transform() + + def test_self_reference_raises_value_error(self) -> None: + schema_str = "type Query { ping: String }\n" "type TestTypeA { testFieldSelf: TestTypeA }\n" + with pytest.raises(ValueError, match="Circular reference"): + make_transformer(schema_str).transform() + + +# --------------------------------------------------------------------------- +# GeoJSON scalar with @geoType directive +# --------------------------------------------------------------------------- + + +class TestGeoJSON: + DIRECTIVES = ( + "enum GeoJSONShape { POINT MULTIPOINT LINESTRING MULTILINESTRING POLYGON MULTIPOLYGON }\n" + "directive @geoType(shape: GeoJSONShape!) 
on FIELD_DEFINITION\n" + "scalar GeoJSON\n" + "type Query { ping: String }\n" + ) + + def _transform(self, body: str) -> dict[str, dict[str, Any]]: + return make_transformer(self.DIRECTIVES + body).transform() + + # --- POINT --- + + def test_point_bsontype_object(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: POINT) }") + f = result["TestTypeA"]["properties"]["geo"] + assert f["bsonType"] == "object" + + def test_point_required_fields(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: POINT) }") + f = result["TestTypeA"]["properties"]["geo"] + assert set(f["required"]) == {"type", "coordinates"} + + def test_point_type_enum(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: POINT) }") + f = result["TestTypeA"]["properties"]["geo"] + assert f["properties"]["type"]["enum"] == ["Point"] + + def test_point_coordinates_array_of_double(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: POINT) }") + coords = result["TestTypeA"]["properties"]["geo"]["properties"]["coordinates"] + assert coords["bsonType"] == "array" + assert coords["items"]["bsonType"] == "double" + assert coords["minItems"] == 2 + assert coords["maxItems"] == 2 + + def test_nullable_point_includes_null(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON @geoType(shape: POINT) }") + bson_t = result["TestTypeA"]["properties"]["geo"]["bsonType"] + assert isinstance(bson_t, list) + assert "object" in bson_t + assert "null" in bson_t + + def test_nullable_point_not_in_required(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON @geoType(shape: POINT) }") + assert "geo" not in result["TestTypeA"].get("required", []) + + def test_non_null_point_in_required(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! 
@geoType(shape: POINT) }") + assert "geo" in result["TestTypeA"]["required"] + + # --- Shape-specific type enums --- + + def test_linestring_type_enum(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: LINESTRING) }") + f = result["TestTypeA"]["properties"]["geo"] + assert f["properties"]["type"]["enum"] == ["LineString"] + + def test_polygon_type_enum(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: POLYGON) }") + f = result["TestTypeA"]["properties"]["geo"] + assert f["properties"]["type"]["enum"] == ["Polygon"] + + def test_multipoint_type_enum(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: MULTIPOINT) }") + f = result["TestTypeA"]["properties"]["geo"] + assert f["properties"]["type"]["enum"] == ["MultiPoint"] + + def test_multilinestring_type_enum(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: MULTILINESTRING) }") + f = result["TestTypeA"]["properties"]["geo"] + assert f["properties"]["type"]["enum"] == ["MultiLineString"] + + def test_multipolygon_type_enum(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: MULTIPOLYGON) }") + f = result["TestTypeA"]["properties"]["geo"] + assert f["properties"]["type"]["enum"] == ["MultiPolygon"] + + # --- No @geoType → generic permissive schema --- + + def test_no_geo_type_directive_gives_permissive_schema(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! 
}") + f = result["TestTypeA"]["properties"]["geo"] + assert f["bsonType"] == "object" + assert "type" in f["properties"] + assert "coordinates" in f["properties"] + # No enum constraint on type + assert "enum" not in f["properties"]["type"] + + def test_no_geo_type_nullable_includes_null(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON }") + bson_t = result["TestTypeA"]["properties"]["geo"]["bsonType"] + assert isinstance(bson_t, list) + assert "null" in bson_t + + def test_geo_json_not_top_level_entry(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: POINT) }") + assert "GeoJSON" not in result + + def test_no_ref_in_geojson_output(self) -> None: + result = self._transform("type TestTypeA { geo: GeoJSON! @geoType(shape: POINT) }") + assert "$ref" not in json.dumps(result) + + +# --------------------------------------------------------------------------- +# additionalProperties config +# --------------------------------------------------------------------------- + +_NESTED_SCHEMA = """ + type Query { q: String } + type Parent { child: Child nested: Nested } + type Child { name: String } + type Nested { value: Int } +""" + + +class TestAdditionalPropertiesConfig: + """Tests for --properties-config / additional_props_false behaviour.""" + + def _transform_with_cfg(self, schema_str: str, cfg: frozenset[str]) -> dict[str, dict[str, Any]]: + from graphql import build_schema + + return MongoDBTransformer(build_schema(schema_str), cfg).transform() + + def test_no_config_no_additional_properties_key(self) -> None: + result = self._transform_with_cfg(_NESTED_SCHEMA, frozenset()) + assert result["Parent"]["additionalProperties"] is True + assert result["Child"]["additionalProperties"] is True + + def test_bare_type_name_sets_top_level(self) -> None: + result = self._transform_with_cfg(_NESTED_SCHEMA, frozenset({"Parent"})) + assert result["Parent"]["additionalProperties"] is False + # child top-level entry is not 
in config → defaults to true + assert result["Child"]["additionalProperties"] is True + + def test_bare_type_name_child_unaffected(self) -> None: + """Top-level Child not listed → additionalProperties: true even when inlined in Parent.""" + result = self._transform_with_cfg(_NESTED_SCHEMA, frozenset({"Parent"})) + child_inline = cast(dict[str, Any], result["Parent"]["properties"]["child"]) + assert child_inline["additionalProperties"] is True + + def test_dot_path_applies_to_inline_object(self) -> None: + result = self._transform_with_cfg(_NESTED_SCHEMA, frozenset({"Parent.child"})) + child_inline = cast(dict[str, Any], result["Parent"]["properties"]["child"]) + assert child_inline["additionalProperties"] is False + + def test_dot_path_does_not_affect_top_level_child(self) -> None: + """Parent.child config → only the inline occurrence is affected; Child top-level gets true.""" + result = self._transform_with_cfg(_NESTED_SCHEMA, frozenset({"Parent.child"})) + # Child appears as top-level too; it gets true (default) + assert result["Child"]["additionalProperties"] is True + + def test_multiple_keys_independent(self) -> None: + result = self._transform_with_cfg(_NESTED_SCHEMA, frozenset({"Child", "Parent.nested"})) + # Child top-level gets additionalProperties: false + assert result["Child"]["additionalProperties"] is False + # Parent.nested inline gets it + nested_inline = cast(dict[str, Any], result["Parent"]["properties"]["nested"]) + assert nested_inline["additionalProperties"] is False + # Parent itself gets true (default) + assert result["Parent"]["additionalProperties"] is True + + def test_nullable_inline_also_gets_flag(self) -> None: + """Nullable nested objects are inlined via _get_type_schema which must propagate key.""" + schema_str = """ + type Query { q: String } + type Parent { child: Child } + type Child { x: String } + """ + result = self._transform_with_cfg(schema_str, frozenset({"Parent.child"})) + child_inline = cast(dict[str, Any], 
result["Parent"]["properties"]["child"]) + assert child_inline["additionalProperties"] is False + # bsonType must still be ["object", "null"] because field is nullable + assert "null" in child_inline["bsonType"] + + +class TestLoadPropertiesConfig: + """Tests for load_properties_config().""" + + def test_loads_bare_type_names(self, tmp_path: Path) -> None: + from s2dm.exporters.mongodb.mongodb import load_properties_config + + f = tmp_path / "cfg.yaml" + f.write_text("- Address\n- ChargingStation\n") + result = load_properties_config(f) + assert result == frozenset({"Address", "ChargingStation"}) + + def test_loads_dot_paths(self, tmp_path: Path) -> None: + from s2dm.exporters.mongodb.mongodb import load_properties_config + + f = tmp_path / "cfg.yaml" + f.write_text("- Address.street\n- ChargingStation.address\n") + result = load_properties_config(f) + assert result == frozenset({"Address.street", "ChargingStation.address"}) + + def test_mixed_entries(self, tmp_path: Path) -> None: + from s2dm.exporters.mongodb.mongodb import load_properties_config + + f = tmp_path / "cfg.yaml" + f.write_text("- Address\n- ChargingStation.address\n") + result = load_properties_config(f) + assert result == frozenset({"Address", "ChargingStation.address"}) + + def test_rejects_non_list_file(self, tmp_path: Path) -> None: + from s2dm.exporters.mongodb.mongodb import load_properties_config + + f = tmp_path / "cfg.yaml" + f.write_text("key: value\n") + with pytest.raises(ValueError, match="must be a YAML sequence"): + load_properties_config(f) + + def test_rejects_invalid_path_format(self, tmp_path: Path) -> None: + from s2dm.exporters.mongodb.mongodb import load_properties_config + + f = tmp_path / "cfg.yaml" + f.write_text("- a.b.c\n") + with pytest.raises(ValueError, match="Invalid properties-config entry"): + load_properties_config(f) + + def test_rejects_non_string_entry(self, tmp_path: Path) -> None: + from s2dm.exporters.mongodb.mongodb import load_properties_config + + f = 
tmp_path / "cfg.yaml" + f.write_text("- 42\n") + with pytest.raises(ValueError, match="must be strings"): + load_properties_config(f) + + +class TestPropertiesConfigValidation: + """Tests for schema-level validation of properties-config entries.""" + + def _validate(self, schema_str: str, cfg: frozenset[str]) -> None: + from graphql import build_schema + + MongoDBTransformer(build_schema(schema_str), cfg).transform() + + _SCHEMA = """ + type Query { q: String } + type Parent { child: Child } + type Child { name: String } + enum Status { ACTIVE INACTIVE } + """ + + def test_valid_bare_type_passes(self) -> None: + self._validate(self._SCHEMA, frozenset({"Parent"})) + + def test_valid_dot_path_passes(self) -> None: + self._validate(self._SCHEMA, frozenset({"Parent.child"})) + + def test_unknown_type_raises(self) -> None: + with pytest.raises(ValueError, match="'NonExistent': type 'NonExistent' does not exist"): + self._validate(self._SCHEMA, frozenset({"NonExistent"})) + + def test_unknown_field_raises(self) -> None: + with pytest.raises(ValueError, match="'Parent' has no field 'missing'"): + self._validate(self._SCHEMA, frozenset({"Parent.missing"})) + + def test_non_object_type_raises(self) -> None: + with pytest.raises(ValueError, match="'Status' is not an object or interface type"): + self._validate(self._SCHEMA, frozenset({"Status"})) + + def test_multiple_errors_reported_together(self) -> None: + with pytest.raises(ValueError) as exc_info: + self._validate(self._SCHEMA, frozenset({"NonExistent", "Parent.missing"})) + msg = str(exc_info.value) + assert "NonExistent" in msg + assert "Parent.missing" in msg + + def test_empty_config_skips_validation(self) -> None: + """Empty config must not raise even if called with an otherwise valid schema.""" + self._validate(self._SCHEMA, frozenset())