diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7c62a743b..4e3225a89 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,10 +26,10 @@ jobs: with: submodules: true - - name: Set up Python 3.10 + - name: Set up Python 3.9 uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.9' cache: 'pip' cache-dependency-path: | requirements*.txt @@ -56,10 +56,10 @@ jobs: with: submodules: true - - name: Set up Python 3.10 + - name: Set up Python 3.9 uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.9' cache: 'pip' cache-dependency-path: | requirements*.txt @@ -227,27 +227,27 @@ jobs: run: pytest -rs -vvv --cov=./optimade/ --cov-report=xml --cov-append tests/adapters/ - name: Run tests for validator only to assess coverage (mongomock) - if: matrix.python-version == 3.10 + if: matrix.python-version == 3.9 run: pytest -rs --cov=./optimade/ --cov-report=xml:validator_cov.xml --cov-append tests/server/test_server_validation.py env: OPTIMADE_DATABASE_BACKEND: 'mongomock' - name: Run tests for validator only to assess coverage (Elasticsearch) - if: matrix.python-version == 3.10 + if: matrix.python-version == 3.9 run: pytest -rs --cov=./optimade/ --cov-report=xml:validator_cov.xml --cov-append tests/server/test_server_validation.py env: OPTIMADE_DATABASE_BACKEND: 'elastic' OPTIMADE_INSERT_TEST_DATA: false # Must be specified as previous steps will have already inserted the test data - name: Run tests for validator only to assess coverage (MongoDB) - if: matrix.python-version == 3.10 + if: matrix.python-version == 3.9 run: pytest -rs --cov=./optimade/ --cov-report=xml:validator_cov.xml --cov-append tests/server/test_server_validation.py env: OPTIMADE_DATABASE_BACKEND: 'mongodb' OPTIMADE_INSERT_TEST_DATA: false # Must be specified as previous steps will have already inserted the test data - name: Run the OPTIMADE Client CLI - if: matrix.python-version == 3.10 + if: matrix.python-version == 3.9 run: | coverage run --append --source optimade optimade/client/cli.py \ --filter 'nsites = 1' \ @@ -275,7 +275,7 @@ jobs: coverage xml - name: Upload coverage to Codecov - if: matrix.python-version == '3.10' && github.repository == 'Materials-Consortia/optimade-python-tools' + if: matrix.python-version == '3.9' && github.repository == 'Materials-Consortia/optimade-python-tools' uses: codecov/codecov-action@v3 with: name: project @@ -283,7 +283,7 @@ jobs: flags: project - name: Upload validator coverage to Codecov - if: matrix.python-version == '3.10' && github.repository == 'Materials-Consortia/optimade-python-tools' + if: matrix.python-version == '3.9' && github.repository == 'Materials-Consortia/optimade-python-tools' uses: codecov/codecov-action@v3 with: name: validator @@ -300,7 +300,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.9' cache: 'pip' cache-dependency-path: | requirements*.txt @@ -330,10 +330,10 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - - name: Set up Python 3.10 + - name: Set up Python 3.9 uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.9' cache: 'pip' cache-dependency-path: | requirements*.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e09132cd..5aa603b46 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ default_language_version: - python: python3.10 + python: python3.9 repos: - repo: https://github.com/ambv/black @@ -9,7 +9,7 @@ repos: name: Blacken - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-symlinks - id: check-yaml @@ -23,11 +23,17 @@ repos: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.0 + hooks: + - id: pyupgrade + args: ["--py39-plus"] + - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.291' + rev: 'v0.0.292' hooks: - id: ruff - args: [--fix] + args: [--fix, --exit-non-zero-on-fix] - repo: local hooks: @@ -46,7 +52,7 @@ repos: description: Update the API Reference documentation whenever a Python file is touched in the code base. - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.5.1 + rev: v1.6.0 hooks: - id: mypy name: "MyPy" diff --git a/docs/api_reference/adapters/jsonl.md b/docs/api_reference/adapters/jsonl.md new file mode 100644 index 000000000..6e9eeb43e --- /dev/null +++ b/docs/api_reference/adapters/jsonl.md @@ -0,0 +1,3 @@ +# jsonl + +::: optimade.adapters.jsonl diff --git a/docs/api_reference/models/partial_data.md b/docs/api_reference/models/partial_data.md new file mode 100644 index 000000000..07200e8b8 --- /dev/null +++ b/docs/api_reference/models/partial_data.md @@ -0,0 +1,5 @@ +# partial_data + +::: optimade.models.partial_data + options: + show_if_no_docstring: true diff --git a/docs/api_reference/server/mappers/partial_data.md b/docs/api_reference/server/mappers/partial_data.md new file mode 100644 index 000000000..fee81e882 --- /dev/null +++ b/docs/api_reference/server/mappers/partial_data.md @@ -0,0 +1,3 @@ +# partial_data + +::: optimade.server.mappers.partial_data diff --git a/docs/api_reference/server/routers/partial_data.md b/docs/api_reference/server/routers/partial_data.md new file mode 100644 index 000000000..c9e94cfa4 --- /dev/null +++ b/docs/api_reference/server/routers/partial_data.md @@ -0,0 +1,3 @@ +# partial_data + +::: optimade.server.routers.partial_data diff --git a/openapi/index_openapi.json b/openapi/index_openapi.json index 62c29ef4a..f346b6efd 100644 --- a/openapi/index_openapi.json +++ b/openapi/index_openapi.json @@ -467,6 +467,29 @@ "title": "BaseRelationshipResource", "description": "Minimum requirements to represent a relationship resource" }, + "EntryMetadata": { + "properties": { + "property_metadata": { + "type": "object", + "title": "Property Metadata", + "description": "An object containing per-entry and per-property metadata. The keys are the names of the fields in attributes for which metadata is available. The values belonging to these keys are dictionaries containing the relevant metadata fields. See also [Metadata properties](https://github.com/Materials-Consortia/OPTIMADE/blob/develop/optimade.rst#metadata-properties)" + }, + "partial_data_links": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/PartialDataLink" + }, + "type": "array" + }, + "type": "object", + "title": "Partial Data Links", + "description": "A dictionary, where the keys are the names of the properties in the attributes field for which the value is too large to be shared by default.\n For each property one or more links are provided from which the value of the attribute can be retrieved." + } + }, + "type": "object", + "title": "EntryMetadata", + "description": "Contains the metadata for the attributes of an entry" + }, "EntryRelationships": { "properties": { "references": { @@ -520,11 +543,11 @@ "meta": { "allOf": [ { - "$ref": "#/components/schemas/Meta" + "$ref": "#/components/schemas/EntryMetadata" } ], "title": "Meta", - "description": "a meta object containing non-standard meta-information about a resource that can not be represented as an attribute or relationship." + "description": "A [JSON API meta object](https://jsonapi.org/format/1.1/#document-meta) that is used to communicate metadata." }, "attributes": { "allOf": [ @@ -1129,11 +1152,11 @@ "meta": { "allOf": [ { - "$ref": "#/components/schemas/Meta" + "$ref": "#/components/schemas/EntryMetadata" } ], "title": "Meta", - "description": "a meta object containing non-standard meta-information about a resource that can not be represented as an attribute or relationship." + "description": "A [JSON API meta object](https://jsonapi.org/format/1.1/#document-meta) that is used to communicate metadata." }, "attributes": { "allOf": [ @@ -1393,6 +1416,29 @@ "title": "OptimadeError", "description": "detail MUST be present" }, + "PartialDataLink": { + "properties": { + "link": { + "type": "string", + "maxLength": 65536, + "minLength": 1, + "format": "uri", + "title": "Link", + "description": "String. A JSON API link that points to a location from which the omitted data can be fetched. There is no requirement on the syntax or format for the link URL." + }, + "format": { + "type": "string", + "title": "Format", + "description": "String. The name of the format provided via this link. For one of the objects this format field SHOULD have the value \"jsonlines\", which refers to the format in OPTIMADE JSON lines partial data format." + } + }, + "type": "object", + "required": [ + "link", + "format" + ], + "title": "PartialDataLink" + }, "Provider": { "properties": { "name": { diff --git a/openapi/openapi.json b/openapi/openapi.json index d66e0fc52..24f5e8a35 100644 --- a/openapi/openapi.json +++ b/openapi/openapi.json @@ -2,7 +2,7 @@ "openapi": "3.1.0", "info": { "title": "OPTIMADE API", - "description": "The [Open Databases Integration for Materials Design (OPTIMADE) consortium](https://www.optimade.org/) aims to make materials databases interoperational by developing a common REST API.\n\nThis specification is generated using [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/tree/v0.25.3) v0.25.3.", + "description": "The [Open Databases Integration for Materials Design (OPTIMADE) consortium](https://www.optimade.org/) aims to make materials databases interoperational by developing a common REST API.\nThis specification is generated using [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/tree/v0.25.3) v0.25.3.", "version": "1.1.0" }, "paths": { @@ -784,6 +784,17 @@ }, "name": "api_hint", "in": "query" + }, + { + "description": "A list of lists which contains a range for each dimension of the property.", + "required": false, + "schema": { + "type": "string", + "title": "Property Ranges", + "description": "A list of lists which contains a range for each dimension of the property." + }, + "name": "property_ranges", + "in": "query" } ], "responses": { @@ -1202,6 +1213,17 @@ }, "name": "api_hint", "in": "query" + }, + { + "description": "A list of lists which contains a range for each dimension of the property.", + "required": false, + "schema": { + "type": "string", + "title": "Property Ranges", + "description": "A list of lists which contains a range for each dimension of the property." + }, + "name": "property_ranges", + "in": "query" } ], "responses": { @@ -1309,6 +1331,183 @@ } } } + }, + "/partial_data/{entry_id}": { + "get": { + "tags": [ + "partial_data" + ], + "summary": "Get Partial Data", + "operationId": "get_partial_data_partial_data__entry_id__get", + "parameters": [ + { + "required": true, + "schema": { + "type": "string", + "title": "Entry Id" + }, + "name": "entry_id", + "in": "path" + }, + { + "description": "The output format requested (see section Response Format).\nDefaults to the format string 'json', which specifies the standard output format described in this specification.\nExample: `http://example.com/v1/structures?response_format=xml`", + "required": false, + "schema": { + "type": "string", + "title": "Response Format", + "description": "The output format requested (see section Response Format).\nDefaults to the format string 'json', which specifies the standard output format described in this specification.\nExample: `http://example.com/v1/structures?response_format=xml`", + "default": "jsonlines" + }, + "name": "response_format", + "in": "query" + }, + { + "description": "An email address of the user making the request.\nThe email SHOULD be that of a person and not an automatic system.\nExample: `http://example.com/v1/structures?email_address=user@example.com`", + "required": false, + "schema": { + "type": "string", + "format": "email", + "title": "Email Address", + "description": "An email address of the user making the request.\nThe email SHOULD be that of a person and not an automatic system.\nExample: `http://example.com/v1/structures?email_address=user@example.com`", + "default": "" + }, + "name": "email_address", + "in": "query" + }, + { + "description": "If the client provides the parameter, the value SHOULD have the format `vMAJOR` or `vMAJOR.MINOR`, where MAJOR is a major version and MINOR is a minor version of the API. For example, if a client appends `api_hint=v1.0` to the query string, the hint provided is for major version 1 and minor version 0.", + "required": false, + "schema": { + "type": "string", + "pattern": "(v[0-9]+(\\.[0-9]+)?)?", + "title": "Api Hint", + "description": "If the client provides the parameter, the value SHOULD have the format `vMAJOR` or `vMAJOR.MINOR`, where MAJOR is a major version and MINOR is a minor version of the API. For example, if a client appends `api_hint=v1.0` to the query string, the hint provided is for major version 1 and minor version 0.", + "default": "" + }, + "name": "api_hint", + "in": "query" + }, + { + "description": "A comma-delimited set of fields to be provided in the output.\nIf provided, these fields MUST be returned along with the REQUIRED fields.\nOther OPTIONAL fields MUST NOT be returned when this parameter is present.\nExample: `http://example.com/v1/structures?response_fields=last_modified,nsites`", + "required": false, + "schema": { + "type": "string", + "pattern": "([a-z_][a-z_0-9]*(,[a-z_][a-z_0-9]*)*)?", + "title": "Response Fields", + "description": "A comma-delimited set of fields to be provided in the output.\nIf provided, these fields MUST be returned along with the REQUIRED fields.\nOther OPTIONAL fields MUST NOT be returned when this parameter is present.\nExample: `http://example.com/v1/structures?response_fields=last_modified,nsites`", + "default": "" + }, + "name": "response_fields", + "in": "query" + }, + { + "description": "A filter string, in the format described in section API Filtering Format Specification of the specification.", + "required": false, + "schema": { + "type": "string", + "title": "Filter", + "description": "A filter string, in the format described in section API Filtering Format Specification of the specification.", + "default": "" + }, + "name": "filter", + "in": "query" + }, + { + "description": "A list of lists which contains a range for each dimension of the property.", + "required": false, + "schema": { + "type": "string", + "title": "Property Ranges", + "description": "A list of lists which contains a range for each dimension of the property.", + "default": "" + }, + "name": "property_ranges", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/vnd.api+json": { + "schema": { + "title": "Response Get Partial Data Partial Data Entry Id Get" + } + } + } + }, + "400": { + "description": "Bad Request", + "content": { + "application/vnd.api+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } + } + } + }, + "403": { + "description": "Forbidden", + "content": { + "application/vnd.api+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } + } + } + }, + "404": { + "description": "Not Found", + "content": { + "application/vnd.api+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } + } + } + }, + "422": { + "description": "Unprocessable Entity", + "content": { + "application/vnd.api+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } + } + } + }, + "500": { + "description": "Internal Server Error", + "content": { + "application/vnd.api+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } + } + } + }, + "501": { + "description": "Not Implemented", + "content": { + "application/vnd.api+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } + } + } + }, + "553": { + "description": "Version Not Supported", + "content": { + "application/vnd.api+json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + } + } + } + } + } + } } }, "components": { @@ -1717,6 +1916,29 @@ "title": "EntryInfoResponse", "description": "errors are not allowed" }, + "EntryMetadata": { + "properties": { + "property_metadata": { + "type": "object", + "title": "Property Metadata", + "description": "An object containing per-entry and per-property metadata. The keys are the names of the fields in attributes for which metadata is available. The values belonging to these keys are dictionaries containing the relevant metadata fields. See also [Metadata properties](https://github.com/Materials-Consortia/OPTIMADE/blob/develop/optimade.rst#metadata-properties)" + }, + "partial_data_links": { + "additionalProperties": { + "items": { + "$ref": "#/components/schemas/PartialDataLink" + }, + "type": "array" + }, + "type": "object", + "title": "Partial Data Links", + "description": "A dictionary, where the keys are the names of the properties in the attributes field for which the value is too large to be shared by default.\n For each property one or more links are provided from which the value of the attribute can be retrieved." + } + }, + "type": "object", + "title": "EntryMetadata", + "description": "Contains the metadata for the attributes of an entry" + }, "EntryRelationships": { "properties": { "references": { @@ -1770,11 +1992,11 @@ "meta": { "allOf": [ { - "$ref": "#/components/schemas/Meta" + "$ref": "#/components/schemas/EntryMetadata" } ], "title": "Meta", - "description": "a meta object containing non-standard meta-information about a resource that can not be represented as an attribute or relationship." + "description": "A [JSON API meta object](https://jsonapi.org/format/1.1/#document-meta) that is used to communicate metadata." }, "attributes": { "allOf": [ @@ -2238,11 +2460,11 @@ "meta": { "allOf": [ { - "$ref": "#/components/schemas/Meta" + "$ref": "#/components/schemas/EntryMetadata" } ], "title": "Meta", - "description": "a meta object containing non-standard meta-information about a resource that can not be represented as an attribute or relationship." + "description": "A [JSON API meta object](https://jsonapi.org/format/1.1/#document-meta) that is used to communicate metadata." }, "attributes": { "allOf": [ @@ -2502,6 +2724,29 @@ "title": "OptimadeError", "description": "detail MUST be present" }, + "PartialDataLink": { + "properties": { + "link": { + "type": "string", + "maxLength": 65536, + "minLength": 1, + "format": "uri", + "title": "Link", + "description": "String. A JSON API link that points to a location from which the omitted data can be fetched. There is no requirement on the syntax or format for the link URL." + }, + "format": { + "type": "string", + "title": "Format", + "description": "String. The name of the format provided via this link. For one of the objects this format field SHOULD have the value \"jsonlines\", which refers to the format in OPTIMADE JSON lines partial data format." + } + }, + "type": "object", + "required": [ + "link", + "format" + ], + "title": "PartialDataLink" + }, "Periodicity": { "type": "integer", "enum": [ @@ -2656,11 +2901,11 @@ "meta": { "allOf": [ { - "$ref": "#/components/schemas/Meta" + "$ref": "#/components/schemas/EntryMetadata" } ], "title": "Meta", - "description": "a meta object containing non-standard meta-information about a resource that can not be represented as an attribute or relationship." + "description": "A [JSON API meta object](https://jsonapi.org/format/1.1/#document-meta) that is used to communicate metadata." }, "attributes": { "$ref": "#/components/schemas/ReferenceResourceAttributes" @@ -3462,11 +3707,11 @@ "meta": { "allOf": [ { - "$ref": "#/components/schemas/Meta" + "$ref": "#/components/schemas/EntryMetadata" } ], "title": "Meta", - "description": "a meta object containing non-standard meta-information about a resource that can not be represented as an attribute or relationship." + "description": "A [JSON API meta object](https://jsonapi.org/format/1.1/#document-meta) that is used to communicate metadata." }, "attributes": { "$ref": "#/components/schemas/StructureResourceAttributes" diff --git a/optimade/adapters/base.py b/optimade/adapters/base.py index f1d4bcb63..7d21d06dc 100644 --- a/optimade/adapters/base.py +++ b/optimade/adapters/base.py @@ -19,7 +19,7 @@ and [`StructureResource`][optimade.models.structures.StructureResource]s, respectively. """ import re -from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Callable, Optional, Union from pydantic import BaseModel # pylint: disable=no-name-in-module @@ -42,10 +42,10 @@ class EntryAdapter: """ - ENTRY_RESOURCE: Type[EntryResource] = EntryResource - _type_converters: Dict[str, Callable] = {} - _type_ingesters: Dict[str, Callable] = {} - _type_ingesters_by_type: Dict[str, Type] = {} + ENTRY_RESOURCE: type[EntryResource] = EntryResource + _type_converters: dict[str, Callable] = {} + _type_ingesters: dict[str, Callable] = {} + _type_ingesters_by_type: dict[str, type] = {} def __init__(self, entry: dict) -> None: """ @@ -53,7 +53,7 @@ def __init__(self, entry: dict) -> None: entry (dict): A JSON OPTIMADE single resource entry. """ self._entry: Optional[EntryResource] = None - self._converted: Dict[str, Any] = {} + self._converted: dict[str, Any] = {} self.entry: EntryResource = entry # type: ignore[assignment] @@ -164,7 +164,7 @@ def ingest_from(cls, data: Any, format: Optional[str] = None) -> Any: @staticmethod def _get_model_attributes( - starting_instances: Union[Tuple[BaseModel, ...], List[BaseModel]], name: str + starting_instances: Union[tuple[BaseModel, ...], list[BaseModel]], name: str ) -> Any: """Helper method for retrieving the OPTIMADE model's attribute, supporting "."-nested attributes""" for res in starting_instances: diff --git a/optimade/adapters/jsonl.py b/optimade/adapters/jsonl.py new file mode 100644 index 000000000..9f7bfac21 --- /dev/null +++ b/optimade/adapters/jsonl.py @@ -0,0 +1,42 @@ +from io import BufferedReader, BytesIO +from pathlib import Path +from typing import Union + +from jsonlines import Reader, Writer + +from optimade.models.partial_data import PartialDataResource + + +def to_jsonl(input_data: Union[list[dict], PartialDataResource]) -> bytes: + """This function convert a list of dictionaries to the JSONL format which can be sent back in an OPTIMADE partial data response""" + temp_file = BytesIO() + writer = Writer(temp_file) + if isinstance(input_data, PartialDataResource): + writer.write(input_data.header) + input_data = input_data.data + if isinstance(input_data, list): + writer.write_all(input_data) + else: + writer.write(input_data) + writer.close() + file_content = temp_file.getvalue() + temp_file.close() + return file_content + + +def from_jsonl( + jsonl_input: Union[Path, str, bytes] +) -> Union[list, PartialDataResource]: + if isinstance(jsonl_input, (Path, str)): + fp: Union[BytesIO, BufferedReader] = open(jsonl_input, "rb") + else: + fp = BytesIO(jsonl_input) + decoded = [] + reader = Reader(fp) + for obj in reader: + decoded.append( + obj + ) # Appending is slow, so it would be better to use a more efficient method + reader.close() + fp.close() + return decoded diff --git a/optimade/adapters/references/adapter.py b/optimade/adapters/references/adapter.py index ef03e5396..cd3ebbae6 100644 --- a/optimade/adapters/references/adapter.py +++ b/optimade/adapters/references/adapter.py @@ -1,5 +1,3 @@ -from typing import Type - from optimade.adapters.base import EntryAdapter from optimade.models import ReferenceResource @@ -21,4 +19,4 @@ class Reference(EntryAdapter): """ - ENTRY_RESOURCE: Type[ReferenceResource] = ReferenceResource + ENTRY_RESOURCE: type[ReferenceResource] = ReferenceResource diff --git a/optimade/adapters/structures/adapter.py b/optimade/adapters/structures/adapter.py index 4ae622497..f7641c437 100644 --- a/optimade/adapters/structures/adapter.py +++ b/optimade/adapters/structures/adapter.py @@ -1,4 +1,4 @@ -from typing import Callable, Dict, Type +from typing import Callable from optimade.adapters.base import EntryAdapter from optimade.models import StructureResource @@ -44,8 +44,8 @@ class Structure(EntryAdapter): """ - ENTRY_RESOURCE: Type[StructureResource] = StructureResource - _type_converters: Dict[str, Callable] = { + ENTRY_RESOURCE: type[StructureResource] = StructureResource + _type_converters: dict[str, Callable] = { "aiida_structuredata": get_aiida_structure_data, "ase": get_ase_atoms, "cif": get_cif, @@ -55,12 +55,12 @@ class Structure(EntryAdapter): "jarvis": get_jarvis_atoms, } - _type_ingesters: Dict[str, Callable] = { + _type_ingesters: dict[str, Callable] = { "pymatgen": from_pymatgen, "ase": from_ase_atoms, } - _type_ingesters_by_type: Dict[str, Type] = { + _type_ingesters_by_type: dict[str, type] = { "pymatgen": PymatgenStructure, "ase": ASEAtoms, } diff --git a/optimade/adapters/structures/aiida.py b/optimade/adapters/structures/aiida.py index f65c0babb..8bfe1e297 100644 --- a/optimade/adapters/structures/aiida.py +++ b/optimade/adapters/structures/aiida.py @@ -7,7 +7,7 @@ This conversion function relies on the [`aiida-core`](https://github.com/aiidateam/aiida-core) package. """ -from typing import List, Optional +from typing import Optional from warnings import warn from optimade.adapters.structures.utils import pad_cell, species_from_species_at_sites @@ -48,7 +48,7 @@ def get_aiida_structure_data(optimade_structure: OptimadeStructure) -> Structure structure = StructureData(cell=lattice_vectors) # If species not provided, infer data from species_at_sites - species: Optional[List[OptimadeStructureSpecies]] = attributes.species + species: Optional[list[OptimadeStructureSpecies]] = attributes.species if not species: species = species_from_species_at_sites(attributes.species_at_sites) # type: ignore[arg-type] diff --git a/optimade/adapters/structures/ase.py b/optimade/adapters/structures/ase.py index e049663d6..5c8c09e65 100644 --- a/optimade/adapters/structures/ase.py +++ b/optimade/adapters/structures/ase.py @@ -7,7 +7,6 @@ For more information on the ASE code see [their documentation](https://wiki.fysik.dtu.dk/ase/). """ -from typing import Dict from optimade.adapters.exceptions import ConversionError from optimade.adapters.structures.utils import ( @@ -66,7 +65,7 @@ def get_ase_atoms(optimade_structure: OptimadeStructure) -> Atoms: if not species: species = species_from_species_at_sites(attributes.species_at_sites) # type: ignore[arg-type] - optimade_species: Dict[str, OptimadeStructureSpecies] = {_.name: _ for _ in species} + optimade_species: dict[str, OptimadeStructureSpecies] = {_.name: _ for _ in species} # Since we've made sure there are no species with more than 1 chemical symbol, # asking for index 0 will always work. diff --git a/optimade/adapters/structures/cif.py b/optimade/adapters/structures/cif.py index cbe35901c..295c6e9f7 100644 --- a/optimade/adapters/structures/cif.py +++ b/optimade/adapters/structures/cif.py @@ -16,7 +16,6 @@ This conversion function relies on the [NumPy](https://numpy.org/) library. """ -from typing import Dict from optimade.adapters.structures.utils import ( cell_to_cellpar, @@ -123,11 +122,11 @@ def get_cif( # pylint: disable=too-many-locals,too-many-branches else: sites = attributes.cartesian_site_positions - species: Dict[str, OptimadeStructureSpecies] = { + species: dict[str, OptimadeStructureSpecies] = { species.name: species for species in attributes.species # type: ignore[union-attr] } - symbol_occurences: Dict[str, int] = {} + symbol_occurences: dict[str, int] = {} for site_number in range(attributes.nsites): # type: ignore[arg-type] species_name = attributes.species_at_sites[site_number] # type: ignore[index] site = sites[site_number] diff --git a/optimade/adapters/structures/proteindatabank.py b/optimade/adapters/structures/proteindatabank.py index f2e699408..b6ae96549 100644 --- a/optimade/adapters/structures/proteindatabank.py +++ b/optimade/adapters/structures/proteindatabank.py @@ -21,7 +21,6 @@ Warning: Currently, the PDBx/mmCIF conversion function is not parsing as a complete PDBx/mmCIF file. """ -from typing import Dict try: import numpy as np @@ -164,7 +163,7 @@ def get_pdbx_mmcif( # pylint: disable=too-many-locals else: sites = attributes.cartesian_site_positions - species: Dict[str, OptimadeStructureSpecies] = { + species: dict[str, OptimadeStructureSpecies] = { species.name: species for species in attributes.species # type: ignore[union-attr] } @@ -240,7 +239,7 @@ def get_pdb( # pylint: disable=too-many-locals pdb += "MODEL 1\n" - species: Dict[str, OptimadeStructureSpecies] = { + species: dict[str, OptimadeStructureSpecies] = { species.name: species for species in attributes.species # type:ignore[union-attr] } diff --git a/optimade/adapters/structures/pymatgen.py b/optimade/adapters/structures/pymatgen.py index 8d46bf7fc..f6f62a7ad 100644 --- a/optimade/adapters/structures/pymatgen.py +++ b/optimade/adapters/structures/pymatgen.py @@ -7,7 +7,7 @@ For more information on the pymatgen code see [their documentation](https://pymatgen.org). """ -from typing import Dict, List, Optional, Union +from typing import Optional, Union from optimade.adapters.structures.utils import ( species_from_species_at_sites, @@ -105,9 +105,9 @@ def _get_molecule(optimade_structure: OptimadeStructure) -> Molecule: def _pymatgen_species( nsites: int, - species: Optional[List[OptimadeStructureSpecies]], - species_at_sites: List[str], -) -> List[Dict[str, float]]: + species: Optional[list[OptimadeStructureSpecies]], + species_at_sites: list[str], +) -> list[dict[str, float]]: """ Create list of {"symbol": "concentration"} per site for values to pymatgen species parameters. Remove vacancies, if they are present. diff --git a/optimade/adapters/structures/utils.py b/optimade/adapters/structures/utils.py index 2b36570bd..3461d49dd 100644 --- a/optimade/adapters/structures/utils.py +++ b/optimade/adapters/structures/utils.py @@ -3,7 +3,8 @@ Most of these functions rely on the [NumPy](https://numpy.org/) library. """ -from typing import Iterable, List, Optional, Tuple, Type +from collections.abc import Iterable +from typing import Optional from optimade.models.structures import Species as OptimadeStructureSpecies from optimade.models.structures import Vector3D @@ -19,7 +20,7 @@ NUMPY_NOT_FOUND = "NumPy not found, cannot convert structure to your desired format" -def valid_lattice_vector(lattice_vec: Tuple[Vector3D, Vector3D, Vector3D]): +def valid_lattice_vector(lattice_vec: tuple[Vector3D, Vector3D, Vector3D]): if len(lattice_vec) != 3: return False for vector in lattice_vec: @@ -31,8 +32,8 @@ def valid_lattice_vector(lattice_vec: Tuple[Vector3D, Vector3D, Vector3D]): def scaled_cell( - cell: Tuple[Vector3D, Vector3D, Vector3D] -) -> Tuple[Vector3D, Vector3D, Vector3D]: + cell: tuple[Vector3D, Vector3D, Vector3D] +) -> tuple[Vector3D, Vector3D, Vector3D]: """Return a scaled 3x3 cell from cartesian 3x3 cell (`lattice_vectors`). This 3x3 matrix can be used to calculate the fractional coordinates from the cartesian_site_positions. @@ -62,8 +63,8 @@ def scaled_cell( def fractional_coordinates( - cell: Tuple[Vector3D, Vector3D, Vector3D], cartesian_positions: List[Vector3D] -) -> List[Vector3D]: + cell: tuple[Vector3D, Vector3D, Vector3D], cartesian_positions: list[Vector3D] +) -> list[Vector3D]: """Returns fractional coordinates and wraps coordinates to `[0,1[`. Note: @@ -101,8 +102,8 @@ def fractional_coordinates( def cell_to_cellpar( - cell: Tuple[Vector3D, Vector3D, Vector3D], radians: bool = False -) -> List[float]: + cell: tuple[Vector3D, Vector3D, Vector3D], radians: bool = False +) -> list[float]: """Returns the cell parameters `[a, b, c, alpha, beta, gamma]`. Angles are in degrees unless `radian=True` is used. @@ -161,10 +162,10 @@ def unit_vector(x: Vector3D) -> Vector3D: def cellpar_to_cell( - cellpar: List[float], - ab_normal: Tuple[int, int, int] = (0, 0, 1), - a_direction: Optional[Tuple[int, int, int]] = None, -) -> List[Vector3D]: + cellpar: list[float], + ab_normal: tuple[int, int, int] = (0, 0, 1), + a_direction: Optional[tuple[int, int, int]] = None, +) -> list[Vector3D]: """Return a 3x3 cell matrix from `cellpar=[a,b,c,alpha,beta,gamma]`. Angles must be in degrees. @@ -277,9 +278,9 @@ def cellpar_to_cell( def _pad_iter_of_iters( iterable: Iterable[Iterable], padding: Optional[float] = None, - outer: Optional[Type] = None, - inner: Optional[Type] = None, -) -> Tuple[Iterable[Iterable], bool]: + outer: Optional[type] = None, + inner: Optional[type] = None, +) -> tuple[Iterable[Iterable], bool]: """Turn any null/None values into a float in given iterable of iterables""" try: padding = float(padding) # type: ignore[arg-type] @@ -306,7 +307,7 @@ def _pad_iter_of_iters( def pad_cell( - lattice_vectors: Tuple[Vector3D, Vector3D, Vector3D], + lattice_vectors: tuple[Vector3D, Vector3D, Vector3D], padding: Optional[float] = None, ) -> tuple: # Setting this properly makes MkDocs fail. """Turn any `null`/`None` values into a `float` in given `tuple` of @@ -333,8 +334,8 @@ def pad_cell( def species_from_species_at_sites( - species_at_sites: List[str], -) -> List[OptimadeStructureSpecies]: + species_at_sites: list[str], +) -> list[OptimadeStructureSpecies]: """When a list of species dictionaries is not provided, this function can be used to infer the species from the provided species_at_sites. @@ -357,7 +358,7 @@ def species_from_species_at_sites( ] -def elements_ratios_from_species_at_sites(species_at_sites: List[str]) -> List[float]: +def elements_ratios_from_species_at_sites(species_at_sites: list[str]) -> list[float]: """Compute the OPTIMADE `elements_ratios` field from `species_at_sites` in the case where `species_at_sites` refers to sites wholly occupied by the given elements, e.g., not arbitrary species labels or with partial/mixed occupancy. diff --git a/optimade/client/cli.py b/optimade/client/cli.py index d4f9a923c..2c02af6a4 100644 --- a/optimade/client/cli.py +++ b/optimade/client/cli.py @@ -166,13 +166,13 @@ def _get( base_urls=base_url, use_async=use_async, max_results_per_provider=max_results_per_provider, - include_providers=set(_.strip() for _ in include_providers.split(",")) + include_providers={_.strip() for _ in include_providers.split(",")} if include_providers else None, - exclude_providers=set(_.strip() for _ in exclude_providers.split(",")) + exclude_providers={_.strip() for _ in exclude_providers.split(",")} if exclude_providers else None, - exclude_databases=set(_.strip() for _ in exclude_databases.split(",")) + exclude_databases={_.strip() for _ in exclude_databases.split(",")} if exclude_databases else None, silent=silent, @@ -211,13 +211,15 @@ def _get( if not output_file: if pretty_print: - rich.print_json(data=results, indent=2, default=lambda _: _.dict()) + rich.print_json(data=results, indent=2, default=lambda _: _.asdict()) else: - sys.stdout.write(json.dumps(results, indent=2, default=lambda _: _.dict())) + sys.stdout.write( + json.dumps(results, indent=2, default=lambda _: _.asdict()) + ) if output_file: with open(output_file, "w") as f: - json.dump(results, f, indent=2, default=lambda _: _.dict()) + json.dump(results, f, indent=2, default=lambda _: _.asdict()) if __name__ == "__main__": diff --git a/optimade/client/client.py b/optimade/client/client.py index 1efacca15..3dc7d5fd6 100644 --- a/optimade/client/client.py +++ b/optimade/client/client.py @@ -10,18 +10,8 @@ import json import time from collections import defaultdict -from typing import ( - Any, - Callable, - Dict, - Iterable, - List, - Optional, - Set, - Tuple, - Type, - Union, -) +from collections.abc import Iterable +from typing import Any, Callable, Optional, Union from urllib.parse import urlparse # External deps that are only used in the client code @@ -70,12 +60,12 @@ class OptimadeClient: base_urls: Union[str, Iterable[str]] """A list (or any iterable) of OPTIMADE base URLs to query.""" - all_results: Dict[str, Dict[str, Dict[str, QueryResults]]] = defaultdict(dict) + all_results: dict[str, dict[str, dict[str, QueryResults]]] = defaultdict(dict) """A nested dictionary keyed by endpoint and OPTIMADE filter string that contains the results from each base URL for that particular filter. """ - count_results: Dict[str, Dict[str, Dict[str, int]]] = defaultdict(dict) + count_results: dict[str, dict[str, dict[str, int]]] = defaultdict(dict) """A nested dictionary keyed by endpoint and OPTIMADE filter string that contains the number of results from each base URL for that particular filter. """ @@ -85,12 +75,12 @@ class OptimadeClient: download all. """ - property_lists: Dict[str, Dict[str, List[str]]] = defaultdict(dict) + property_lists: dict[str, dict[str, list[str]]] = defaultdict(dict) """A dictionary containing list of properties served by each database, broken down by entry type, then database. """ - headers: Dict = {"User-Agent": f"optimade-python-tools/{__version__}"} + headers: dict = {"User-Agent": f"optimade-python-tools/{__version__}"} """Additional HTTP headers.""" http_timeout: httpx.Timeout = httpx.Timeout(10.0, read=1000.0) @@ -102,7 +92,7 @@ class OptimadeClient: use_async: bool """Whether or not to make all requests asynchronously using asyncio.""" - callbacks: Optional[List[Callable[[str, Dict], Union[None, Dict]]]] = None + callbacks: Optional[list[Callable[[str, dict], Union[None, dict]]]] = None """A list of callbacks to execute after each successful request, used to e.g., write to a file, add results to a database or perform additional filtering. @@ -121,13 +111,13 @@ class OptimadeClient: silent: bool """Whether to disable progress bar printing.""" - _excluded_providers: Optional[Set[str]] = None + _excluded_providers: Optional[set[str]] = None """A set of providers IDs excluded from future queries.""" - _included_providers: Optional[Set[str]] = None + _included_providers: Optional[set[str]] = None """A set of providers IDs included from future queries.""" - _excluded_databases: Optional[Set[str]] = None + _excluded_databases: Optional[set[str]] = None """A set of child database URLs excluded from future queries.""" __current_endpoint: Optional[str] = None @@ -135,7 +125,7 @@ class OptimadeClient: chosen endpoint. Should be reset to `None` outside of all `get()` calls.""" _http_client: Optional[ - Union[Type[httpx.AsyncClient], Type[requests.Session]] + Union[type[httpx.AsyncClient], type[requests.Session]] ] = None """Override the HTTP client class, primarily used for testing.""" @@ -148,18 +138,18 @@ def __init__( self, base_urls: Optional[Union[str, Iterable[str]]] = None, max_results_per_provider: int = 1000, - headers: Optional[Dict] = None, + headers: Optional[dict] = None, http_timeout: Optional[Union[httpx.Timeout, float]] = None, max_attempts: int = 5, use_async: bool = True, silent: bool = False, - exclude_providers: Optional[List[str]] = None, - include_providers: Optional[List[str]] = None, - exclude_databases: Optional[List[str]] = None, + exclude_providers: Optional[list[str]] = None, + include_providers: Optional[list[str]] = None, + exclude_databases: Optional[list[str]] = None, http_client: Optional[ - Union[Type[httpx.AsyncClient], Type[requests.Session]] + Union[type[httpx.AsyncClient], type[requests.Session]] ] = None, - callbacks: Optional[List[Callable[[str, Dict], Union[None, Dict]]]] = None, + callbacks: Optional[list[Callable[[str, dict], Union[None, dict]]]] = None, ): """Create the OPTIMADE client object. @@ -284,9 +274,9 @@ def get( self, filter: Optional[str] = None, endpoint: Optional[str] = None, - response_fields: Optional[List[str]] = None, + response_fields: Optional[list[str]] = None, sort: Optional[str] = None, - ) -> Dict[str, Dict[str, Dict[str, Dict]]]: + ) -> dict[str, dict[str, dict[str, dict]]]: """Gets the results from the endpoint and filter across the defined OPTIMADE APIs. @@ -338,11 +328,11 @@ def get( sort=sort, ) self.all_results[endpoint][filter] = results - return {endpoint: {filter: {k: results[k].dict() for k in results}}} + return {endpoint: {filter: {k: results[k].asdict() for k in results}}} def count( self, filter: Optional[str] = None, endpoint: Optional[str] = None - ) -> Dict[str, Dict[str, Dict[str, Optional[int]]]]: + ) -> dict[str, dict[str, dict[str, Optional[int]]]]: """Counts the number of results for the filter, requiring only 1 request per provider by making use of the `meta->data_returned` key. @@ -405,7 +395,7 @@ def count( def list_properties( self, entry_type: str, - ) -> Dict[str, List[str]]: + ) -> dict[str, list[str]]: """Returns the list of properties reported at `/info/` for the given entry type, for each database. @@ -437,7 +427,7 @@ def list_properties( ) return self.property_lists[entry_type] - def search_property(self, query: str, entry_type: str) -> Dict[str, List[str]]: + def search_property(self, query: str, entry_type: str) -> dict[str, list[str]]: """Searches for the query substring within the listed properties served by each database. @@ -453,7 +443,7 @@ def search_property(self, query: str, entry_type: str) -> Dict[str, List[str]]: if not self.property_lists: self.list_properties(entry_type=entry_type) - matching_properties: Dict[str, Dict[str, List[str]]] = { + matching_properties: dict[str, dict[str, list[str]]] = { entry_type: defaultdict(list) } if entry_type in self.property_lists: @@ -469,9 +459,9 @@ def _execute_queries( endpoint: str, page_limit: Optional[int], paginate: bool, - response_fields: Optional[List[str]], + response_fields: Optional[list[str]], sort: Optional[str], - ) -> Dict[str, QueryResults]: + ) -> dict[str, QueryResults]: """Executes the queries over the base URLs either asynchronously or serially, depending on the `self.use_async` setting. @@ -535,11 +525,11 @@ def get_one( endpoint: str, filter: str, base_url: str, - response_fields: Optional[List[str]] = None, + response_fields: Optional[list[str]] = None, sort: Optional[str] = None, page_limit: Optional[int] = None, paginate: bool = True, - ) -> Dict[str, QueryResults]: + ) -> dict[str, QueryResults]: """Executes the query synchronously on one API. Parameters: @@ -582,11 +572,11 @@ async def _get_all_async( self, endpoint: str, filter: str, - response_fields: Optional[List[str]] = None, + response_fields: Optional[list[str]] = None, sort: Optional[str] = None, page_limit: Optional[int] = None, paginate: bool = True, - ) -> Dict[str, QueryResults]: + ) -> dict[str, QueryResults]: """Executes the query asynchronously across all defined APIs. Parameters: @@ -626,10 +616,10 @@ def _get_all( endpoint: str, filter: str, page_limit: Optional[int] = None, - response_fields: Optional[List[str]] = None, + response_fields: Optional[list[str]] = None, sort: Optional[str] = None, paginate: bool = True, - ) -> Dict[str, QueryResults]: + ) -> dict[str, QueryResults]: """Executes the query synchronously across all defined APIs. Parameters: @@ -670,11 +660,11 @@ async def get_one_async( endpoint: str, filter: str, base_url: str, - response_fields: Optional[List[str]] = None, + response_fields: Optional[list[str]] = None, sort: Optional[str] = None, page_limit: Optional[int] = None, paginate: bool = True, - ) -> Dict[str, QueryResults]: + ) -> dict[str, QueryResults]: """Executes the query asynchronously on one API. !!! note @@ -725,11 +715,11 @@ async def _get_one_async( endpoint: str, filter: str, base_url: str, - response_fields: Optional[List[str]] = None, + response_fields: Optional[list[str]] = None, sort: Optional[str] = None, page_limit: Optional[int] = None, paginate: bool = True, - ) -> Dict[str, QueryResults]: + ) -> dict[str, QueryResults]: """See [`OptimadeClient.get_one_async`][optimade.client.OptimadeClient.get_one_async].""" next_url, _task = self._setup( endpoint=endpoint, @@ -785,9 +775,9 @@ def _get_one( base_url: str, sort: Optional[str] = None, page_limit: Optional[int] = None, - response_fields: Optional[List[str]] = None, + response_fields: Optional[list[str]] = None, paginate: bool = True, - ) -> Dict[str, QueryResults]: + ) -> dict[str, QueryResults]: """See [`OptimadeClient.get_one`][optimade.client.OptimadeClient.get_one].""" next_url, _task = self._setup( endpoint=endpoint, @@ -846,9 +836,9 @@ def _setup( base_url: str, filter: str, page_limit: Optional[int], - response_fields: Optional[List[str]], + response_fields: Optional[list[str]], sort: Optional[str], - ) -> Tuple[str, TaskID]: + ) -> tuple[str, TaskID]: """Constructs the first query URL and creates the progress bar task. Returns: @@ -876,7 +866,7 @@ def _build_url( endpoint: Optional[str] = "structures", version: Optional[str] = None, filter: Optional[str] = None, - response_fields: Optional[List[str]] = None, + response_fields: Optional[list[str]] = None, sort: Optional[str] = None, page_limit: Optional[int] = None, ) -> str: @@ -957,7 +947,7 @@ def _check_filter(self, filter: str, endpoint: str) -> None: def _handle_response( self, response: Union[httpx.Response, requests.Response], _task: TaskID - ) -> Tuple[Dict[str, Any], str]: + ) -> tuple[dict[str, Any], str]: """Handle the response from the server. Parameters: @@ -1036,8 +1026,8 @@ def _teardown(self, _task: TaskID, num_results: int) -> None: ) def _execute_callbacks( - self, results: Dict, response: Union[httpx.Response, requests.Response] - ) -> Union[None, Dict]: + self, results: dict, response: Union[httpx.Response, requests.Response] + ) -> Union[None, dict]: """Execute any callbacks registered with the client. Parameters: diff --git a/optimade/client/utils.py b/optimade/client/utils.py index bfa139b9d..6aa9f3d43 100644 --- a/optimade/client/utils.py +++ b/optimade/client/utils.py @@ -1,7 +1,7 @@ import sys from contextlib import contextmanager from dataclasses import asdict, dataclass, field -from typing import Dict, List, Set, Union +from typing import Union from rich.console import Console from rich.progress import ( @@ -34,22 +34,22 @@ class TooManyRequestsException(RecoverableHTTPError): class QueryResults: """A container dataclass for the results from a given query.""" - data: Union[Dict, List[Dict]] = field(default_factory=list, init=False) # type: ignore[assignment] - errors: List[str] = field(default_factory=list, init=False) - links: Dict = field(default_factory=dict, init=False) - included: List[Dict] = field(default_factory=list, init=False) - meta: Dict = field(default_factory=dict, init=False) + data: Union[dict, list[dict]] = field(default_factory=list, init=False) # type: ignore[assignment] + errors: list[str] = field(default_factory=list, init=False) + links: dict = field(default_factory=dict, init=False) + included: list[dict] = field(default_factory=list, init=False) + meta: dict = field(default_factory=dict, init=False) @property - def included_index(self) -> Set[str]: + def included_index(self) -> set[str]: if not getattr(self, "_included_index", None): - self._included_index: Set[str] = set() + self._included_index: set[str] = set() return self._included_index - def dict(self): + def asdict(self): return asdict(self) - def update(self, page_results: Dict) -> None: + def update(self, page_results: dict) -> None: """Combine the results from one page with the existing results for a given query. Parameters: diff --git a/optimade/exceptions.py b/optimade/exceptions.py index 2a75cfd5b..4274c1c8d 100644 --- a/optimade/exceptions.py +++ b/optimade/exceptions.py @@ -1,5 +1,5 @@ from abc import ABC -from typing import Any, Dict, Optional, Tuple, Type +from typing import Any, Optional __all__ = ( "OptimadeHTTPException", @@ -34,7 +34,7 @@ class OptimadeHTTPException(Exception, ABC): status_code: int title: str detail: Optional[str] = None - headers: Optional[Dict[str, Any]] = None + headers: Optional[dict[str, Any]] = None def __init__( self, detail: Optional[str] = None, headers: Optional[dict] = None @@ -104,7 +104,7 @@ class NotImplementedResponse(OptimadeHTTPException): """A tuple of the possible errors that can be returned by an OPTIMADE API.""" -POSSIBLE_ERRORS: Tuple[Type[OptimadeHTTPException], ...] = ( +POSSIBLE_ERRORS: tuple[type[OptimadeHTTPException], ...] = ( BadRequest, Forbidden, NotFound, diff --git a/optimade/filterparser/lark_parser.py b/optimade/filterparser/lark_parser.py index 01d5044cd..67d159916 100644 --- a/optimade/filterparser/lark_parser.py +++ b/optimade/filterparser/lark_parser.py @@ -5,7 +5,7 @@ """ from pathlib import Path -from typing import Dict, Optional, Tuple +from typing import Optional from lark import Lark, Tree @@ -20,7 +20,7 @@ class ParserError(Exception): """ -def get_versions() -> Dict[Tuple[int, int, int], Dict[str, Path]]: +def get_versions() -> dict[tuple[int, int, int], dict[str, Path]]: """Find grammar files within this package's grammar directory, returning a dictionary broken down by scraped grammar version (major, minor, patch) and variant (a string tag). @@ -29,10 +29,10 @@ def get_versions() -> Dict[Tuple[int, int, int], Dict[str, Path]]: A mapping from version, variant to grammar file name. """ - dct: Dict[Tuple[int, int, int], Dict[str, Path]] = {} + dct: dict[tuple[int, int, int], dict[str, Path]] = {} for filename in Path(__file__).parent.joinpath("../grammar").glob("*.lark"): tags = filename.stem.lstrip("v").split(".") - version: Tuple[int, int, int] = (int(tags[0]), int(tags[1]), int(tags[2])) + version: tuple[int, int, int] = (int(tags[0]), int(tags[1]), int(tags[2])) variant: str = "default" if len(tags) == 3 else str(tags[-1]) if version not in dct: dct[version] = {} @@ -50,7 +50,7 @@ class LarkParser: """ def __init__( - self, version: Optional[Tuple[int, int, int]] = None, variant: str = "default" + self, version: Optional[tuple[int, int, int]] = None, variant: str = "default" ): """For a given version and variant, try to load the corresponding grammar. diff --git a/optimade/filtertransformers/base_transformer.py b/optimade/filtertransformers/base_transformer.py index 2bb89086c..9e9928be0 100644 --- a/optimade/filtertransformers/base_transformer.py +++ b/optimade/filtertransformers/base_transformer.py @@ -7,7 +7,7 @@ import abc import warnings -from typing import Any, Dict, Optional, Type +from typing import Any, Optional from lark import Transformer, Tree, v_args @@ -79,8 +79,8 @@ class BaseTransformer(Transformer, abc.ABC): """ - mapper: Optional[Type[BaseResourceMapper]] = None - operator_map: Dict[str, Optional[str]] = { + mapper: Optional[type[BaseResourceMapper]] = None + operator_map: dict[str, Optional[str]] = { "<": None, "<=": None, ">": None, @@ -100,11 +100,11 @@ class BaseTransformer(Transformer, abc.ABC): "!=": "!=", } - _quantity_type: Type[Quantity] = Quantity + _quantity_type: type[Quantity] = Quantity _quantities = None def __init__( - self, mapper: Optional[Type[BaseResourceMapper]] = None + self, mapper: Optional[type[BaseResourceMapper]] = None ): # pylint: disable=super-init-not-called """Initialise the transformer object, optionally loading in a resource mapper for use when post-processing. @@ -113,7 +113,7 @@ def __init__( self.mapper = mapper @property - def backend_mapping(self) -> Dict[str, Quantity]: + def backend_mapping(self) -> dict[str, Quantity]: """A mapping between backend field names (aliases) and the corresponding [`Quantity`][optimade.filtertransformers.base_transformer.Quantity] object. """ @@ -122,7 +122,7 @@ def backend_mapping(self) -> Dict[str, Quantity]: } @property - def quantities(self) -> Dict[str, Quantity]: + def quantities(self) -> dict[str, Quantity]: """A mapping from the OPTIMADE field name to the corresponding [`Quantity`][optimade.filtertransformers.base_transformer.Quantity] objects. """ @@ -132,10 +132,10 @@ def quantities(self) -> Dict[str, Quantity]: return self._quantities @quantities.setter - def quantities(self, quantities: Dict[str, Quantity]) -> None: + def quantities(self, quantities: dict[str, Quantity]) -> None: self._quantities = quantities - def _build_quantities(self) -> Dict[str, Quantity]: + def _build_quantities(self) -> dict[str, Quantity]: """Creates a dictionary of field names mapped to [`Quantity`][optimade.filtertransformers.base_transformer.Quantity] objects from the fields registered by the mapper. diff --git a/optimade/filtertransformers/elasticsearch.py b/optimade/filtertransformers/elasticsearch.py index 195e77748..54d83d0fe 100644 --- a/optimade/filtertransformers/elasticsearch.py +++ b/optimade/filtertransformers/elasticsearch.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Type, Union +from typing import Optional, Union from elasticsearch_dsl import Field, Integer, Keyword, Q, Text from lark import v_args @@ -97,12 +97,12 @@ class ElasticTransformer(BaseTransformer): ">=": "gte", } - _quantity_type: Type[ElasticsearchQuantity] = ElasticsearchQuantity + _quantity_type: type[ElasticsearchQuantity] = ElasticsearchQuantity def __init__( self, - mapper: Type[BaseResourceMapper], - quantities: Optional[Dict[str, Quantity]] = None, + mapper: type[BaseResourceMapper], + quantities: Optional[dict[str, Quantity]] = None, ): if quantities is not None: self.quantities = quantities @@ -143,7 +143,7 @@ def _field( return quantity if nested is not None: - return "%s.%s" % (nested.backend_field, quantity.name) # type: ignore[union-attr] + return f"{nested.backend_field}.{quantity.name}" # type: ignore[union-attr] return quantity.backend_field # type: ignore[union-attr, return-value] diff --git a/optimade/filtertransformers/mongo.py b/optimade/filtertransformers/mongo.py index 862e21f5e..1b9ec8324 100755 --- a/optimade/filtertransformers/mongo.py +++ b/optimade/filtertransformers/mongo.py @@ -7,7 +7,7 @@ import copy import itertools import warnings -from typing import Any, Dict, List, Union +from typing import Any, Union from lark import Token, v_args @@ -56,7 +56,7 @@ class MongoTransformer(BaseTransformer): "$nin": "$in", } - def postprocess(self, query: Dict[str, Any]): + def postprocess(self, query: dict[str, Any]): """Used to post-process the nested dictionary of the parsed query.""" query = self._apply_relationship_filtering(query) query = self._apply_length_operators(query) @@ -229,7 +229,7 @@ def property_zip_addon(self, arg): # property_zip_addon: ":" property (":" property)* raise NotImplementedError("Correlated list queries are not supported.") - def _recursive_expression_phrase(self, arg: List) -> Dict[str, Any]: + def _recursive_expression_phrase(self, arg: list) -> dict[str, Any]: """Helper function for parsing `expression_phrase`. Recursively sorts out the correct precedence for `$not`, `$and` and `$or`. @@ -242,7 +242,7 @@ def _recursive_expression_phrase(self, arg: List) -> Dict[str, Any]: """ - def handle_not_and(arg: Dict[str, List]) -> Dict[str, List]: + def handle_not_and(arg: dict[str, list]) -> dict[str, list]: """Handle the case of `~(A & B) -> (~A | ~B)`. We have to check for the special case in which the "and" was created @@ -271,7 +271,7 @@ def handle_not_and(arg: Dict[str, List]) -> Dict[str, List]: ] } - def handle_not_or(arg: Dict[str, List]) -> Dict[str, List]: + def handle_not_or(arg: dict[str, list]) -> dict[str, list]: """Handle the case of ~(A | B) -> (~A & ~B). !!! note @@ -568,7 +568,7 @@ def replace_str_date_with_datetime(subdict, prop, expr): ) -def recursive_postprocessing(filter_: Union[Dict, List], condition, replacement): +def recursive_postprocessing(filter_: Union[dict, list], condition, replacement): """Recursively descend into the query, checking each dictionary (contained in a list, or as an entry in another dictionary) for the condition passed. If the condition is true, apply the diff --git a/optimade/models/__init__.py b/optimade/models/__init__.py index 018560412..af5814317 100644 --- a/optimade/models/__init__.py +++ b/optimade/models/__init__.py @@ -5,6 +5,7 @@ from .jsonapi import * # noqa: F403 from .links import * # noqa: F403 from .optimade_json import * # noqa: F403 +from .partial_data import * # noqa: F403 from .references import * # noqa: F403 from .responses import * # noqa: F403 from .structures import * # noqa: F403 @@ -18,6 +19,7 @@ + index_metadb.__all__ # type: ignore[name-defined] # noqa: F405 + links.__all__ # type: ignore[name-defined] # noqa: F405 + optimade_json.__all__ # type: ignore[name-defined] # noqa: F405 + + partial_data.__all__ # type: ignore[name-defined] # noqa: F405 + references.__all__ # type: ignore[name-defined] # noqa: F405 + responses.__all__ # type: ignore[name-defined] # noqa: F405 + structures.__all__ # type: ignore[name-defined] # noqa: F405 diff --git a/optimade/models/baseinfo.py b/optimade/models/baseinfo.py index 3374b6a02..36c47695e 100644 --- a/optimade/models/baseinfo.py +++ b/optimade/models/baseinfo.py @@ -1,6 +1,6 @@ # pylint: disable=no-self-argument,no-name-in-module import re -from typing import Dict, List, Optional +from typing import Optional from pydantic import AnyHttpUrl, BaseModel, Field, root_validator, validator @@ -64,18 +64,18 @@ class BaseInfoAttributes(BaseModel): The version number string MUST NOT be prefixed by, e.g., "v". Examples: `1.0.0`, `1.0.0-rc.2`.""", ) - available_api_versions: List[AvailableApiVersion] = StrictField( + available_api_versions: list[AvailableApiVersion] = StrictField( ..., description="A list of dictionaries of available API versions at other base URLs", ) - formats: List[str] = StrictField( + formats: list[str] = StrictField( default=["json"], description="List of available output formats." ) - available_endpoints: List[str] = StrictField( + available_endpoints: list[str] = StrictField( ..., description="List of available endpoints (i.e., the string to be appended to the versioned base URL).", ) - entry_types_by_format: Dict[str, List[str]] = StrictField( + entry_types_by_format: dict[str, list[str]] = StrictField( ..., description="Available entry endpoints as a function of output formats." ) is_index: Optional[bool] = StrictField( diff --git a/optimade/models/entries.py b/optimade/models/entries.py index 7850565ea..169a118d2 100644 --- a/optimade/models/entries.py +++ b/optimade/models/entries.py @@ -1,10 +1,10 @@ # pylint: disable=line-too-long,no-self-argument from datetime import datetime -from typing import Dict, List, Optional +from typing import Optional -from pydantic import BaseModel, validator # pylint: disable=no-name-in-module +from pydantic import AnyUrl, BaseModel, root_validator, validator -from optimade.models.jsonapi import Attributes, Relationships, Resource +from optimade.models.jsonapi import Attributes, Meta, Relationships, Resource from optimade.models.optimade_json import DataType, Relationship from optimade.models.utils import OptimadeField, StrictField, SupportLevel @@ -100,6 +100,77 @@ def cast_immutable_id_to_str(cls, value): return value +class PartialDataLink(BaseModel): + link: AnyUrl = OptimadeField( + ..., + description="String. A JSON API link that points to a location from which the omitted data can be fetched. There is no requirement on the syntax or format for the link URL.", + ) + format: str = OptimadeField( + ..., + description='String. The name of the format provided via this link. For one of the objects this format field SHOULD have the value "jsonlines", which refers to the format in OPTIMADE JSON lines partial data format.', + ) + + @validator("format") + def check_if_format_is_supported(cls, value): + from optimade.server.config import CONFIG + + if value not in [form.value for form in CONFIG.partial_data_formats]: + raise ValueError( + f"The format {value} is not one of the enabled_formats{CONFIG.partial_data_formats}." + ) + return value + + +class EntryMetadata(Meta): + """Contains the metadata for the attributes of an entry""" + + property_metadata: dict = StrictField( + None, + description="""An object containing per-entry and per-property metadata. The keys are the names of the fields in attributes for which metadata is available. The values belonging to these keys are dictionaries containing the relevant metadata fields. See also [Metadata properties](https://github.com/Materials-Consortia/OPTIMADE/blob/develop/optimade.rst#metadata-properties)""", + ) + + partial_data_links: dict[str, list[PartialDataLink]] = StrictField( + None, + description="""A dictionary, where the keys are the names of the properties in the attributes field for which the value is too large to be shared by default. + For each property one or more links are provided from which the value of the attribute can be retrieved.""", + ) + + @validator("property_metadata") + def check_property_metadata_subfields(cls, property_metadata): + from optimade.server.mappers.entries import ( + BaseResourceMapper, + ) + + if property_metadata: + for field in property_metadata: + if attribute_meta_dict := property_metadata.get(field): + for subfield in attribute_meta_dict: + BaseResourceMapper.check_starts_with_supported_prefix( + subfield, + "Currently no OPTIMADE fields have been defined for the per attribute metadata, thus only database and domain specific fields are allowed", + ) + return property_metadata + + @validator("partial_data_links") + def check_partial_data_links_subfields(cls, partial_data_links): + from optimade.server.mappers.entries import ( + BaseResourceMapper, + ) + + if partial_data_links: + for field in partial_data_links: + if attribute_partial_data_link := partial_data_links.get(field): + for subdict in attribute_partial_data_link: + for subfield in subdict.__dict__: + if subfield in ("link", "format"): + continue + BaseResourceMapper.check_starts_with_supported_prefix( + subfield, + "The only OPTIMADE fields defined under the 'partial_data_links' field are 'format'and ĺinks' all other database and domain specific fields must have a database/domain specific prefix.", + ) + return partial_data_links + + class EntryResource(Resource): """The base model for an entry resource.""" @@ -147,12 +218,59 @@ class EntryResource(Resource): Database-provider-specific properties need to include the database-provider-specific prefix (see section on Database-Provider-Specific Namespace Prefixes).""", ) + meta: Optional[EntryMetadata] = StrictField( + None, + description="""A [JSON API meta object](https://jsonapi.org/format/1.1/#document-meta) that is used to communicate metadata.""", + ) + relationships: Optional[EntryRelationships] = StrictField( None, description="""A dictionary containing references to other entries according to the description in section Relationships encoded as [JSON API Relationships](https://jsonapi.org/format/1.0/#document-resource-object-relationships). The OPTIONAL human-readable description of the relationship MAY be provided in the `description` field inside the `meta` dictionary of the JSON API resource identifier object.""", ) + @root_validator(pre=True) + def check_meta(cls, values): + """Validator to check whether meta field has been formatted correctly.""" + from optimade.server.mappers.entries import ( + BaseResourceMapper, + ) + + meta = values.get("meta") + if not meta: + return values + + # todo the code for property_metadata and partial_data_links is very similar so it should be possible to reduce the size of the code here. + if property_metadata := meta.pop("property_metadata", None): + # check that all the fields under property metadata are in attributes + attributes = values.get("attributes", {}) + for subfield in property_metadata: + if subfield not in attributes: + raise ValueError( + f"The keys under the field `property_metadata` need to match with the field names in attributes. The field {subfield} is however not in attributes." + ) + + if partial_data_links := meta.pop("partial_data_links", None): + # check that all the fields under property metadata are in attributes + attributes = values.get("attributes", {}) + for subfield in partial_data_links: + if subfield not in attributes: + raise ValueError( + f"The keys under the field `partial_data_links` need to match with the field names in attributes. The field {subfield} is however not in attributes." + ) + + # At this point I am getting ahead of the specification. There is the intention to allow database specific fields(with the database specific prefixes) here in line with the JSON API specification, but it has not been decided yet how this case should be handled in the property definitions. + for field in meta: + BaseResourceMapper.check_starts_with_supported_prefix( + field, + 'Currently no OPTIMADE fields other than "property_metadata" have been defined for the per entry "meta" field, thus only database and domain specific fields are allowed.', + ) + + values["meta"]["property_metadata"] = property_metadata + values["meta"]["partial_data_links"] = partial_data_links + + return values + class EntryInfoProperty(BaseModel): description: str = StrictField( @@ -184,18 +302,18 @@ class EntryInfoProperty(BaseModel): class EntryInfoResource(BaseModel): - formats: List[str] = StrictField( + formats: list[str] = StrictField( ..., description="List of output formats available for this type of entry." ) description: str = StrictField(..., description="Description of the entry.") - properties: Dict[str, EntryInfoProperty] = StrictField( + properties: dict[str, EntryInfoProperty] = StrictField( ..., description="A dictionary describing queryable properties for this entry type, where each key is a property name.", ) - output_fields_by_format: Dict[str, List[str]] = StrictField( + output_fields_by_format: dict[str, list[str]] = StrictField( ..., description="Dictionary of available output fields for this entry type, where the keys are the values of the `formats` list and the values are the keys of the `properties` dictionary.", ) diff --git a/optimade/models/index_metadb.py b/optimade/models/index_metadb.py index 7c48d666c..e2130bf9a 100644 --- a/optimade/models/index_metadb.py +++ b/optimade/models/index_metadb.py @@ -1,6 +1,6 @@ # pylint: disable=no-self-argument from enum import Enum -from typing import Dict, Union +from typing import Union from pydantic import BaseModel, Field # pylint: disable=no-name-in-module @@ -52,7 +52,7 @@ class IndexInfoResource(BaseInfoResource): attributes: IndexInfoAttributes = Field(...) relationships: Union[ - None, Dict[DefaultRelationship, IndexRelationship] + None, dict[DefaultRelationship, IndexRelationship] ] = StrictField( # type: ignore[assignment] ..., title="Relationships", diff --git a/optimade/models/jsonapi.py b/optimade/models/jsonapi.py index 23e0db241..a9022dc3b 100644 --- a/optimade/models/jsonapi.py +++ b/optimade/models/jsonapi.py @@ -1,7 +1,7 @@ """This module should reproduce JSON API v1.0 https://jsonapi.org/format/1.0/""" # pylint: disable=no-self-argument from datetime import datetime, timezone -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any, Optional, Union from pydantic import ( # pylint: disable=no-name-in-module AnyUrl, @@ -168,7 +168,7 @@ class BaseResource(BaseModel): class Config: @staticmethod - def schema_extra(schema: Dict[str, Any], model: Type["BaseResource"]) -> None: + def schema_extra(schema: dict[str, Any], model: type["BaseResource"]) -> None: """Ensure `id` and `type` are the first two entries in the list required properties. Note: @@ -227,7 +227,7 @@ class Relationship(BaseModel): None, description="a links object containing at least one of the following: self, related", ) - data: Optional[Union[BaseResource, List[BaseResource]]] = StrictField( + data: Optional[Union[BaseResource, list[BaseResource]]] = StrictField( None, description="Resource linkage" ) meta: Optional[Meta] = StrictField( @@ -323,17 +323,17 @@ class Resource(BaseResource): class Response(BaseModel): """A top-level response""" - data: Optional[Union[None, Resource, List[Resource]]] = StrictField( + data: Optional[Union[None, Resource, list[Resource]]] = StrictField( None, description="Outputted Data", uniqueItems=True ) meta: Optional[Meta] = StrictField( None, description="A meta object containing non-standard information related to the Success", ) - errors: Optional[List[Error]] = StrictField( + errors: Optional[list[Error]] = StrictField( None, description="A list of unique errors", uniqueItems=True ) - included: Optional[List[Resource]] = StrictField( + included: Optional[list[Resource]] = StrictField( None, description="A list of unique included resources", uniqueItems=True ) links: Optional[ToplevelLinks] = StrictField( diff --git a/optimade/models/optimade_json.py b/optimade/models/optimade_json.py index bad738057..2af6ab53a 100644 --- a/optimade/models/optimade_json.py +++ b/optimade/models/optimade_json.py @@ -2,7 +2,7 @@ # pylint: disable=no-self-argument,no-name-in-module from datetime import datetime from enum import Enum -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any, Optional, Union from pydantic import AnyHttpUrl, AnyUrl, BaseModel, EmailStr, root_validator @@ -43,7 +43,7 @@ class DataType(Enum): @classmethod def get_values(cls): """Get OPTIMADE data types (enum values) as a (sorted) list""" - return sorted((_.value for _ in cls)) + return sorted(_.value for _ in cls) @classmethod def from_python_type(cls, python_type: Union[type, str, object]): @@ -156,7 +156,7 @@ def status_must_not_be_specified(cls, values): class Config: @staticmethod - def schema_extra(schema: Dict[str, Any], model: Type["Warnings"]) -> None: + def schema_extra(schema: dict[str, Any], model: type["Warnings"]) -> None: """Update OpenAPI JSON schema model for `Warning`. * Ensure `type` is in the list required properties and in the correct place. @@ -317,7 +317,7 @@ class ResponseMeta(jsonapi.Meta): None, description="a dictionary describing the server implementation" ) - warnings: Optional[List[Warnings]] = StrictField( + warnings: Optional[list[Warnings]] = StrictField( None, description="""A list of warning resource objects representing non-critical errors or warnings. A warning resource object is defined similarly to a [JSON API error object](http://jsonapi.org/format/1.0/#error-objects), but MUST also include the field `type`, which MUST have the value `"warning"`. @@ -372,5 +372,5 @@ class Relationship(jsonapi.Relationship): """Similar to normal JSON API relationship, but with addition of OPTIONAL meta field for a resource.""" data: Optional[ - Union[BaseRelationshipResource, List[BaseRelationshipResource]] + Union[BaseRelationshipResource, list[BaseRelationshipResource]] ] = StrictField(None, description="Resource linkage", uniqueItems=True) diff --git a/optimade/models/partial_data.py b/optimade/models/partial_data.py new file mode 100644 index 000000000..8630545ff --- /dev/null +++ b/optimade/models/partial_data.py @@ -0,0 +1,182 @@ +from typing import Literal, Optional + +from pydantic import BaseModel + +from optimade.models.entries import EntryResource +from optimade.models.utils import OptimadeField, StrictField, SupportLevel + +__all__ = ("PartialDataHeader", "PartialDataResource", "LinksObject") + + +class LinksObject(BaseModel): + base_url: Optional[str] = OptimadeField( + None, + description="""The base URL of the implementation serving the database to which this property belongs.""", + ) + item_describedby: Optional[ + str + ] = OptimadeField( # The term describedby is used in the json Api, therefore we do not place an underscore between described and by. + None, + description="""A URL to an external JSON Schema that validates the data lines of the response. + The format and requirements on this schema are the same as for the inline schema field :field:`item_schema`. +The format of data lines of the response (i.e., all lines except the first and the last) depends on whether the header object specifies the layout as :val:`"dense"` or :val:`"sparse"`. +""", + ) + + +class PartialDataInfo(BaseModel): + version: str = OptimadeField( + ..., + description="""Specifies the minor version of the partial data format used. + The string MUST be of the format "MAJOR.MINOR", referring to the version of the OPTIMADE standard that describes the format. + The version number string MUST NOT be prefixed by, e.g., "v". In implementations of the present version of the standard, the value MUST be exactly :val:`1.2`. + A client MUST NOT expect to be able to parse the :field:`format` value if the field is not a string of the format MAJOR.MINOR or if the MAJOR version number is unrecognized.""", + ) + + +class PartialDataHeader(BaseModel): + optimade_partial_data: PartialDataInfo = OptimadeField( + ..., + description="""An object identifying the response as being on OPTIMADE partial data format. +It MUST contain the following key: +"version": String. Specifies the minor version of the partial data format used. The string MUST be of the format "MAJOR.MINOR", referring to the version of the OPTIMADE standard that describes the format. The version number string MUST NOT be prefixed by, e.g., "v". In implementations of the present version of the standard, the value MUST be exactly 1.2. A client MUST NOT expect to be able to parse the version value if the field is not a string of the format MAJOR.MINOR or if the MAJOR version number is unrecognized. + +- **Type**: Dictionary. + +- **Requirements/Conventions**: + - **Support**: MUST be supported by all implementations, MUST NOT be `null`. + +- **Examples**: + - `""optimade-partial-data": {"version": "1.2.0"}"`""", + support=SupportLevel.MUST, + ) + layout: Literal["dense", "sparse"] = OptimadeField( + ..., + description="""A string either equal to "dense" or "sparse" to indicate whether the returned format uses a dense or sparse layout. + +- **Type**: string. + +- **Requirements/Conventions**: + - **Support**: MUST be supported by all implementations, MUST NOT be `null`. + +- **Examples**: + - `"dense"` + - `"sparse"`""", + support=SupportLevel.MUST, + ) + returned_ranges: Optional[list[dict]] = OptimadeField( + None, + description="""Array of Objects. For dense layout, and sparse layout of one dimensional list properties, the array contains a single element which is a slice object representing the range of data present in the response. In the specific case of a hierarchy of list properties represented as a sparse multi-dimensional array, if the field "returned_ranges" is given, it MUST contain one slice object per dimension of the multi-dimensional array, representing slices for each dimension that cover the data given in the response. + +- **Type**: List of Dictionaries. + +- **Requirements/Conventions**: + - **Support**: SHOULD be supported by all implementations, SHOULD NOT be `null`. + +- **Examples**: + - `""returned_ranges": [{"start": 10, "stop": 20, "step": 2}]"` + - `""returned_ranges": [{"start": 10, "stop": 20, "step": 2}, {"start": 0, "stop": 9, "step": 1}]"`""", + support=SupportLevel.SHOULD, + ) + property_name: Optional[str] = OptimadeField( + None, + description="""The name of the property being provided. + +- **Type**: string. + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.. + +- **Examples**: + - `"cartesian_site_positions"`""", + support=SupportLevel.OPTIONAL, + ) + entry: Optional[dict] = OptimadeField( + None, + description=""" Object. An object that MUST have the following two keys: + + "id": String. The id of the entry of the property being provided. + "type": String. The type of the entry of the property being provided. + + +- **Type**: string. + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.. + +- **Examples**: + - `"{"id": "mpf_72", "type": structure"}`""", + support=SupportLevel.OPTIONAL, + ) + has_references: Optional[bool] = OptimadeField( + None, + description=""" An optional boolean to indicate whether any of the data lines in the response contains a reference marker. A value of false means that the client does not have to process any of the lines to detect reference markers, which may speed up the parsing. + +- **Type**: boolean. + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.. + +- **Examples**: + - `false`""", + support=SupportLevel.OPTIONAL, + ) + item_schema: Optional[dict] = OptimadeField( + None, + description="""An object that represents a JSON Schema that validates the data lines of the response. The format SHOULD be the relevant partial extract of a valid property definition as described in Property Definitions. If a schema is provided, it MUST be a valid JSON schema using the same version of JSON schema as described in that section. +- **Type**: dictionary. + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.. +""", + support=SupportLevel.OPTIONAL, + ) + + links: Optional[LinksObject] = OptimadeField( + None, + description=""" An object to provide relevant links for the property being provided. It MAY contain the following key: + + "base_url": String. The base URL of the implementation serving the database to which this property belongs. + "item_describedby": String. A URL to an external JSON Schema that validates the data lines of the response. The format and requirements on this schema are the same as for the inline schema field item_schema. + +- **Type**: dictionary. + +- **Requirements/Conventions**: + - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.. +""", + support=SupportLevel.OPTIONAL, + ) + parent_id: Optional[dict] = OptimadeField( + None, + description="""The id of the entry to which this partial data belongs. +""", + support=SupportLevel.OPTIONAL, + ) + + +class PartialDataFormat(BaseModel): + header: PartialDataHeader + data: list + + +class PartialDataResource(EntryResource): + type: str = StrictField( + "partial_data", + description="""The name of the type of an entry. + - **Type**: string. + + - **Requirements/Conventions**: + - **Support**: MUST be supported by all implementations, MUST NOT be `null`. + - **Query**: MUST be a queryable property with support for all mandatory filter features. + - **Response**: REQUIRED in the response. + - MUST be an existing entry type. + - The entry of type `` and ID `` MUST be returned in response to a request for `//` under the versioned base URL. + + - **Examples**: + - `"structures"`""", + regex="^structures$", + support=SupportLevel.MUST, + queryable=SupportLevel.MUST, + ) + + attributes: PartialDataHeader # Todo make a better model for json response diff --git a/optimade/models/references.py b/optimade/models/references.py index afdd2f48f..bde2d538a 100644 --- a/optimade/models/references.py +++ b/optimade/models/references.py @@ -1,5 +1,5 @@ # pylint: disable=line-too-long,no-self-argument -from typing import List, Optional +from typing import Optional from pydantic import AnyUrl, BaseModel, validator # pylint: disable=no-name-in-module @@ -42,14 +42,14 @@ class ReferenceResourceAttributes(EntryResourceAttributes): """ - authors: Optional[List[Person]] = OptimadeField( + authors: Optional[list[Person]] = OptimadeField( None, description="List of person objects containing the authors of the reference.", support=SupportLevel.OPTIONAL, queryable=SupportLevel.OPTIONAL, ) - editors: Optional[List[Person]] = OptimadeField( + editors: Optional[list[Person]] = OptimadeField( None, description="List of person objects containing the editors of the reference.", support=SupportLevel.OPTIONAL, diff --git a/optimade/models/responses.py b/optimade/models/responses.py index 01845dc82..1725f778f 100644 --- a/optimade/models/responses.py +++ b/optimade/models/responses.py @@ -1,5 +1,5 @@ # pylint: disable=no-self-argument -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from pydantic import Field, root_validator @@ -9,6 +9,7 @@ from optimade.models.jsonapi import Response from optimade.models.links import LinksResource from optimade.models.optimade_json import OptimadeError, ResponseMeta, Success +from optimade.models.partial_data import PartialDataResource from optimade.models.references import ReferenceResource from optimade.models.structures import StructureResource from optimade.models.utils import StrictField @@ -25,6 +26,7 @@ "StructureResponseMany", "ReferenceResponseOne", "ReferenceResponseMany", + "PartialDataResponse", ) @@ -34,7 +36,7 @@ class ErrorResponse(Response): meta: ResponseMeta = StrictField( ..., description="A meta object containing non-standard information." ) - errors: List[OptimadeError] = StrictField( + errors: list[OptimadeError] = StrictField( ..., description="A list of OPTIMADE-specific JSON API error objects, where the field detail MUST be present.", uniqueItems=True, @@ -66,23 +68,23 @@ class InfoResponse(Success): class EntryResponseOne(Success): - data: Union[EntryResource, Dict[str, Any], None] = Field(...) # type: ignore[assignment] - included: Optional[Union[List[EntryResource], List[Dict[str, Any]]]] = Field( # type: ignore[assignment] + data: Union[EntryResource, dict[str, Any], None] = Field(...) # type: ignore[assignment] + included: Optional[Union[list[EntryResource], list[dict[str, Any]]]] = Field( # type: ignore[assignment] None, uniqueItems=True ) class EntryResponseMany(Success): - data: Union[List[EntryResource], List[Dict[str, Any]]] = Field( # type: ignore[assignment] + data: Union[list[EntryResource], list[dict[str, Any]]] = Field( # type: ignore[assignment] ..., uniqueItems=True ) - included: Optional[Union[List[EntryResource], List[Dict[str, Any]]]] = Field( # type: ignore[assignment] + included: Optional[Union[list[EntryResource], list[dict[str, Any]]]] = Field( # type: ignore[assignment] None, uniqueItems=True ) class LinksResponse(EntryResponseMany): - data: Union[List[LinksResource], List[Dict[str, Any]]] = StrictField( + data: Union[list[LinksResource], list[dict[str, Any]]] = StrictField( ..., description="List of unique OPTIMADE links resource objects.", uniqueItems=True, @@ -90,13 +92,13 @@ class LinksResponse(EntryResponseMany): class StructureResponseOne(EntryResponseOne): - data: Union[StructureResource, Dict[str, Any], None] = StrictField( + data: Union[StructureResource, dict[str, Any], None] = StrictField( ..., description="A single structures entry resource." ) class StructureResponseMany(EntryResponseMany): - data: Union[List[StructureResource], List[Dict[str, Any]]] = StrictField( + data: Union[list[StructureResource], list[dict[str, Any]]] = StrictField( ..., description="List of unique OPTIMADE structures entry resource objects.", uniqueItems=True, @@ -104,14 +106,20 @@ class StructureResponseMany(EntryResponseMany): class ReferenceResponseOne(EntryResponseOne): - data: Union[ReferenceResource, Dict[str, Any], None] = StrictField( + data: Union[ReferenceResource, dict[str, Any], None] = StrictField( ..., description="A single references entry resource." ) class ReferenceResponseMany(EntryResponseMany): - data: Union[List[ReferenceResource], List[Dict[str, Any]]] = StrictField( + data: Union[list[ReferenceResource], list[dict[str, Any]]] = StrictField( ..., description="List of unique OPTIMADE references entry resource objects.", uniqueItems=True, ) + + +class PartialDataResponse(EntryResponseOne): + data: Union[PartialDataResource, dict[str, Any], None] = StrictField( + ..., description="(Part of) the data for a single property of an entry." + ) diff --git a/optimade/models/structures.py b/optimade/models/structures.py index aa89afe69..565c02362 100644 --- a/optimade/models/structures.py +++ b/optimade/models/structures.py @@ -2,7 +2,7 @@ import re import warnings from enum import Enum, IntEnum -from typing import List, Optional, Union +from typing import Optional, Union from pydantic import BaseModel, conlist, root_validator, validator @@ -84,7 +84,7 @@ class Species(BaseModel): queryable=SupportLevel.OPTIONAL, ) - chemical_symbols: List[str] = OptimadeField( + chemical_symbols: list[str] = OptimadeField( ..., description="""MUST be a list of strings of all chemical elements composing this species. Each item of the list MUST be one of the following: @@ -97,7 +97,7 @@ class Species(BaseModel): queryable=SupportLevel.OPTIONAL, ) - concentration: List[float] = OptimadeField( + concentration: list[float] = OptimadeField( ..., description="""MUST be a list of floats, with same length as `chemical_symbols`. The numbers represent the relative concentration of the corresponding chemical symbol in this species. The numbers SHOULD sum to one. Cases in which the numbers do not sum to one typically fall only in the following two categories: @@ -109,7 +109,7 @@ class Species(BaseModel): queryable=SupportLevel.OPTIONAL, ) - mass: Optional[List[float]] = OptimadeField( + mass: Optional[list[float]] = OptimadeField( None, description="""If present MUST be a list of floats expressed in a.m.u. Elements denoting vacancies MUST have masses equal to 0.""", @@ -127,14 +127,14 @@ class Species(BaseModel): queryable=SupportLevel.OPTIONAL, ) - attached: Optional[List[str]] = OptimadeField( + attached: Optional[list[str]] = OptimadeField( None, description="""If provided MUST be a list of length 1 or more of strings of chemical symbols for the elements attached to this site, or "X" for a non-chemical element.""", support=SupportLevel.OPTIONAL, queryable=SupportLevel.OPTIONAL, ) - nattached: Optional[List[int]] = OptimadeField( + nattached: Optional[list[int]] = OptimadeField( None, description="""If provided MUST be a list of length 1 or more of integers indicating the number of attached atoms of the kind specified in the value of the :field:`attached` key.""", support=SupportLevel.OPTIONAL, @@ -210,7 +210,7 @@ class Assembly(BaseModel): """ - sites_in_groups: List[List[int]] = OptimadeField( + sites_in_groups: list[list[int]] = OptimadeField( ..., description="""Index of the sites (0-based) that belong to each group for each assembly. @@ -221,7 +221,7 @@ class Assembly(BaseModel): queryable=SupportLevel.OPTIONAL, ) - group_probabilities: List[float] = OptimadeField( + group_probabilities: list[float] = OptimadeField( ..., description="""Statistical probability of each group. It MUST have the same length as `sites_in_groups`. It SHOULD sum to one. @@ -263,7 +263,7 @@ def check_self_consistency(cls, v, values): class StructureResourceAttributes(EntryResourceAttributes): """This class contains the Field for the attributes used to represent a structure, e.g. unit cell, atoms, positions.""" - elements: Optional[List[str]] = OptimadeField( + elements: Optional[list[str]] = OptimadeField( ..., description="""The chemical symbols of the different elements present in the structure. @@ -311,7 +311,7 @@ class StructureResourceAttributes(EntryResourceAttributes): queryable=SupportLevel.MUST, ) - elements_ratios: Optional[List[float]] = OptimadeField( + elements_ratios: Optional[list[float]] = OptimadeField( ..., description="""Relative proportions of different elements in the structure. @@ -523,7 +523,7 @@ class StructureResourceAttributes(EntryResourceAttributes): queryable=SupportLevel.OPTIONAL, ) - cartesian_site_positions: Optional[List[Vector3D]] = OptimadeField( # type: ignore[valid-type] + cartesian_site_positions: Optional[list[Vector3D]] = OptimadeField( # type: ignore[valid-type] ..., description="""Cartesian positions of each site in the structure. A site is usually used to describe positions of atoms; what atoms can be encountered at a given site is conveyed by the `species_at_sites` property, and the species themselves are described in the `species` property. @@ -564,7 +564,7 @@ class StructureResourceAttributes(EntryResourceAttributes): support=SupportLevel.SHOULD, ) - species: Optional[List[Species]] = OptimadeField( + species: Optional[list[Species]] = OptimadeField( ..., description="""A list describing the species of the sites of this structure. Species can represent pure chemical elements, virtual-crystal atoms representing a statistical occupation of a given site by multiple chemical elements, and/or a location to which there are attached atoms, i.e., atoms whose precise location are unknown beyond that they are attached to that position (frequently used to indicate hydrogen atoms attached to another element, e.g., a carbon with three attached hydrogens might represent a methyl group, -CH3). @@ -633,7 +633,7 @@ class StructureResourceAttributes(EntryResourceAttributes): queryable=SupportLevel.OPTIONAL, ) - species_at_sites: Optional[List[str]] = OptimadeField( + species_at_sites: Optional[list[str]] = OptimadeField( ..., description="""Name of the species at each site (where values for sites are specified with the same order of the property `cartesian_site_positions`). The properties of the species are found in the property `species`. @@ -657,7 +657,7 @@ class StructureResourceAttributes(EntryResourceAttributes): queryable=SupportLevel.OPTIONAL, ) - assemblies: Optional[List[Assembly]] = OptimadeField( + assemblies: Optional[list[Assembly]] = OptimadeField( None, description="""A description of groups of sites that are statistically correlated. @@ -765,7 +765,7 @@ class StructureResourceAttributes(EntryResourceAttributes): queryable=SupportLevel.OPTIONAL, ) - structure_features: List[StructureFeatures] = OptimadeField( + structure_features: list[StructureFeatures] = OptimadeField( ..., title="Structure Features", description="""A list of strings that flag which special features are used by the structure. @@ -961,7 +961,7 @@ def null_values_for_whole_vector(cls, v): return v for vector in v: - if None in vector and any((isinstance(_, float) for _ in vector)): + if None in vector and any(isinstance(_, float) for _ in vector): raise ValueError( f"A lattice vector MUST be either all `null` or all numbers (vector: {vector}, all vectors: {v})" ) @@ -1020,7 +1020,7 @@ def validate_species(cls, v): @validator("structure_features", always=True) def validate_structure_features(cls, v, values): - if [StructureFeatures(value) for value in sorted((_.value for _ in v))] != v: + if [StructureFeatures(value) for value in sorted(_.value for _ in v)] != v: raise ValueError( f"structure_features MUST be sorted alphabetically, given value: {v}" ) diff --git a/optimade/models/utils.py b/optimade/models/utils.py index 7e04bac07..99a0c04c6 100644 --- a/optimade/models/utils.py +++ b/optimade/models/utils.py @@ -5,7 +5,7 @@ import warnings from enum import Enum from functools import reduce -from typing import TYPE_CHECKING, List, Optional, Union +from typing import TYPE_CHECKING, Optional, Union from pydantic import Field from pydantic.fields import FieldInfo @@ -237,7 +237,7 @@ def _reduce_or_anonymize_formula( import re import sys - numbers: List[int] = [ + numbers: list[int] = [ int(n.strip() or 1) for n in re.split(r"[A-Z][a-z]*", formula)[1:] ] # Need to remove leading 1 from split and convert to ints diff --git a/optimade/server/config.py b/optimade/server/config.py index 3a3e7cea7..da1e6ffe7 100644 --- a/optimade/server/config.py +++ b/optimade/server/config.py @@ -2,7 +2,7 @@ import warnings from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Literal, Optional, Union from pydantic import ( # pylint: disable=no-name-in-module AnyHttpUrl, @@ -14,7 +14,7 @@ from pydantic.env_settings import SettingsSourceCallable from optimade import __api_version__, __version__ -from optimade.models import Implementation, Provider +from optimade.models import Implementation, Provider # type: ignore[attr-defined] DEFAULT_CONFIG_FILE_PATH: str = str(Path.home().joinpath(".optimade.json")) """Default configuration file path. @@ -67,7 +67,19 @@ class SupportedBackend(Enum): MONGOMOCK = "mongomock" -def config_file_settings(settings: BaseSettings) -> Dict[str, Any]: +class SupportedResponseFormats(Enum): + """Enumeration of supported response formats. + + - 'JSON': [JSON](https://www.json.org/json-en.html) + - `JSONL`: [JSONL](https://jsonlines.org/) + + """ + + JSON = "json" + JSONL = "jsonlines" + + +def config_file_settings(settings: BaseSettings) -> dict[str, Any]: """Configuration file settings source. Based on the example in the @@ -87,8 +99,6 @@ def config_file_settings(settings: BaseSettings) -> Dict[str, Any]: import json import os - import yaml - encoding = settings.__config__.env_file_encoding config_file = Path(os.getenv("OPTIMADE_CONFIG_FILE", DEFAULT_CONFIG_FILE_PATH)) @@ -100,6 +110,8 @@ def config_file_settings(settings: BaseSettings) -> Dict[str, Any]: res = json.loads(config_file_content) except json.JSONDecodeError as json_exc: try: + import yaml + # This can essentially also load JSON files, as JSON is a subset of YAML v1, # but I suspect it is not as rigorous res = yaml.safe_load(config_file_content) @@ -153,7 +165,7 @@ class ServerConfig(BaseSettings): description="Which database backend to use out of the supported backends.", ) - elastic_hosts: Optional[Union[str, List[str], Dict, List[Dict]]] = Field( + elastic_hosts: Optional[Union[str, list[str], dict, list[dict]]] = Field( None, description="Host settings to pass through to the `Elasticsearch` class." ) @@ -178,6 +190,10 @@ class ServerConfig(BaseSettings): "structures", description="Mongo collection name for /structures endpoint resources", ) + partial_data_collection: str = Field( + "fs", + description="Mongo Grid FS system containing the data that needs to be returned via the partial data mechanism.", + ) page_limit: int = Field(20, description="Default number of resources per page") page_limit_max: int = Field( 500, description="Max allowed number of resources per page" @@ -224,9 +240,9 @@ class ServerConfig(BaseSettings): ), description="General information about the provider of this OPTIMADE implementation", ) - provider_fields: Dict[ + provider_fields: dict[ Literal["links", "references", "structures"], - List[Union[str, Dict[Literal["name", "type", "unit", "description"], str]]], + list[Union[str, dict[Literal["name", "type", "unit", "description"], str]]], ] = Field( {}, description=( @@ -234,15 +250,18 @@ class ServerConfig(BaseSettings): "broken down by endpoint." ), ) - aliases: Dict[Literal["links", "references", "structures"], Dict[str, str]] = Field( + supported_prefixes: list[str] = Field( + [], description="A list of all the prefixes that are supported by this server." + ) + aliases: dict[Literal["links", "references", "structures"], dict[str, str]] = Field( {}, description=( "A mapping between field names in the database with their corresponding OPTIMADE field" " names, broken down by endpoint." ), ) - length_aliases: Dict[ - Literal["links", "references", "structures"], Dict[str, str] + length_aliases: dict[ + Literal["links", "references", "structures"], dict[str, str] ] = Field( {}, description=( @@ -308,6 +327,20 @@ class ServerConfig(BaseSettings): description="""If False, data from the database will not undergo validation before being emitted by the API, and only the mapping of aliases will occur.""", ) + partial_data_formats: list[SupportedResponseFormats] = Field( + ["json", "jsonlines"], + description="""A list of the response formats that are supported by this server. Must include the "json" format.""", + ) + max_response_size: dict[SupportedResponseFormats, int] = Field( + {"json": 10, "jsonlines": 10}, + description="""This dictionary contains the approximate maximum size for a trajectory response in megabytes for the different response_formats. The keys indicate the response_format and the values the maximum size.""", + ) + + @validator("supported_prefixes") + def add_own_prefix_to_supported_prefixes(value, values): + if values["provider"].prefix not in value: + value.append(values["provider"].prefix) + return value @validator("implementation", pre=True) def set_implementation_version(cls, v): @@ -354,7 +387,7 @@ def customise_sources( init_settings: SettingsSourceCallable, env_settings: SettingsSourceCallable, file_secret_settings: SettingsSourceCallable, - ) -> Tuple[SettingsSourceCallable, ...]: + ) -> tuple[SettingsSourceCallable, ...]: """ **Priority of config settings sources**: diff --git a/optimade/server/data/__init__.py b/optimade/server/data/__init__.py index 87060d387..b4cee3665 100644 --- a/optimade/server/data/__init__.py +++ b/optimade/server/data/__init__.py @@ -10,6 +10,18 @@ "providers": "providers.json", } +data_files = [ + ( + "mpf_551:cartesian_site_positions.npy", + "numpy", + { + "endpoint": "structures", + "parent_id": "mpf_551", + "property_name": "cartesian_site_positions", + "dim_names": ["dim_sites", "dim_cartesian_dimensions"], + }, + ) +] for var, path in data_paths.items(): try: diff --git a/optimade/server/data/mpf_551:cartesian_site_positions.npy b/optimade/server/data/mpf_551:cartesian_site_positions.npy new file mode 100644 index 000000000..a996c9266 Binary files /dev/null and b/optimade/server/data/mpf_551:cartesian_site_positions.npy differ diff --git a/optimade/server/data/test_structures.json b/optimade/server/data/test_structures.json index 5a91e3b61..7ef98df69 100644 --- a/optimade/server/data/test_structures.json +++ b/optimade/server/data/test_structures.json @@ -3,6 +3,13 @@ "_id": { "$oid": "5cfb441f053b174410700d02" }, + "meta": { + "property_metadata": { + "elements_ratios": { + "_exmpl_originates_from_project": "Pure Metals" + } + } + }, "assemblies": null, "chemsys": "Ac", "cartesian_site_positions": [ @@ -80,6 +87,13 @@ "_id": { "$oid": "5cfb441f053b174410700d03" }, + "meta": { + "property_metadata": { + "elements_ratios": { + "_exmpl_originates_from_project": "Actinides_Alloys" + } + } + }, "assemblies": null, "chemsys": "Ac-Ag-Ir", "cartesian_site_positions": [ @@ -197,6 +211,13 @@ "_id": { "$oid": "5cfb441f053b174410700d04" }, + "meta": { + "property_metadata": { + "elements_ratios": { + "_exmpl_originates_from_project": "Actinides_Alloys" + } + } + }, "assemblies": null, "chemsys": "Ac-Ag-Pb", "cartesian_site_positions": [ @@ -323,6 +344,13 @@ "_id": { "$oid": "5cfb441f053b174410700d18" }, + "meta": { + "property_metadata": { + "elements_ratios": { + "_exmpl_originates_from_project": "Actinides_Alloys" + } + } + }, "assemblies": null, "chemsys": "Ac-Mg", "cartesian_site_positions": [ @@ -413,6 +441,13 @@ "_id": { "$oid": "5cfb441f053b174410700d1f" }, + "meta": { + "property_metadata": { + "elements_ratios": { + "_exmpl_originates_from_project": null + } + } + }, "assemblies": null, "chemsys": "Ac-O", "cartesian_site_positions": [ @@ -515,6 +550,11 @@ "_id": { "$oid": "5cfb441f053b174410700d6f" }, + "meta": { + "property_metadata": { + "elements_ratios": {} + } + }, "assemblies": null, "chemsys": "Ac-Cu-F-O", "cartesian_site_positions": [ @@ -639,6 +679,13 @@ "_id": { "$oid": "5cfb441f053b174410700dc9" }, + "meta": { + "property_metadata": { + "elements_ratios": { + "_exmpl_originates_from_project": "Pure Metals" + } + } + }, "assemblies": null, "chemsys": "Ag", "cartesian_site_positions": [ @@ -706,6 +753,11 @@ "_id": { "$oid": "5cfb441f053b174410700ddd" }, + "meta": { + "property_metadata": { + "elements_ratios": null + } + }, "assemblies": null, "chemsys": "Ag-Br-Cl-Te", "cartesian_site_positions": [ @@ -896,6 +948,9 @@ "_id": { "$oid": "5cfb441f053b174410700e04" }, + "meta": { + "property_metadata": {} + }, "assemblies": null, "chemsys": "Ag-C-Cl-N-O-S", "cartesian_site_positions": [ @@ -1072,6 +1127,9 @@ "_id": { "$oid": "5cfb441f053b174410700e11" }, + "meta": { + "property_metadata": null + }, "assemblies": null, "chemsys": "Ag-C-Cl-H-N", "cartesian_site_positions": [ @@ -2067,378 +2125,12 @@ }, "assemblies": null, "chemsys": "Ag-B-C-Cl-H-N-O-P", - "cartesian_site_positions": [ - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ], - [ - 0.449480176317956, - 0.449480176317956, - 0.449480176317956 - ] - ], + "cartesian_site_positions": null, + "meta": { + "partial_data_links": { + "cartesian_site_positions": [] + } + }, "dimension_types": [ 1, 1, diff --git a/optimade/server/entry_collections/elasticsearch.py b/optimade/server/entry_collections/elasticsearch.py index d14ad6499..7b0fffc7e 100644 --- a/optimade/server/entry_collections/elasticsearch.py +++ b/optimade/server/entry_collections/elasticsearch.py @@ -1,6 +1,7 @@ import json +from collections.abc import Iterable from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple, Type +from typing import Any, Optional from optimade.filtertransformers.elasticsearch import ElasticTransformer from optimade.models import EntryResource @@ -24,8 +25,8 @@ class ElasticCollection(EntryCollection): def __init__( self, name: str, - resource_cls: Type[EntryResource], - resource_mapper: Type[BaseResourceMapper], + resource_cls: type[EntryResource], + resource_mapper: type[BaseResourceMapper], client: Optional["Elasticsearch"] = None, ): """Initialize the ElasticCollection for the given parameters. @@ -78,7 +79,7 @@ def create_optimade_index(self) -> None: LOGGER.debug(f"Created Elastic index for {self.name!r} with parameters {body}") @property - def predefined_index(self) -> Dict[str, Any]: + def predefined_index(self) -> dict[str, Any]: """Loads and returns the default pre-defined index.""" with open(Path(__file__).parent.joinpath("elastic_indexes.json")) as f: index = json.load(f) @@ -86,8 +87,8 @@ def predefined_index(self) -> Dict[str, Any]: @staticmethod def create_elastic_index_from_mapper( - resource_mapper: Type[BaseResourceMapper], fields: Iterable[str] - ) -> Dict[str, Any]: + resource_mapper: type[BaseResourceMapper], fields: Iterable[str] + ) -> dict[str, Any]: """Create a fallback elastic index based on a resource mapper. Arguments: @@ -110,7 +111,7 @@ def __len__(self): """Returns the total number of entries in the collection.""" return Search(using=self.client, index=self.name).execute().hits.total.value - def insert(self, data: List[EntryResource]) -> None: + def insert(self, data: list[EntryResource]) -> None: """Add the given entries to the underlying database. Warning: @@ -123,7 +124,7 @@ def insert(self, data: List[EntryResource]) -> None: def get_id(item): if self.name == "links": - id_ = "%s-%s" % (item["id"], item["type"]) + id_ = f"{item['id']}-{item['type']}" elif "id" in item: id_ = item["id"] elif "_id" in item: @@ -148,8 +149,8 @@ def get_id(item): ) def _run_db_query( - self, criteria: Dict[str, Any], single_entry=False - ) -> Tuple[List[Dict[str, Any]], int, bool]: + self, criteria: dict[str, Any], single_entry=False + ) -> tuple[list[dict[str, Any]], int, bool]: """Run the query on the backend and collect the results. Arguments: diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index d59bb8713..a6fbf90f4 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -2,16 +2,24 @@ import re import warnings from abc import ABC, abstractmethod -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union +from collections.abc import Iterable +from typing import Any, Optional, Union from lark import Transformer from optimade.exceptions import BadRequest, Forbidden, NotFound from optimade.filterparser import LarkParser from optimade.models.entries import EntryResource -from optimade.server.config import CONFIG, SupportedBackend -from optimade.server.mappers import BaseResourceMapper -from optimade.server.query_params import EntryListingQueryParams, SingleEntryQueryParams +from optimade.server.config import CONFIG, SupportedBackend, SupportedResponseFormats +from optimade.server.mappers import ( # type: ignore[attr-defined] + BaseResourceMapper, + PartialDataMapper, +) +from optimade.server.query_params import ( + EntryListingQueryParams, + PartialDataQueryParams, + SingleEntryQueryParams, +) from optimade.warnings import ( FieldValueNotRecognized, QueryParamNotUsed, @@ -21,8 +29,8 @@ def create_collection( name: str, - resource_cls: Type[EntryResource], - resource_mapper: Type[BaseResourceMapper], + resource_cls: type[EntryResource], + resource_mapper: type[BaseResourceMapper], ) -> "EntryCollection": """Create an `EntryCollection` of the configured type, depending on the value of `CONFIG.database_backend`. @@ -40,6 +48,17 @@ def create_collection( SupportedBackend.MONGODB, SupportedBackend.MONGOMOCK, ): + from optimade.models import PartialDataResource + + if resource_cls is PartialDataResource: + from optimade.server.entry_collections.mongo import GridFSCollection + + return GridFSCollection( + name=name, + resource_cls=resource_cls, + resource_mapper=resource_mapper, + ) + from optimade.server.entry_collections.mongo import MongoCollection return MongoCollection( @@ -83,8 +102,8 @@ class EntryCollection(ABC): def __init__( self, - resource_cls: Type[EntryResource], - resource_mapper: Type[BaseResourceMapper], + resource_cls: type[EntryResource], + resource_mapper: type[BaseResourceMapper], transformer: Transformer, ): """Initialize the collection for the given parameters. @@ -110,14 +129,14 @@ def __init__( for field in CONFIG.provider_fields.get(resource_mapper.ENDPOINT, []) ] - self._all_fields: Set[str] = set() + self._all_fields: set[str] = set() @abstractmethod def __len__(self) -> int: """Returns the total number of entries in the collection.""" @abstractmethod - def insert(self, data: List[EntryResource]) -> None: + def insert(self, data: list[EntryResource]) -> None: """Add the given entries to the underlying database. Arguments: @@ -136,8 +155,11 @@ def count(self, **kwargs: Any) -> Union[int, None]: """ def find( - self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] - ) -> Tuple[Union[None, Dict, List[Dict]], Optional[int], bool, Set[str], Set[str],]: + self, + params: Union[ + EntryListingQueryParams, SingleEntryQueryParams, PartialDataQueryParams + ], + ) -> tuple[Union[None, dict, list[dict]], Optional[int], bool, set[str], set[str]]: """ Fetches results and indicates if more data is available. @@ -158,7 +180,9 @@ def find( """ criteria = self.handle_query_params(params) - single_entry = isinstance(params, SingleEntryQueryParams) + single_entry = isinstance( + params, (SingleEntryQueryParams, PartialDataQueryParams) + ) response_fields = criteria.pop("fields") raw_results, data_returned, more_data_available = self._run_db_query( @@ -166,40 +190,45 @@ def find( ) exclude_fields = self.all_fields - response_fields - include_fields = ( - response_fields - self.resource_mapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS - ) - bad_optimade_fields = set() - bad_provider_fields = set() supported_prefixes = self.resource_mapper.SUPPORTED_PREFIXES all_attributes = self.resource_mapper.ALL_ATTRIBUTES - for field in include_fields: - if field not in all_attributes: - if field.startswith("_"): - if any( - field.startswith(f"_{prefix}_") for prefix in supported_prefixes - ): - bad_provider_fields.add(field) - else: - bad_optimade_fields.add(field) - if bad_provider_fields: - warnings.warn( - message=f"Unrecognised field(s) for this provider requested in `response_fields`: {bad_provider_fields}.", - category=UnknownProviderProperty, + if not self.resource_mapper == PartialDataMapper: + include_fields = ( + response_fields - self.resource_mapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS ) + bad_optimade_fields = set() + bad_provider_fields = set() + + for field in include_fields: + if field not in all_attributes: + if field.startswith("_"): + if any( + field.startswith(f"_{prefix}_") + for prefix in supported_prefixes + ): + bad_provider_fields.add(field) + else: + bad_optimade_fields.add(field) + + if bad_provider_fields: + warnings.warn( + message=f"Unrecognised field(s) for this provider requested in `response_fields`: {bad_provider_fields}.", + category=UnknownProviderProperty, + ) - if bad_optimade_fields: - raise BadRequest( - detail=f"Unrecognised OPTIMADE field(s) in requested `response_fields`: {bad_optimade_fields}." - ) + if bad_optimade_fields: + raise BadRequest( + detail=f"Unrecognised OPTIMADE field(s) in requested `response_fields`: {bad_optimade_fields}." + ) + else: + include_fields = set() - results: Union[None, List[Dict], Dict] = None + results: Union[None, list[dict], dict] = None if raw_results: results = [self.resource_mapper.map_back(doc) for doc in raw_results] - if single_entry: results = results[0] # type: ignore[assignment] @@ -224,8 +253,8 @@ def find( @abstractmethod def _run_db_query( - self, criteria: Dict[str, Any], single_entry: bool = False - ) -> Tuple[List[Dict[str, Any]], Optional[int], bool]: + self, criteria: dict[str, Any], single_entry: bool = False + ) -> tuple[list[dict[str, Any]], Optional[int], bool]: """Run the query on the backend and collect the results. Arguments: @@ -239,7 +268,7 @@ def _run_db_query( """ @property - def all_fields(self) -> Set[str]: + def all_fields(self) -> set[str]: """Get the set of all fields handled in this collection, from attribute fields in the schema, provider fields and top-level OPTIMADE fields. @@ -266,7 +295,7 @@ def all_fields(self) -> Set[str]: return self._all_fields - def get_attribute_fields(self) -> Set[str]: + def get_attribute_fields(self) -> set[str]: """Get the set of attribute fields Return only the _first-level_ attribute fields from the schema of the resource class, @@ -297,8 +326,11 @@ def get_attribute_fields(self) -> Set[str]: return set(attributes["properties"].keys()) def handle_query_params( - self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] - ) -> Dict[str, Any]: + self, + params: Union[ + EntryListingQueryParams, SingleEntryQueryParams, PartialDataQueryParams + ], + ) -> dict[str, Any]: """Parse and interpret the backend-agnostic query parameter models into a dictionary that can be used by the specific backend. @@ -329,12 +361,11 @@ def handle_query_params( cursor_kwargs["filter"] = {} # response_format - if ( - getattr(params, "response_format", False) - and params.response_format != "json" + if getattr(params, "response_format", False) and params.response_format not in ( + x.value for x in SupportedResponseFormats ): raise BadRequest( - detail=f"Response format {params.response_format} is not supported, please use response_format='json'" + detail=f"Response format {params.response_format} is not supported, please use one of the supported response formats: {', '.join((x.value for x in SupportedResponseFormats))}" ) # page_limit @@ -404,7 +435,7 @@ def handle_query_params( return cursor_kwargs - def parse_sort_params(self, sort_params: str) -> Iterable[Tuple[str, int]]: + def parse_sort_params(self, sort_params: str) -> Iterable[tuple[str, int]]: """Handles any sort parameters passed to the collection, resolving aliases and dealing with any invalid fields. @@ -416,7 +447,7 @@ def parse_sort_params(self, sort_params: str) -> Iterable[Tuple[str, int]]: sort direction encoded as 1 (ascending) or -1 (descending). """ - sort_spec: List[Tuple[str, int]] = [] + sort_spec: list[tuple[str, int]] = [] for field in sort_params.split(","): sort_dir = 1 if field.startswith("-"): @@ -464,8 +495,8 @@ def parse_sort_params(self, sort_params: str) -> Iterable[Tuple[str, int]]: def get_next_query_params( self, params: EntryListingQueryParams, - results: Union[None, Dict, List[Dict]], - ) -> Dict[str, List[str]]: + results: Union[None, dict, list[dict]], + ) -> dict[str, list[str]]: """Provides url query pagination parameters that will be used in the next link. @@ -477,7 +508,7 @@ def get_next_query_params( A dictionary with the necessary query parameters. """ - query: Dict[str, List[str]] = dict() + query: dict[str, list[str]] = dict() if isinstance(results, list) and results: # If a user passed a particular pagination mechanism, keep using it # Otherwise, use the default pagination mechanism of the collection diff --git a/optimade/server/entry_collections/mongo.py b/optimade/server/entry_collections/mongo.py index b7bd20f9b..c49ebb6b4 100644 --- a/optimade/server/entry_collections/mongo.py +++ b/optimade/server/entry_collections/mongo.py @@ -1,12 +1,17 @@ -from typing import Any, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Optional, Union +from optimade.exceptions import BadRequest from optimade.filtertransformers.mongo import MongoTransformer -from optimade.models import EntryResource -from optimade.server.config import CONFIG, SupportedBackend +from optimade.models import EntryResource # type: ignore[attr-defined] +from optimade.server.config import CONFIG, SupportedBackend, SupportedResponseFormats from optimade.server.entry_collections import EntryCollection from optimade.server.logger import LOGGER -from optimade.server.mappers import BaseResourceMapper -from optimade.server.query_params import EntryListingQueryParams, SingleEntryQueryParams +from optimade.server.mappers import BaseResourceMapper # type: ignore[attr-defined] +from optimade.server.query_params import ( + EntryListingQueryParams, + PartialDataQueryParams, + SingleEntryQueryParams, +) if CONFIG.database_backend.value == "mongodb": from pymongo import MongoClient, version_tuple @@ -21,15 +26,287 @@ LOGGER.info("Using: Real MongoDB (pymongo)") elif CONFIG.database_backend.value == "mongomock": + import mongomock.gridfs from mongomock import MongoClient LOGGER.info("Using: Mock MongoDB (mongomock)") + mongomock.gridfs.enable_gridfs_integration() if CONFIG.database_backend.value in ("mongomock", "mongodb"): CLIENT = MongoClient(CONFIG.mongo_uri) + import gridfs -class MongoCollection(EntryCollection): +class MongoBaseCollection(EntryCollection): + def _check_aliases(self, aliases): + """Check that aliases do not clash with mongo keywords.""" + if any( + alias[0].startswith("$") or alias[1].startswith("$") for alias in aliases + ): + raise RuntimeError(f"Cannot define an alias starting with a '$': {aliases}") + + +class GridFSCollection(MongoBaseCollection): + """Class for querying gridfs collections (implemented by either pymongo or mongomock).""" + + def __init__( + self, + name: str, + resource_cls: type[EntryResource], + resource_mapper: type[BaseResourceMapper], + database: str = CONFIG.mongo_database, + ): + """Initialize the GridFSCollection for the given parameters. + + Parameters: + name: The name of the collection. + resource_cls: The type of entry resource that is stored by the collection. + resource_mapper: A resource mapper object that handles aliases and + format changes between deserialization and response. + database: The name of the underlying MongoDB database to connect to. + + """ + super().__init__( + resource_cls, + resource_mapper, + MongoTransformer(mapper=resource_mapper), + ) + db = MongoClient(CONFIG.mongo_uri)[ + database + ] # Somehow importing the client from optimade.server.entry_collections.mongo gives an error that the type of db is not "Database" even though it is. + + self.collection = gridfs.GridFS(db, name) + + # check aliases do not clash with mongo operators + self._check_aliases(self.resource_mapper.all_aliases()) + self._check_aliases(self.resource_mapper.all_length_aliases()) + + def __len__(self) -> int: + """Returns the total number of entries in the collection.""" + return len(self.collection.list()) + + def count(self, **kwargs: Any) -> int: + """Returns the number of entries matching the query specified + by the keyword arguments. + + Parameters: + **kwargs: Query parameters as keyword arguments. The keys + 'filter', 'skip', 'limit', 'hint' and 'maxTimeMS' will be passed + to the `pymongo.collection.Collection.count_documents` method. + + """ + for k in list(kwargs.keys()): + if k not in ("filter", "skip", "limit", "hint", "maxTimeMS"): + del kwargs[k] + if "filter" not in kwargs: # "filter" is needed for count_documents() + kwargs["filter"] = {} + return len(self.collection.find(**kwargs)) + + def insert(self, data: list) -> None: + """Add the given entries to the underlying database. + + Warning: + No validation is performed on the incoming data. + + Arguments: + data: a list of dictionaries. Each dictionary contains the data belonging to one file. + These dictionaries contain the fields: + data: The file content to add to gridfs. + filename: The filename of the added content. + metadata: extra metadata to add to the gridfs entry. + """ + for entry in data: # todo check whether I can insert multiple files in one go. + self.collection.put(**entry) + + def handle_query_params( + self, params: Union[SingleEntryQueryParams, PartialDataQueryParams] # type: ignore[override] + ) -> dict[str, Any]: + """Parse and interpret the backend-agnostic query parameter models into a dictionary + that can be used by MongoDB. + + This Mongo-specific method calls the base `EntryCollection.handle_query_params` method + and adds additional handling of the MongoDB ObjectID type. + + Parameters: + params: The initialized query parameter model from the server. + + Raises: + Forbidden: If too large of a page limit is provided. + BadRequest: If an invalid request is made, e.g., with incorrect fields + or response format. + + Returns: + A dictionary representation of the query parameters. + + """ + + criteria = super().handle_query_params(params) + # Handle MongoDB ObjectIDs: + # - If they were not requested, then explicitly remove them + # - If they were requested, then cast them to strings in the response + if "_id" not in criteria.get("projection", {}): + criteria["projection"]["_id"] = False + + if "page_above" in criteria: + raise NotImplementedError( + "`page_above` is not implemented for this backend." + ) + + if criteria.get("projection", {}).get("_id"): + criteria["projection"]["_id"] = {"$toString": "$_id"} + + if isinstance(params, PartialDataQueryParams): + entry_id = params.filter.split("=")[1][1:-1] + criteria["filter"] = { + "filename": { + "$eq": f"{entry_id}:{params.response_fields}.npy" + } # todo Should we add support for other file extensions? + } # Todo make sure response fields has only one value + + # response_format + if getattr(params, "response_format", False) and params.response_format not in ( + x.value for x in CONFIG.partial_data_formats + ): + raise BadRequest( + detail=f"Response format {params.response_format} is not supported, please use one of the supported response formats: {', '.join((x.value for x in CONFIG.partial_data_formats))}" + ) + criteria["response_format"] = params.response_format + criteria["property_ranges"] = params.property_ranges + + return criteria + + # todo test if it is more efficient to use the get method of gridfs + def _run_db_query( + self, + criteria: dict[str, Any], + single_entry: bool = False, + ) -> tuple[list[dict[str, Any]], int, bool]: + """Run the query on the backend and collect the results. + + Arguments: + criteria: A dictionary representation of the query parameters. + single_entry: Whether or not the caller is expecting a single entry response. + + Returns: + The list of entries from the database (without any re-mapping), the total number of + entries matching the query and a boolean for whether or not there is more data available. + + """ + + # TODO handle case where the type does not have a fixed width. For example strings or dictionaries. + response_format = criteria.pop("response_format") + max_return_size = ( + CONFIG.max_response_size[SupportedResponseFormats(response_format)] + * 1024 + * 1024 + ) # todo adjust for different output formats(take into account that the number of numbers to read is larger for a text based output format than for a binary format. + results = [] + filterdict = criteria.pop("filter", {}) + + # I have tried to use just **criteria as is mentioned in the documentation but this does not seem to work. + gridcursor = self.collection.find(filterdict) + more_data_available = False + nresults = 0 + # todo add code that can handle very sparse requests where reading individual sections of files is more efficient. + for ( + file_obj + ) in ( + gridcursor + ): # Next throws an error when there are zero files returned, so I use a for loop instead to get one result. + nresults += 1 + metadata = file_obj.metadata + property_ranges = self.parse_property_ranges( + criteria.pop("property_ranges", None), + metadata["slice_obj"], + metadata["dim_names"], + ) + item_size = metadata["dtype"]["itemsize"] + dim_sizes = [ + (i["stop"] - i["start"] + 1) // i["step"] for i in metadata["slice_obj"] + ] + top_stepsize = 1 + for i in dim_sizes[1:]: + top_stepsize *= i + offset = (property_ranges[0]["start"] - 1) * item_size * top_stepsize + np_header = file_obj.readline() + file_obj.seek( + offset + len(np_header) + ) # set the correct starting point fo the read from the gridfs file system. + if (max_return_size / item_size) < ( + 1 + property_ranges[0]["stop"] - property_ranges[0]["start"] + ) * top_stepsize: # case more data requested then fits in the response + more_data_available = True + n_val_return = max_return_size / item_size + n_outer = max( + int(n_val_return / top_stepsize), 1 + ) # always read at least one line for now. + read_size = n_outer * top_stepsize * item_size + shape = [n_outer] + dim_sizes[1:] + else: + read_size = ( + (1 + property_ranges[0]["stop"] - property_ranges[0]["start"]) + * top_stepsize + * item_size + ) + shape = [ + 1 + property_ranges[0]["stop"] - property_ranges[0]["start"] + ] + dim_sizes[1:] + + values = file_obj.read(read_size) + entry = { + "id": metadata.get("parent_id", None), + "type": metadata.get("endpoint", None), + } + results = [ + { + "type": "partial_data", + "id": str(file_obj._id), + "property_name": metadata.get("property_name", None), + "entry": entry, + "data": values, + "dtype": metadata["dtype"], + "shape": shape, + "property_ranges": property_ranges, + } + ] + if more_data_available: + property_ranges_str = f"property_ranges={metadata['dim_names'][0]}:{property_ranges[0]['start']+n_outer}:{property_ranges[0]['stop']}:{property_ranges[0]['step']}" + for i, name in enumerate(metadata["dim_names"][1:]): + property_ranges_str += f",{name}:{property_ranges[i+1]['start']}:{property_ranges[i+1]['stop']}:{property_ranges[i+1]['step']}" + results[0][ + "next" + ] = f"{CONFIG.base_url}/partial_data/{metadata['parent_id']}?response_fields={metadata['property_name']}&response_format={response_format}&{property_ranges_str}" + break + + return results, nresults, more_data_available + + def parse_property_ranges( + self, property_range_str: str, attribute_slice_obj: list, dim_names: list + ) -> list[dict]: + property_range_dict = {} + if property_range_str: + ranges = [dimrange.split(":") for dimrange in property_range_str.split(",")] + + for subrange in ranges: + property_range_dict[subrange[0]] = { + "start": int(subrange[1]) + if subrange[1] + else attribute_slice_obj[dim_names.index(subrange[0])]["start"], + "stop": int(subrange[2]) + if subrange[2] + else attribute_slice_obj[dim_names.index(subrange[0])]["stop"], + "step": int(subrange[3]) + if subrange[3] + else attribute_slice_obj[dim_names.index(subrange[0])]["step"], + } + for i, dim in enumerate(dim_names): + if dim not in property_range_dict: + property_range_dict[dim] = attribute_slice_obj[i] + + return [property_range_dict[dim] for dim in dim_names] + + +class MongoCollection(MongoBaseCollection): """Class for querying MongoDB collections (implemented by either pymongo or mongomock) containing serialized [`EntryResource`][optimade.models.entries.EntryResource]s objects. @@ -38,8 +315,8 @@ class MongoCollection(EntryCollection): def __init__( self, name: str, - resource_cls: Type[EntryResource], - resource_mapper: Type[BaseResourceMapper], + resource_cls: type[EntryResource], + resource_mapper: type[BaseResourceMapper], database: str = CONFIG.mongo_database, ): """Initialize the MongoCollection for the given parameters. @@ -91,7 +368,7 @@ def count(self, **kwargs: Any) -> Union[int, None]: except ExecutionTimeout: return None - def insert(self, data: List[EntryResource]) -> None: + def insert(self, data: list[EntryResource]) -> None: """Add the given entries to the underlying database. Warning: @@ -104,8 +381,8 @@ def insert(self, data: List[EntryResource]) -> None: self.collection.insert_many(data) def handle_query_params( - self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] - ) -> Dict[str, Any]: + self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] # type: ignore[override] + ) -> dict[str, Any]: """Parse and interpret the backend-agnostic query parameter models into a dictionary that can be used by MongoDB. @@ -142,8 +419,8 @@ def handle_query_params( return criteria def _run_db_query( - self, criteria: Dict[str, Any], single_entry: bool = False - ) -> Tuple[List[Dict[str, Any]], Optional[int], bool]: + self, criteria: dict[str, Any], single_entry: bool = False + ) -> tuple[list[dict[str, Any]], Optional[int], bool]: """Run the query on the backend and collect the results. Arguments: @@ -182,10 +459,3 @@ def _run_db_query( more_data_available = False return results, data_returned, more_data_available - - def _check_aliases(self, aliases): - """Check that aliases do not clash with mongo keywords.""" - if any( - alias[0].startswith("$") or alias[1].startswith("$") for alias in aliases - ): - raise RuntimeError(f"Cannot define an alias starting with a '$': {aliases}") diff --git a/optimade/server/exception_handlers.py b/optimade/server/exception_handlers.py index 06fc083a8..909ad61b8 100644 --- a/optimade/server/exception_handlers.py +++ b/optimade/server/exception_handlers.py @@ -1,5 +1,6 @@ import traceback -from typing import Callable, Iterable, List, Optional, Tuple, Type, Union +from collections.abc import Iterable +from typing import Callable, Optional, Union from fastapi import Request from fastapi.encoders import jsonable_encoder @@ -18,7 +19,7 @@ def general_exception( request: Request, exc: Exception, status_code: int = 500, # A status_code in `exc` will take precedence - errors: Optional[List[OptimadeError]] = None, + errors: Optional[list[OptimadeError]] = None, ) -> JSONAPIResponse: """Handle an exception @@ -221,8 +222,8 @@ def general_exception_handler(request: Request, exc: Exception) -> JSONAPIRespon OPTIMADE_EXCEPTIONS: Iterable[ - Tuple[ - Type[Exception], + tuple[ + type[Exception], Callable[[Request, Exception], JSONAPIResponse], ] ] = [ @@ -230,7 +231,7 @@ def general_exception_handler(request: Request, exc: Exception) -> JSONAPIRespon (OptimadeHTTPException, http_exception_handler), (RequestValidationError, request_validation_exception_handler), (ValidationError, validation_exception_handler), - (VisitError, grammar_not_implemented_handler), + (VisitError, grammar_not_implemented_handler), # type: ignore[list-item] # not entirely sure why this entry triggers mypy (NotImplementedError, not_implemented_handler), # type: ignore[list-item] # not entirely sure why this entry triggers mypy (Exception, general_exception_handler), ] diff --git a/optimade/server/main.py b/optimade/server/main.py index 4f95dcccb..f6acf9c7c 100644 --- a/optimade/server/main.py +++ b/optimade/server/main.py @@ -25,6 +25,7 @@ info, landing, links, + partial_data, references, structures, versions, @@ -49,7 +50,6 @@ title="OPTIMADE API", description=( f"""The [Open Databases Integration for Materials Design (OPTIMADE) consortium](https://www.optimade.org/) aims to make materials databases interoperational by developing a common REST API. - This specification is generated using [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/tree/v{__version__}) v{__version__}.""" ), version=__api_version__, @@ -68,6 +68,41 @@ from optimade.server.routers import ENTRY_COLLECTIONS from optimade.server.routers.utils import get_providers + # Todo Do we need to check a file is not already stored in gridfs? + # Load test data from files into gridfs + + if CONFIG.database_backend.value in ("mongomock", "mongodb"): + from pathlib import Path + + import numpy + + from optimade.server.routers.partial_data import partial_data_coll + + # todo create seperate function for storing data files in gridfs + # read_array_header function originally from https://stackoverflow.com/a/64226659 by https://stackoverflow.com/users/982257/iguananaut + def read_array_header(fobj): + version = numpy.lib.format.read_magic(fobj) + func_name = "read_array_header_" + "_".join(str(v) for v in version) + func = getattr(numpy.lib.format, func_name) + return func(fobj) + + for filename, filetype, metadata in getattr(data, "data_files", []): + with open(Path(__file__).parent / "data" / filename, "rb") as f: + if filetype == "numpy": + numpy_meta = read_array_header(f) + if "slice_obj" not in metadata: + slice_obj = [ + {"start": 1, "stop": i, "step": 1} for i in numpy_meta[0] + ] + metadata["slice_obj"] = slice_obj + if "dtype" not in metadata: + metadata["dtype"] = { + "name": numpy_meta[2].name, + "itemsize": numpy_meta[2].itemsize, + } + f.seek(0) + partial_data_coll.insert([{"data": f, "filename": filename, "metadata": metadata}]) # type: ignore[list-item] # Todo : Perhaps this can be reduced to a single insert statement. + def load_entries(endpoint_name: str, endpoint_collection: EntryCollection): LOGGER.debug("Loading test %s...", endpoint_name) @@ -103,13 +138,13 @@ def load_entries(endpoint_name: str, endpoint_collection: EntryCollection): app.add_exception_handler(exception, handler) # Add various endpoints to unversioned URL -for endpoint in (info, links, references, structures, landing, versions): +for endpoint in (info, links, references, structures, landing, versions, partial_data): app.include_router(endpoint.router) def add_major_version_base_url(app: FastAPI): """Add mandatory vMajor endpoints, i.e. all except versions.""" - for endpoint in (info, links, references, structures, landing): + for endpoint in (info, links, references, structures, landing, partial_data): app.include_router(endpoint.router, prefix=BASE_URL_PREFIXES["major"]) @@ -121,7 +156,7 @@ def add_optional_versioned_base_urls(app: FastAPI): ``` """ for version in ("minor", "patch"): - for endpoint in (info, links, references, structures, landing): + for endpoint in (info, links, references, structures, landing, partial_data): app.include_router(endpoint.router, prefix=BASE_URL_PREFIXES[version]) diff --git a/optimade/server/mappers/__init__.py b/optimade/server/mappers/__init__.py index c38e6ccd0..882abe95b 100644 --- a/optimade/server/mappers/__init__.py +++ b/optimade/server/mappers/__init__.py @@ -1,12 +1,14 @@ # pylint: disable=undefined-variable from .entries import * # noqa: F403 from .links import * # noqa: F403 +from .partial_data import * # noqa: F403 from .references import * # noqa: F403 from .structures import * # noqa: F403 __all__ = ( entries.__all__ # type: ignore[name-defined] # noqa: F405 + links.__all__ # type: ignore[name-defined] # noqa: F405 + + partial_data.__all__ # type: ignore[name-defined] # noqa: F405 + references.__all__ # type: ignore[name-defined] # noqa: F405 + structures.__all__ # type: ignore[name-defined] # noqa: F405 ) diff --git a/optimade/server/mappers/entries.py b/optimade/server/mappers/entries.py index 103671d3a..1269cdad1 100644 --- a/optimade/server/mappers/entries.py +++ b/optimade/server/mappers/entries.py @@ -1,6 +1,7 @@ import warnings +from collections.abc import Iterable from functools import lru_cache -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union +from typing import Any, Optional, Union from optimade.models.entries import EntryResource @@ -65,19 +66,25 @@ class BaseResourceMapper: except (ImportError, ModuleNotFoundError): PROVIDERS = {} - KNOWN_PROVIDER_PREFIXES: Set[str] = set( + KNOWN_PROVIDER_PREFIXES: set[str] = { prov["id"] for prov in PROVIDERS.get("data", []) - ) - ALIASES: Tuple[Tuple[str, str], ...] = () - LENGTH_ALIASES: Tuple[Tuple[str, str], ...] = () - PROVIDER_FIELDS: Tuple[str, ...] = () - ENTRY_RESOURCE_CLASS: Type[EntryResource] = EntryResource - RELATIONSHIP_ENTRY_TYPES: Set[str] = {"references", "structures"} - TOP_LEVEL_NON_ATTRIBUTES_FIELDS: Set[str] = {"id", "type", "relationships", "links"} + } + ALIASES: tuple[tuple[str, str], ...] = () + LENGTH_ALIASES: tuple[tuple[str, str], ...] = () + PROVIDER_FIELDS: tuple[str, ...] = () + ENTRY_RESOURCE_CLASS: type[EntryResource] = EntryResource + RELATIONSHIP_ENTRY_TYPES: set[str] = {"references", "structures"} + TOP_LEVEL_NON_ATTRIBUTES_FIELDS: set[str] = { + "id", + "type", + "relationships", + "links", + "meta", + } @classmethod @lru_cache(maxsize=NUM_ENTRY_TYPES) - def all_aliases(cls) -> Iterable[Tuple[str, str]]: + def all_aliases(cls) -> Iterable[tuple[str, str]]: """Returns all of the associated aliases for this entry type, including those defined by the server config. The first member of each tuple is the OPTIMADE-compliant field name, the second @@ -116,22 +123,14 @@ def all_aliases(cls) -> Iterable[Tuple[str, str]]: @classproperty @lru_cache(maxsize=1) - def SUPPORTED_PREFIXES(cls) -> Set[str]: - """A set of prefixes handled by this entry type. - - !!! note - This implementation only includes the provider prefix, - but in the future this property may be extended to include other - namespaces (for serving fields from, e.g., other providers or - domain-specific terms). - - """ + def SUPPORTED_PREFIXES(cls) -> set[str]: + """A set of prefixes handled by this entry type.""" from optimade.server.config import CONFIG - return {CONFIG.provider.prefix} + return set(CONFIG.supported_prefixes) @classproperty - def ALL_ATTRIBUTES(cls) -> Set[str]: + def ALL_ATTRIBUTES(cls) -> set[str]: """Returns all attributes served by this entry.""" from optimade.server.config import CONFIG @@ -147,11 +146,11 @@ def ALL_ATTRIBUTES(cls) -> Set[str]: for field in CONFIG.provider_fields.get(cls.ENDPOINT, ()) if isinstance(field, dict) ) - .union(set(cls.get_optimade_field(field) for field in cls.PROVIDER_FIELDS)) + .union({cls.get_optimade_field(field) for field in cls.PROVIDER_FIELDS}) ) @classproperty - def ENTRY_RESOURCE_ATTRIBUTES(cls) -> Dict[str, Any]: + def ENTRY_RESOURCE_ATTRIBUTES(cls) -> dict[str, Any]: """Returns the dictionary of attributes defined by the underlying entry resource class.""" from optimade.server.schemas import retrieve_queryable_properties @@ -173,7 +172,7 @@ def ENDPOINT(cls) -> str: @classmethod @lru_cache(maxsize=NUM_ENTRY_TYPES) - def all_length_aliases(cls) -> Tuple[Tuple[str, str], ...]: + def all_length_aliases(cls) -> tuple[tuple[str, str], ...]: """Returns all of the associated length aliases for this class, including those defined by the server config. @@ -368,7 +367,7 @@ def map_back(cls, doc: dict) -> dict: @classmethod def deserialize( cls, results: Union[dict, Iterable[dict]] - ) -> Union[List[EntryResource], EntryResource]: + ) -> Union[list[EntryResource], EntryResource]: """Converts the raw database entries for this class into serialized models, mapping the data along the way. @@ -377,3 +376,38 @@ def deserialize( return cls.ENTRY_RESOURCE_CLASS(**cls.map_back(results)) return [cls.ENTRY_RESOURCE_CLASS(**cls.map_back(doc)) for doc in results] + + @staticmethod + def starts_with_supported_prefix(field: str) -> tuple[bool, Union[str, None]]: + """Tests whether the supplied field has a field that is supported by this server. + Parameters: + field: The field/string for which it should be checked that it starts with a supported prefix. + + Returns: + A boolean which is true if the field/string starts with a supported prefix. + A string, containing the prefix if the field has a prefix otherwise it returns 'None'. + """ + + prefix = None + if field.startswith("_"): + prefix = field.split("_")[1] + if prefix in BaseResourceMapper.SUPPORTED_PREFIXES: + return True, prefix + return False, prefix + + @classmethod + def check_starts_with_supported_prefix(cls, field: str, message: str = "") -> None: + """Raises a value error if the field does not start with a supported prefix. + Parameters: + field: The field/string for which it should be checked that it starts with a supported prefix. + message: An additional error message that will be appended to the default error message. + Returns: + Raises a value error when the field has no valid prefix. + """ + + prefixed, prefix = cls.starts_with_supported_prefix(field) + if not prefixed: + raise ValueError( + f"The field {field} either has no prefix or the prefix {prefix} is not supported by this server." + + message + ) diff --git a/optimade/server/mappers/partial_data.py b/optimade/server/mappers/partial_data.py new file mode 100644 index 000000000..c959c6eae --- /dev/null +++ b/optimade/server/mappers/partial_data.py @@ -0,0 +1,9 @@ +from optimade.models.partial_data import PartialDataResource +from optimade.server.mappers.entries import BaseResourceMapper + +__all__ = ("PartialDataMapper",) + + +class PartialDataMapper(BaseResourceMapper): + LENGTH_ALIASES = () + ENTRY_RESOURCE_CLASS = PartialDataResource diff --git a/optimade/server/middleware.py b/optimade/server/middleware.py index 64cb44f90..79863dee8 100644 --- a/optimade/server/middleware.py +++ b/optimade/server/middleware.py @@ -9,7 +9,8 @@ import re import urllib.parse import warnings -from typing import Generator, Iterable, List, Optional, TextIO, Type, Union +from collections.abc import Generator, Iterable +from typing import Optional, TextIO, Union from starlette.datastructures import URL as StarletteURL from starlette.middleware.base import BaseHTTPMiddleware @@ -111,7 +112,7 @@ class HandleApiHint(BaseHTTPMiddleware): """Handle `api_hint` query parameter.""" @staticmethod - def handle_api_hint(api_hint: List[str]) -> Union[None, str]: + def handle_api_hint(api_hint: list[str]) -> Union[None, str]: """Handle `api_hint` parameter value. There are several scenarios that can play out, when handling the `api_hint` @@ -308,12 +309,12 @@ class AddWarnings(BaseHTTPMiddleware): """ - _warnings: List[Warnings] + _warnings: list[Warnings] def showwarning( self, message: Union[Warning, str], - category: Type[Warning], + category: type[Warning], filename: str, lineno: int, file: Optional[TextIO] = None, diff --git a/optimade/server/query_params.py b/optimade/server/query_params.py index a955d8ef7..d96614fe5 100644 --- a/optimade/server/query_params.py +++ b/optimade/server/query_params.py @@ -1,5 +1,5 @@ from abc import ABC -from typing import Iterable, List +from collections.abc import Iterable from warnings import warn from fastapi import Query @@ -7,7 +7,7 @@ from optimade.exceptions import BadRequest from optimade.server.config import CONFIG -from optimade.server.mappers import BaseResourceMapper +from optimade.server.mappers import BaseResourceMapper # type: ignore[attr-defined] from optimade.warnings import QueryParamNotUsed, UnknownProviderQueryParameter @@ -21,7 +21,7 @@ class BaseQueryParams(ABC): """ - unsupported_params: List[str] = [] + unsupported_params: list[str] = [] def check_params(self, query_params: Iterable[str]) -> None: """This method checks whether all the query parameters that are specified @@ -173,7 +173,7 @@ class EntryListingQueryParams(BaseQueryParams): """ # The reference server implementation only supports offset/number-based pagination - unsupported_params: List[str] = [ + unsupported_params: list[str] = [ "page_cursor", "page_below", ] @@ -324,9 +324,79 @@ def __init__( description="If the client provides the parameter, the value SHOULD have the format `vMAJOR` or `vMAJOR.MINOR`, where MAJOR is a major version and MINOR is a minor version of the API. For example, if a client appends `api_hint=v1.0` to the query string, the hint provided is for major version 1 and minor version 0.", pattern=r"(v[0-9]+(\.[0-9]+)?)?", ), + property_ranges: str = Query( + None, + description="A list of lists which contains a range for each dimension of the property.", + ), ): self.response_format = response_format self.email_address = email_address self.response_fields = response_fields self.include = include self.api_hint = api_hint + self.property_ranges = property_ranges + + +class PartialDataQueryParams(BaseQueryParams): + """ + Common query params for single entry endpoints. + + Attributes: + response_format (str): The output format requested (see section Response Format). + Defaults to the format string 'json', which specifies the standard output format described in this specification. + + **Example**: `http://example.com/v1/structures?response_format=xml` + + email_address (EmailStr): An email address of the user making the request. + The email SHOULD be that of a person and not an automatic system. + + **Example**: `http://example.com/v1/structures?email_address=user@example.com` + + response_fields (str): A comma-delimited set of fields to be provided in the output. + If provided, these fields MUST be returned along with the REQUIRED fields. + Other OPTIONAL fields MUST NOT be returned when this parameter is present. + + **Example**: `http://example.com/v1/structures?response_fields=last_modified,nsites` + + api_hint (str): If the client provides the parameter, the value SHOULD have the format `vMAJOR` or `vMAJOR.MINOR`, + where MAJOR is a major version and MINOR is a minor version of the API. + For example, if a client appends `api_hint=v1.0` to the query string, the hint provided is for major version 1 and minor version 0. + + """ + + def __init__( + self, + *, + response_format: str = Query( + "jsonlines", + description="The output format requested (see section Response Format).\nDefaults to the format string 'json', which specifies the standard output format described in this specification.\nExample: `http://example.com/v1/structures?response_format=xml`", + ), + email_address: EmailStr = Query( + "", + description="An email address of the user making the request.\nThe email SHOULD be that of a person and not an automatic system.\nExample: `http://example.com/v1/structures?email_address=user@example.com`", + ), + api_hint: str = Query( + "", + description="If the client provides the parameter, the value SHOULD have the format `vMAJOR` or `vMAJOR.MINOR`, where MAJOR is a major version and MINOR is a minor version of the API. For example, if a client appends `api_hint=v1.0` to the query string, the hint provided is for major version 1 and minor version 0.", + pattern=r"(v[0-9]+(\.[0-9]+)?)?", + ), + response_fields: str = Query( + "", + description="A comma-delimited set of fields to be provided in the output.\nIf provided, these fields MUST be returned along with the REQUIRED fields.\nOther OPTIONAL fields MUST NOT be returned when this parameter is present.\nExample: `http://example.com/v1/structures?response_fields=last_modified,nsites`", + pattern=r"([a-z_][a-z_0-9]*(,[a-z_][a-z_0-9]*)*)?", + ), + filter: str = Query( # pylint: disable=redefined-builtin + "", + description="A filter string, in the format described in section API Filtering Format Specification of the specification.", + ), + property_ranges: str = Query( + "", + description="A list of lists which contains a range for each dimension of the property.", + ), + ): + self.filter = filter + self.response_format = response_format + self.email_address = email_address + self.response_fields = response_fields + self.api_hint = api_hint + self.property_ranges = property_ranges diff --git a/optimade/server/routers/landing.py b/optimade/server/routers/landing.py index 3b51f2a1c..8860ae252 100644 --- a/optimade/server/routers/landing.py +++ b/optimade/server/routers/landing.py @@ -13,7 +13,7 @@ from optimade.server.routers.utils import get_base_url, meta_values -@lru_cache() +@lru_cache def render_landing_page(url: str) -> HTMLResponse: """Render and cache the landing page. diff --git a/optimade/server/routers/links.py b/optimade/server/routers/links.py index 025d9f76c..a01bcb797 100644 --- a/optimade/server/routers/links.py +++ b/optimade/server/routers/links.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any from fastapi import APIRouter, Depends, Request @@ -21,7 +21,7 @@ @router.get( "/links", - response_model=LinksResponse if CONFIG.validate_api_response else Dict, + response_model=LinksResponse if CONFIG.validate_api_response else dict, response_model_exclude_unset=True, tags=["Links"], responses=ERROR_RESPONSES, diff --git a/optimade/server/routers/partial_data.py b/optimade/server/routers/partial_data.py new file mode 100644 index 000000000..5a9a59436 --- /dev/null +++ b/optimade/server/routers/partial_data.py @@ -0,0 +1,36 @@ +from typing import Any + +from fastapi import APIRouter, Depends, Request + +from optimade.models import PartialDataResource # type: ignore[attr-defined] +from optimade.server.config import CONFIG +from optimade.server.entry_collections import create_collection +from optimade.server.mappers import PartialDataMapper +from optimade.server.query_params import PartialDataQueryParams +from optimade.server.routers.utils import get_partial_entry +from optimade.server.schemas import ERROR_RESPONSES + +router = APIRouter(redirect_slashes=True) + +partial_data_coll = create_collection( + name=CONFIG.partial_data_collection, + resource_cls=PartialDataResource, + resource_mapper=PartialDataMapper, +) + + +@router.get( + "/partial_data/{entry_id:path}", + response_model_exclude_unset=True, + tags=["partial_data"], + responses=ERROR_RESPONSES, +) +def get_partial_data( + request: Request, entry_id: str, params: PartialDataQueryParams = Depends() +) -> Any: + return get_partial_entry( + collection=partial_data_coll, + entry_id=entry_id, + request=request, + params=params, + ) diff --git a/optimade/server/routers/references.py b/optimade/server/routers/references.py index 2508c700a..b2b55dda5 100644 --- a/optimade/server/routers/references.py +++ b/optimade/server/routers/references.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any from fastapi import APIRouter, Depends, Request @@ -25,7 +25,7 @@ @router.get( "/references", - response_model=ReferenceResponseMany if CONFIG.validate_api_response else Dict, + response_model=ReferenceResponseMany if CONFIG.validate_api_response else dict, response_model_exclude_unset=True, tags=["References"], responses=ERROR_RESPONSES, @@ -43,7 +43,7 @@ def get_references( @router.get( "/references/{entry_id:path}", - response_model=ReferenceResponseOne if CONFIG.validate_api_response else Dict, + response_model=ReferenceResponseOne if CONFIG.validate_api_response else dict, response_model_exclude_unset=True, tags=["References"], responses=ERROR_RESPONSES, diff --git a/optimade/server/routers/structures.py b/optimade/server/routers/structures.py index 00980b246..1253d53d3 100644 --- a/optimade/server/routers/structures.py +++ b/optimade/server/routers/structures.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any from fastapi import APIRouter, Depends, Request @@ -10,8 +10,15 @@ from optimade.server.config import CONFIG from optimade.server.entry_collections import create_collection from optimade.server.mappers import StructureMapper -from optimade.server.query_params import EntryListingQueryParams, SingleEntryQueryParams -from optimade.server.routers.utils import get_entries, get_single_entry +from optimade.server.query_params import ( + EntryListingQueryParams, + SingleEntryQueryParams, +) +from optimade.server.routers.utils import ( + get_entries, + get_partial_entry, + get_single_entry, +) from optimade.server.schemas import ERROR_RESPONSES router = APIRouter(redirect_slashes=True) @@ -25,7 +32,7 @@ @router.get( "/structures", - response_model=StructureResponseMany if CONFIG.validate_api_response else Dict, + response_model=StructureResponseMany if CONFIG.validate_api_response else dict, response_model_exclude_unset=True, tags=["Structures"], responses=ERROR_RESPONSES, @@ -43,14 +50,26 @@ def get_structures( @router.get( "/structures/{entry_id:path}", - response_model=StructureResponseOne if CONFIG.validate_api_response else Dict, + response_model=StructureResponseOne if CONFIG.validate_api_response else dict, response_model_exclude_unset=True, tags=["Structures"], responses=ERROR_RESPONSES, ) def get_single_structure( - request: Request, entry_id: str, params: SingleEntryQueryParams = Depends() + request: Request, + entry_id: str, + params: SingleEntryQueryParams = Depends(), ) -> Any: + if params.property_ranges is not None: # todo add test for this + from optimade.server.routers.partial_data import partial_data_coll + + return get_partial_entry( + collection=partial_data_coll, + entry_id=entry_id, + request=request, + params=params, # type: ignore[arg-type] + ) + return get_single_entry( collection=structures_coll, entry_id=entry_id, diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index cd85a76ed..868bc20c5 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -1,25 +1,31 @@ # pylint: disable=import-outside-toplevel,too-many-locals +import io import re import urllib.parse from datetime import datetime -from typing import Any, Dict, List, Optional, Set, Type, Union +from typing import Any, Optional, Union -from fastapi import Request +import numpy as np +from fastapi import Request, Response from fastapi.responses import JSONResponse from starlette.datastructures import URL as StarletteURL from optimade import __api_version__ -from optimade.exceptions import BadRequest, InternalServerError -from optimade.models import ( - EntryResource, +from optimade.exceptions import BadRequest, InternalServerError, NotFound +from optimade.models import ( # type: ignore[attr-defined] + EntryResource, # type: ignore[attr-defined] EntryResponseMany, EntryResponseOne, - ResponseMeta, - ToplevelLinks, + ResponseMeta, # type: ignore[attr-defined] + ToplevelLinks, # type: ignore[attr-defined] ) from optimade.server.config import CONFIG from optimade.server.entry_collections import EntryCollection -from optimade.server.query_params import EntryListingQueryParams, SingleEntryQueryParams +from optimade.server.query_params import ( + EntryListingQueryParams, + PartialDataQueryParams, + SingleEntryQueryParams, +) from optimade.utils import PROVIDER_LIST_URLS, get_providers, mongo_id_for_database __all__ = ( @@ -30,6 +36,7 @@ "get_base_url", "get_entries", "get_single_entry", + "get_partial_entry", "mongo_id_for_database", "get_providers", "PROVIDER_LIST_URLS", @@ -62,7 +69,7 @@ def meta_values( **kwargs, ) -> ResponseMeta: """Helper to initialize the meta values""" - from optimade.models import ResponseMetaQuery + from optimade.models import ResponseMetaQuery # type: ignore[attr-defined] if isinstance(url, str): url = urllib.parse.urlparse(url) @@ -91,10 +98,10 @@ def meta_values( def handle_response_fields( - results: Union[List[EntryResource], EntryResource, List[Dict], Dict], - exclude_fields: Set[str], - include_fields: Set[str], -) -> List[Dict[str, Any]]: + results: Union[list[EntryResource], EntryResource, list[dict], dict], + exclude_fields: set[str], + include_fields: set[str], +) -> list[dict[str, Any]]: """Handle query parameter `response_fields`. It is assumed that all fields are under `attributes`. @@ -125,6 +132,13 @@ def handle_response_fields( for field in exclude_fields: if field in new_entry["attributes"]: del new_entry["attributes"][field] + if new_entry.get("meta") and ( + property_meta_data_fields := new_entry.get("meta").get( # type: ignore[union-attr] + "property_metadata" + ) + ): + if field in property_meta_data_fields: + del new_entry["meta"]["property_metadata"][field] # Include missing fields that were requested in `response_fields` for field in include_fields: @@ -137,10 +151,10 @@ def handle_response_fields( def get_included_relationships( - results: Union[EntryResource, List[EntryResource], Dict, List[Dict]], - ENTRY_COLLECTIONS: Dict[str, EntryCollection], - include_param: List[str], -) -> List[Union[EntryResource, Dict]]: + results: Union[EntryResource, list[EntryResource], dict, list[dict]], + ENTRY_COLLECTIONS: dict[str, EntryCollection], + include_param: list[str], +) -> list[Union[EntryResource, dict]]: """Filters the included relationships and makes the appropriate compound request to include them in the response. @@ -168,7 +182,7 @@ def get_included_relationships( f"Known relationship types: {sorted(ENTRY_COLLECTIONS.keys())}" ) - endpoint_includes: Dict[Any, Dict] = defaultdict(dict) + endpoint_includes: dict[Any, dict] = defaultdict(dict) for doc in results: # convert list of references into dict by ID to only included unique IDs if doc is None: @@ -197,12 +211,12 @@ def get_included_relationships( if ref["id"] not in endpoint_includes[entry_type]: endpoint_includes[entry_type][ref["id"]] = ref - included: Dict[ - str, Union[List[EntryResource], EntryResource, List[Dict], Dict] + included: dict[ + str, Union[list[EntryResource], EntryResource, list[dict], dict] ] = {} for entry_type in endpoint_includes: compound_filter = " OR ".join( - ['id="{}"'.format(ref_id) for ref_id in endpoint_includes[entry_type]] + [f'id="{ref_id}"' for ref_id in endpoint_includes[entry_type]] ) params = EntryListingQueryParams( filter=compound_filter, @@ -244,12 +258,38 @@ def get_base_url( ) +def generate_links_partial_data( + results, + parsed_url_request: Union[ + urllib.parse.ParseResult, urllib.parse.SplitResult, StarletteURL, str + ], +): + for entry in results: + if entry.get("meta", {}) and entry["meta"].get("partial_data_links", {}): + for property in entry["meta"]["partial_data_links"]: + for response_format in CONFIG.partial_data_formats: + link = { + "format": str(response_format.value), + "link": get_base_url(parsed_url_request) + + "/partial_data/" + + entry["id"] + + "?response_fields=" + + property + + "&response_format=" + + str(response_format.value), + } + if isinstance(entry["meta"]["partial_data_links"][property], list): + entry["meta"]["partial_data_links"][property].append(link) + else: + entry["meta"]["partial_data_links"][property] = [link] + + def get_entries( collection: EntryCollection, - response: Type[EntryResponseMany], # noqa + response: type[EntryResponseMany], # noqa request: Request, params: EntryListingQueryParams, -) -> Dict: +) -> dict: """Generalized /{entry} endpoint getter""" from optimade.server.routers import ENTRY_COLLECTIONS @@ -268,6 +308,7 @@ def get_entries( included = [] if results is not None: + generate_links_partial_data(results, request.url) included = get_included_relationships(results, ENTRY_COLLECTIONS, include) if more_data_available: @@ -304,10 +345,10 @@ def get_entries( def get_single_entry( collection: EntryCollection, entry_id: str, - response: Type[EntryResponseOne], + response: type[EntryResponseOne], request: Request, params: SingleEntryQueryParams, -) -> Dict: +) -> dict: from optimade.server.routers import ENTRY_COLLECTIONS params.check_params(request.query_params) @@ -332,6 +373,7 @@ def get_single_entry( included = [] if results is not None: included = get_included_relationships(results, ENTRY_COLLECTIONS, include) + generate_links_partial_data([results], request.url) links = ToplevelLinks(next=None) @@ -352,3 +394,116 @@ def get_single_entry( ), included=included, ) + + +def get_partial_entry( + collection: EntryCollection, + entry_id: str, + request: Request, + params: Union[PartialDataQueryParams], +) -> Union[dict, Response]: + # from optimade.server.routers import ENTRY_COLLECTIONS + from optimade.adapters.jsonl import to_jsonl + + params.check_params(request.query_params) + params.filter = f'parent_id="{entry_id}"' + ( + results, + data_returned, + more_data_available, + fields, + include_fields, + ) = collection.find(params) + + links = ToplevelLinks(next=None) + + if results is None: + raise NotFound( + detail=f"No data available for the combination of entry {entry_id} and property {params.response_fields}", + ) + + array = np.frombuffer( + results["attributes"]["data"], # type: ignore[call-overload] + dtype=getattr(np, results["attributes"]["dtype"]["name"]), # type: ignore[call-overload] + ).reshape( + results["attributes"]["shape"] # type: ignore[call-overload] + ) + # slice array + property_ranges = results["attributes"]["property_ranges"] # type: ignore[call-overload] + slice_ind = [ + slice( + 0, + 1 + property_ranges[0]["stop"] - property_ranges[0]["start"], + property_ranges[0]["step"], + ) + ] + for dim_range in property_ranges[1:]: + slice_ind.append( + slice(dim_range["start"] - 1, dim_range["stop"], dim_range["step"]) + ) + array = array[tuple(slice_ind)] + + if fields or include_fields: + results = handle_response_fields(results, fields, include_fields)[0] # type: ignore[assignment] + + slice_obj = [] + for i, size in enumerate(array.shape): + slice_obj.append( + { + "start": property_ranges[i]["start"], + "stop": min( + size * property_ranges[i]["step"] + property_ranges[i]["start"] - 1, + property_ranges[i]["stop"], + ), + "step": property_ranges[i]["step"], + } + ) + header = { + "optimade-partial-data": {"format": "1.2.0"}, + "layout": "dense", + "property_name": params.response_fields, + "returned_ranges": slice_obj, + # "entry": {"id": entry_id, "type": None}, #Todo add type information to metadata entry + "has_references": False, + } # Todo: add support for non_dense data + if more_data_available: + next_link = ["PARTIAL-DATA-NEXT", [results["attributes"].pop("next")]] # type: ignore[call-overload] + + if params.response_format == "json": + for key in header: + results["attributes"][key] = header[key] # type: ignore[call-overload] + results["attributes"]["data"] = array.tolist() # type: ignore[call-overload] + if more_data_available: + results["attributes"]["next"] = next_link # type: ignore[call-overload] + return dict( + links=links, + data=[results] if results else None, + meta=meta_values( + url=request.url, + data_returned=data_returned, + data_available=len(collection), + more_data_available=more_data_available, + schema=CONFIG.schema_url + if not CONFIG.is_index + else CONFIG.index_schema_url, + ), + # included=included, + ) + + jsonl_content = [header] + [array[i].tolist() for i in range(array.shape[0])] + if more_data_available: + jsonl_content.append(next_link) + return Response( + content=to_jsonl(jsonl_content), + media_type="application/jsonlines", + headers={ + "Content-disposition": f"attachment; filename={entry_id + ':' + params.response_fields}.jsonl" + }, + ) + + +def convert_data_to_str(results): + values = results["attributes"]["data"] + if isinstance(values, bytes): + results["attributes"]["data"] = np.array2string(np.load(io.BytesIO(values))) + return results diff --git a/optimade/server/schemas.py b/optimade/server/schemas.py index c01cc914e..fa900a9a3 100644 --- a/optimade/server/schemas.py +++ b/optimade/server/schemas.py @@ -1,4 +1,5 @@ -from typing import Callable, Dict, Iterable, Optional +from collections.abc import Iterable +from typing import Callable, Optional from optimade.models import ( DataType, @@ -9,7 +10,7 @@ __all__ = ("ENTRY_INFO_SCHEMAS", "ERROR_RESPONSES", "retrieve_queryable_properties") -ENTRY_INFO_SCHEMAS: Dict[str, Callable[[], Dict]] = { +ENTRY_INFO_SCHEMAS: dict[str, Callable[[], dict]] = { "structures": StructureResource.schema, "references": ReferenceResource.schema, } @@ -24,7 +25,7 @@ """ from optimade.exceptions import POSSIBLE_ERRORS - ERROR_RESPONSES: Optional[Dict[int, Dict]] = { + ERROR_RESPONSES: Optional[dict[int, dict]] = { err.status_code: {"model": ErrorResponse, "description": err.title} for err in POSSIBLE_ERRORS } diff --git a/optimade/utils.py b/optimade/utils.py index cb3039565..3540263ff 100644 --- a/optimade/utils.py +++ b/optimade/utils.py @@ -4,7 +4,8 @@ """ import json -from typing import Container, Iterable, List, Optional +from collections.abc import Container, Iterable +from typing import Optional from pydantic import ValidationError @@ -102,7 +103,7 @@ def get_providers(add_mongo_id: bool = False) -> list: def get_child_database_links( provider: LinksResource, obey_aggregate: bool = True -) -> List[LinksResource]: +) -> list[LinksResource]: """For a provider, return a list of available child databases. Arguments: diff --git a/optimade/validator/config.py b/optimade/validator/config.py index 390a3f7b8..69a3ed0a6 100644 --- a/optimade/validator/config.py +++ b/optimade/validator/config.py @@ -7,7 +7,8 @@ """ -from typing import Any, Container, Dict, List, Set +from collections.abc import Container +from typing import Any from pydantic import BaseSettings, Field @@ -122,26 +123,26 @@ class ValidatorConfig(BaseSettings): """ - response_classes: Dict[str, Any] = Field( + response_classes: dict[str, Any] = Field( _RESPONSE_CLASSES, description="Dictionary containing the mapping between endpoints and response classes for the main database", ) - response_classes_index: Dict[str, Any] = Field( + response_classes_index: dict[str, Any] = Field( _RESPONSE_CLASSES_INDEX, description="Dictionary containing the mapping between endpoints and response classes for the index meta-database", ) - entry_schemas: Dict[str, Any] = Field( + entry_schemas: dict[str, Any] = Field( _ENTRY_SCHEMAS, description="The entry listing endpoint schemas" ) - entry_endpoints: Set[str] = Field( + entry_endpoints: set[str] = Field( _ENTRY_ENDPOINTS, description="The entry endpoints to validate, if present in the API's `/info` response `entry_types_by_format['json']`", ) - unique_properties: Set[str] = Field( + unique_properties: set[str] = Field( _UNIQUE_PROPERTIES, description=( "Fields that should be treated as unique indexes for all endpoints, " @@ -149,7 +150,7 @@ class ValidatorConfig(BaseSettings): ), ) - inclusive_operators: Dict[DataType, Set[str]] = Field( + inclusive_operators: dict[DataType, set[str]] = Field( _INCLUSIVE_OPERATORS, description=( "Dictionary mapping OPTIMADE `DataType`s to a list of operators that are 'inclusive', " @@ -157,7 +158,7 @@ class ValidatorConfig(BaseSettings): ), ) - exclusive_operators: Dict[DataType, Set[str]] = Field( + exclusive_operators: dict[DataType, set[str]] = Field( _EXCLUSIVE_OPERATORS, description=( "Dictionary mapping OPTIMADE `DataType`s to a list of operators that are 'exclusive', " @@ -165,7 +166,7 @@ class ValidatorConfig(BaseSettings): ), ) - field_specific_overrides: Dict[str, Dict[SupportLevel, Container[str]]] = Field( + field_specific_overrides: dict[str, dict[SupportLevel, Container[str]]] = Field( _FIELD_SPECIFIC_OVERRIDES, description=( "Some fields do not require all type comparison operators to be supported. " @@ -181,16 +182,16 @@ class ValidatorConfig(BaseSettings): ) info_endpoint: str = Field("info", description="The name of the info endpoint") - non_entry_endpoints: Set[str] = Field( + non_entry_endpoints: set[str] = Field( _NON_ENTRY_ENDPOINTS, description="The list specification-mandated endpoint names that do not contain entries", ) - top_level_non_attribute_fields: Set[str] = Field( + top_level_non_attribute_fields: set[str] = Field( BaseResourceMapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS, description="Field names to treat as top-level", ) - enum_fallback_values: Dict[str, Dict[str, List[str]]] = Field( + enum_fallback_values: dict[str, dict[str, list[str]]] = Field( _ENUM_DUMMY_VALUES, description="Provide fallback values for enum fields to use when validating filters.", ) diff --git a/optimade/validator/utils.py b/optimade/validator/utils.py index 81e5e11b7..02d0d3dd1 100644 --- a/optimade/validator/utils.py +++ b/optimade/validator/utils.py @@ -18,7 +18,7 @@ import time import traceback as tb import urllib.parse -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any, Callable, Optional import requests from pydantic import Field, ValidationError @@ -80,11 +80,11 @@ class ValidatorResults: internal_failure_count: int = 0 optional_success_count: int = 0 optional_failure_count: int = 0 - failure_messages: List[Tuple[str, str]] = dataclasses.field(default_factory=list) - internal_failure_messages: List[Tuple[str, str]] = dataclasses.field( + failure_messages: list[tuple[str, str]] = dataclasses.field(default_factory=list) + internal_failure_messages: list[tuple[str, str]] = dataclasses.field( default_factory=list ) - optional_failure_messages: List[Tuple[str, str]] = dataclasses.field( + optional_failure_messages: list[tuple[str, str]] = dataclasses.field( default_factory=list ) verbosity: int = 0 @@ -146,7 +146,7 @@ def add_failure( self.optional_failure_count += 1 self.optional_failure_messages.append((summary, message)) - pprint_types: Dict[str, Tuple[Callable, Callable]] = { + pprint_types: dict[str, tuple[Callable, Callable]] = { "internal": (print_notify, print_warning), "optional": (print, print), } @@ -168,7 +168,7 @@ def __init__( self, base_url: str, max_retries: int = 5, - headers: Optional[Dict[str, str]] = None, + headers: Optional[dict[str, str]] = None, timeout: Optional[float] = DEFAULT_CONN_TIMEOUT, read_timeout: Optional[float] = DEFAULT_READ_TIMEOUT, ) -> None: @@ -267,7 +267,7 @@ def get(self, request: str): raise ResponseError(message) -def test_case(test_fn: Callable[..., Tuple[Any, str]]): +def test_case(test_fn: Callable[..., tuple[Any, str]]): """Wrapper for test case functions, which pretty-prints any errors depending on verbosity level, collates the number and severity of test failures, returns the response and summary string to the caller. @@ -404,19 +404,18 @@ def wrapper( class ValidatorLinksResponse(Success): meta: ResponseMeta = Field(...) - data: List[LinksResource] = Field(...) + data: list[LinksResource] = Field(...) class ValidatorEntryResponseOne(Success): - meta: ResponseMeta = Field(...) data: EntryResource = Field(...) - included: Optional[List[Dict[str, Any]]] = Field(None) # type: ignore[assignment] + included: Optional[list[dict[str, Any]]] = Field(None) # type: ignore[assignment] class ValidatorEntryResponseMany(Success): meta: ResponseMeta = Field(...) - data: List[EntryResource] = Field(...) - included: Optional[List[Dict[str, Any]]] = Field(None) # type: ignore[assignment] + data: list[EntryResource] = Field(...) + included: Optional[list[dict[str, Any]]] = Field(None) # type: ignore[assignment] class ValidatorReferenceResponseOne(ValidatorEntryResponseOne): @@ -424,7 +423,7 @@ class ValidatorReferenceResponseOne(ValidatorEntryResponseOne): class ValidatorReferenceResponseMany(ValidatorEntryResponseMany): - data: List[ReferenceResource] = Field(...) + data: list[ReferenceResource] = Field(...) class ValidatorStructureResponseOne(ValidatorEntryResponseOne): @@ -432,4 +431,4 @@ class ValidatorStructureResponseOne(ValidatorEntryResponseOne): class ValidatorStructureResponseMany(ValidatorEntryResponseMany): - data: List[StructureResource] = Field(...) + data: list[StructureResource] = Field(...) diff --git a/optimade/validator/validator.py b/optimade/validator/validator.py index 82f4b583a..84ca8ae92 100644 --- a/optimade/validator/validator.py +++ b/optimade/validator/validator.py @@ -13,7 +13,7 @@ class that can be pointed at an OPTIMADE implementation and validated import re import sys import urllib.parse -from typing import Any, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Optional, Union import requests @@ -72,7 +72,7 @@ def __init__( # pylint: disable=too-many-arguments as_type: Optional[str] = None, index: bool = False, minimal: bool = False, - http_headers: Optional[Dict[str, str]] = None, + http_headers: Optional[dict[str, str]] = None, timeout: float = DEFAULT_CONN_TIMEOUT, read_timeout: float = DEFAULT_READ_TIMEOUT, ): @@ -176,8 +176,8 @@ def __init__( # pylint: disable=too-many-arguments self.valid = None - self._test_id_by_type: Dict[str, Any] = {} - self._entry_info_by_type: Dict[str, Any] = {} + self._test_id_by_type: dict[str, Any] = {} + self._entry_info_by_type: dict[str, Any] = {} self.results = ValidatorResults(verbosity=self.verbosity) @@ -353,7 +353,7 @@ def validate_implementation(self): self.print_summary() @test_case - def _recurse_through_endpoint(self, endp: str) -> Tuple[Optional[bool], str]: + def _recurse_through_endpoint(self, endp: str) -> tuple[Optional[bool], str]: """For a given endpoint (`endp`), get the entry type and supported fields, testing that all mandatory fields are supported, then test queries on every property according @@ -450,8 +450,8 @@ def _test_unknown_provider_property(self, endp): ) def _check_entry_info( - self, entry_info: Dict[str, Any], endp: str - ) -> Dict[str, Dict[str, Any]]: + self, entry_info: dict[str, Any], endp: str + ) -> dict[str, dict[str, Any]]: """Checks that `entry_info` contains all the required properties, and returns the property list for the endpoint. @@ -473,8 +473,8 @@ def _check_entry_info( @test_case def _test_must_properties( - self, properties: List[str], endp: str - ) -> Tuple[bool, str]: + self, properties: list[str], endp: str + ) -> tuple[bool, str]: """Check that the entry info lists all properties with the "MUST" support level for this endpoint. @@ -486,13 +486,13 @@ def _test_must_properties( `True` if the properties were found, and a string summary. """ - must_props = set( + must_props = { prop for prop in CONF.entry_schemas.get(endp, {}) if CONF.entry_schemas[endp].get(prop, {}).get("support") == SupportLevel.MUST - ) - must_props_supported = set(prop for prop in properties if prop in must_props) + } + must_props_supported = {prop for prop in properties if prop in must_props} missing = must_props - must_props_supported if len(missing) != 0: raise ResponseError( @@ -503,8 +503,8 @@ def _test_must_properties( @test_case def _get_archetypal_entry( - self, endp: str, properties: List[str] - ) -> Tuple[Optional[Dict[str, Any]], str]: + self, endp: str, properties: list[str] + ) -> tuple[Optional[dict[str, Any]], str]: """Get a random entry from the first page of results for this endpoint. @@ -544,8 +544,8 @@ def _get_archetypal_entry( @test_case def _check_response_fields( - self, endp: str, fields: List[str] - ) -> Tuple[Optional[bool], str]: + self, endp: str, fields: list[str] + ) -> tuple[Optional[bool], str]: """Check that the response field query parameter is obeyed. Parameters: @@ -593,8 +593,8 @@ def _construct_queries_for_property( prop_type: DataType, sortable: bool, endp: str, - chosen_entry: Dict[str, Any], - ) -> Tuple[Optional[bool], str]: + chosen_entry: dict[str, Any], + ) -> tuple[Optional[bool], str]: """For the given property, property type and chose entry, this method runs a series of queries for each field in the entry, testing that the initial document is returned where expected. @@ -704,9 +704,9 @@ def _construct_single_property_filters( prop_type: DataType, sortable: bool, endp: str, - chosen_entry: Dict[str, Any], + chosen_entry: dict[str, Any], query_optional: bool, - ) -> Tuple[Optional[bool], str]: + ) -> tuple[Optional[bool], str]: """This method constructs appropriate queries using all operators for a certain field and applies some tests: @@ -847,7 +847,7 @@ def _construct_single_property_filters( # if we have all results on this page, check that the blessed ID is in the response if excluded and ( chosen_entry.get("id", "") - in set(entry.get("id") for entry in response["data"]) + in {entry.get("id") for entry in response["data"]} ): raise ResponseError( f"Entry {chosen_entry['id']} with value {prop!r}: {test_value} was not excluded by {query!r}" @@ -1060,7 +1060,7 @@ def _test_multi_entry_endpoint(self, endp: str) -> None: @test_case def _test_data_available_matches_data_returned( self, deserialized: Any - ) -> Tuple[Optional[bool], str]: + ) -> tuple[Optional[bool], str]: """In the case where no query is requested, `data_available` must equal `data_returned` in the meta response, which is tested here. @@ -1126,7 +1126,7 @@ def _test_versions_endpoint(self): @test_case def _test_versions_endpoint_content( self, response: requests.Response - ) -> Tuple[requests.Response, str]: + ) -> tuple[requests.Response, str]: """Checks that the response from the versions endpoint complies with the specification and that its 'Content-Type' header complies with [RFC 4180](https://tools.ietf.org/html/rfc4180.html). @@ -1186,9 +1186,9 @@ def _test_versions_endpoint_content( @test_case def _test_versions_headers( self, - content_type: Dict[str, Any], - expected_parameter: Union[str, List[str]], - ) -> Tuple[Dict[str, Any], str]: + content_type: dict[str, Any], + expected_parameter: Union[str, list[str]], + ) -> tuple[dict[str, Any], str]: """Tests that the `Content-Type` field of the `/versions` header contains the passed parameter. @@ -1270,8 +1270,8 @@ def _test_page_limit( self, response: requests.models.Response, check_next_link: int = 5, - previous_links: Optional[Set[str]] = None, - ) -> Tuple[Optional[bool], str]: + previous_links: Optional[set[str]] = None, + ) -> tuple[Optional[bool], str]: """Test that a multi-entry endpoint obeys the page limit by following pagination links up to a depth of `check_next_link`. @@ -1387,7 +1387,7 @@ def _deserialize_response( response: requests.models.Response, response_cls: Any, request: Optional[str] = None, - ) -> Tuple[Any, str]: + ) -> tuple[Any, str]: """Try to create the appropriate pydantic model from the response. Parameters: @@ -1416,13 +1416,13 @@ def _deserialize_response( return ( response_cls(**json_response), - "deserialized correctly as object of type {}".format(response_cls), + f"deserialized correctly as object of type {response_cls}", ) @test_case def _get_available_endpoints( - self, base_info: Union[Any, Dict[str, Any]] - ) -> Tuple[Optional[List[str]], str]: + self, base_info: Union[Any, dict[str, Any]] + ) -> tuple[Optional[list[str]], str]: """Tries to get `entry_types_by_format` from base info response even if it could not be deserialized. @@ -1478,8 +1478,8 @@ def _get_available_endpoints( @test_case def _get_endpoint( - self, request_str: str, expected_status_code: Union[List[int], int] = 200 - ) -> Tuple[Optional[requests.Response], str]: + self, request_str: str, expected_status_code: Union[list[int], int] = 200 + ) -> tuple[Optional[requests.Response], str]: """Gets the response from the endpoint specified by `request_str`. function is wrapped by the `test_case` decorator diff --git a/pyproject.toml b/pyproject.toml index 1cbfffc3a..536eca497 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,8 @@ server = [ "fastapi>=0.103.1", "pyyaml~=6.0", "optimade[mongo]", + "numpy>=1.20", + "jsonlines>=3.1", ] # Client minded diff --git a/requirements-client.txt b/requirements-client.txt index 2c0b7f4f4..d25adb217 100644 --- a/requirements-client.txt +++ b/requirements-client.txt @@ -2,7 +2,6 @@ aiida-core==2.4.0 ase==3.22.1 emmet_core==0.68.0 jarvis-tools==2023.9.20 -jarvis-tools==2023.9.20 mp-api==0.36.1 numpy>=1.20 pymatgen==2023.9.10 diff --git a/requirements-dev.txt b/requirements-dev.txt index 33423d46c..4c0c0620c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,6 +4,7 @@ flake8==6.1.0 invoke==2.2.0 isort==5.12.0 jsondiff==2.0.0 +jsonlines>=3.1 mypy==1.5.1 pre-commit==3.4.0 pylint==2.17.5 diff --git a/requirements-server.txt b/requirements-server.txt index 04f5dc957..f486c1d85 100644 --- a/requirements-server.txt +++ b/requirements-server.txt @@ -1,5 +1,7 @@ elasticsearch==7.17.7 elasticsearch-dsl==7.4.0 fastapi==0.103.1 +jsonlines>=3.1 mongomock==4.1.2 +numpy>=1.20 pymongo==4.5.0 diff --git a/tasks.py b/tasks.py index 216cdd753..1c78a2150 100644 --- a/tasks.py +++ b/tasks.py @@ -3,7 +3,7 @@ import re import sys from pathlib import Path -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Optional from invoke import task from jsondiff import diff @@ -15,9 +15,9 @@ TOP_DIR = Path(__file__).parent.resolve() -def update_file(filename: str, sub_line: Tuple[str, str], strip: Optional[str] = None): +def update_file(filename: str, sub_line: tuple[str, str], strip: Optional[str] = None): """Utility function for tasks to read, update, and write files""" - with open(filename, "r") as handle: + with open(filename) as handle: lines = [ re.sub(sub_line[0], sub_line[1], line.rstrip(strip)) for line in handle ] @@ -119,7 +119,7 @@ def setver(_, ver=""): (r'"version": ".*",', f'"version": "{ver}",'), ) - print("Bumped version to {}".format(ver)) + print(f"Bumped version to {ver}") @task(help={"ver": "OPTIMADE API version to set"}, post=[update_openapijson]) @@ -191,7 +191,7 @@ def create_api_reference_docs(context, pre_clean=False, pre_commit=False): def write_file(full_path: Path, content: str): """Write file with `content` to `full_path`""" if full_path.exists(): - with open(full_path, "r") as handle: + with open(full_path) as handle: cached_content = handle.read() if content == cached_content: del cached_content @@ -306,7 +306,7 @@ def print_error(string): print(f"\033[31m{line}\033[0m") swagger_url = "https://validator.swagger.io/validator/debug" - with open(fname, "r") as f: + with open(fname) as f: schema = json.load(f) response = requests.post(swagger_url, json=schema) diff --git a/tests/adapters/references/conftest.py b/tests/adapters/references/conftest.py index 52fbe3f36..2afe72cc7 100644 --- a/tests/adapters/references/conftest.py +++ b/tests/adapters/references/conftest.py @@ -10,9 +10,7 @@ @pytest.fixture def RAW_REFERENCES(): """Read and return raw_references.json""" - with open( - Path(__file__).parent.joinpath("raw_test_references.json"), "r" - ) as raw_data: + with open(Path(__file__).parent.joinpath("raw_test_references.json")) as raw_data: return json.load(raw_data) diff --git a/tests/adapters/structures/conftest.py b/tests/adapters/structures/conftest.py index 44afd7e47..c151f962e 100644 --- a/tests/adapters/structures/conftest.py +++ b/tests/adapters/structures/conftest.py @@ -1,7 +1,6 @@ import json from pathlib import Path from random import choice -from typing import List import pytest @@ -9,18 +8,16 @@ @pytest.fixture -def RAW_STRUCTURES() -> List[dict]: +def RAW_STRUCTURES() -> list[dict]: """Read and return raw_structures.json""" - with open( - Path(__file__).parent.joinpath("raw_test_structures.json"), "r" - ) as raw_data: + with open(Path(__file__).parent.joinpath("raw_test_structures.json")) as raw_data: return json.load(raw_data) @pytest.fixture -def SPECIAL_SPECIES_STRUCTURES() -> List[dict]: +def SPECIAL_SPECIES_STRUCTURES() -> list[dict]: """Read and return special_species.json""" - with open(Path(__file__).parent.joinpath("special_species.json"), "r") as raw_data: + with open(Path(__file__).parent.joinpath("special_species.json")) as raw_data: return json.load(raw_data) @@ -37,7 +34,7 @@ def structure(raw_structure) -> Structure: @pytest.fixture -def structures(RAW_STRUCTURES) -> List[Structure]: +def structures(RAW_STRUCTURES) -> list[Structure]: """Create and return list of adapters.Structure""" return [Structure(_) for _ in RAW_STRUCTURES] diff --git a/tests/adapters/structures/utils.py b/tests/adapters/structures/utils.py index b11df3182..ae7fc9ed3 100644 --- a/tests/adapters/structures/utils.py +++ b/tests/adapters/structures/utils.py @@ -5,7 +5,7 @@ def get_min_ver(dependency: str) -> str: """Retrieve version of `dependency` from setup.py, raise if not found.""" pyproject_toml = Path(__file__).parent.joinpath("../../../pyproject.toml") - with open(pyproject_toml, "r") as setup_file: + with open(pyproject_toml) as setup_file: for line in setup_file.readlines(): min_ver = re.findall(rf'"{dependency}((=|!|<|>|~)=|>|<)(.+)"', line) if min_ver: diff --git a/tests/adapters/test_jsonl.py b/tests/adapters/test_jsonl.py new file mode 100644 index 000000000..d5843fcc3 --- /dev/null +++ b/tests/adapters/test_jsonl.py @@ -0,0 +1,11 @@ +from pathlib import Path + +from optimade.adapters.jsonl import from_jsonl, to_jsonl + +test_object = from_jsonl(Path(__file__).parent.resolve() / "testdata.jsonl") + + +def test_to_and_from_jsonl(): + file_content = to_jsonl(test_object) + reprocessed_file = from_jsonl(file_content) + assert test_object == reprocessed_file diff --git a/tests/adapters/testdata.jsonl b/tests/adapters/testdata.jsonl new file mode 100644 index 000000000..47eb9346b --- /dev/null +++ b/tests/adapters/testdata.jsonl @@ -0,0 +1,5 @@ +{"optimade_partial_data": {"version": "1.2.0"}, "layout": "dense", "returned_ranges": [{"start": 10, "stop": 20, "step": 2}]} +1243 +345 +-12.6 +["PARTIAL-DATA-NEXT", ["https://example.db.org/value4"]] diff --git a/tests/filterparser/test_filterparser.py b/tests/filterparser/test_filterparser.py index 5296bf797..5a01130b9 100644 --- a/tests/filterparser/test_filterparser.py +++ b/tests/filterparser/test_filterparser.py @@ -1,5 +1,4 @@ import abc -from typing import Tuple import pytest from lark import Tree @@ -11,7 +10,7 @@ class BaseTestFilterParser(abc.ABC): """Base class for parsing different versions of the grammar using `LarkParser`.""" - version: Tuple[int, int, int] + version: tuple[int, int, int] variant: str = "default" @pytest.fixture(autouse=True) diff --git a/tests/models/conftest.py b/tests/models/conftest.py index a6cc48564..461c8d5a3 100644 --- a/tests/models/conftest.py +++ b/tests/models/conftest.py @@ -12,7 +12,7 @@ def load_test_data(filename: str) -> list: if not json_file_path.exists(): raise RuntimeError(f"Could not find {filename!r} in 'tests.models.test_data'") - with open(json_file_path, "r") as handle: + with open(json_file_path) as handle: data = json.load(handle) return data diff --git a/tests/models/test_data/test_good_structures.json b/tests/models/test_data/test_good_structures.json index b84605832..63c7c6044 100644 --- a/tests/models/test_data/test_good_structures.json +++ b/tests/models/test_data/test_good_structures.json @@ -165,6 +165,13 @@ "last_modified": { "$date": "2019-06-08T05:13:37.331Z" }, + "meta": { + "property_metadata": { + "elements_ratios": { + "_exmpl_originates_from_project":"piezoelectic_perovskites" + } + } + }, "band_gap": 1.23456, "chemsys": "C-H-Cl-N-Na-O-Os-P", "elements": ["C", "Cl", "H", "N", "Na", "O", "Os", "P"], diff --git a/tests/models/test_entries.py b/tests/models/test_entries.py index a3ab7e318..e1a8a492e 100644 --- a/tests/models/test_entries.py +++ b/tests/models/test_entries.py @@ -1,7 +1,7 @@ import pytest from pydantic import ValidationError -from optimade.models.entries import EntryRelationships +from optimade.models.entries import EntryRelationships, EntryResource def test_simple_relationships(): @@ -48,3 +48,53 @@ def test_advanced_relationships(): } with pytest.raises(ValidationError): EntryRelationships(**relationship) + + +def test_meta(): + import copy + + good_entry_resource = { + "id": "goodstruct123", + "type": "structure", + "attributes": { + "last_modified": "2023-07-21T05:13:37.331Z", + "elements": ["Ac"], + "_exmpl_database_specific_property": "value1", + "elements_ratios": [1.0], + }, + "meta": { + "property_metadata": { + "elements_ratios": { + "_exmpl_mearsurement_method": "ICP-OES", + }, + "_exmpl_database_specific_property": { + "_exmpl_metadata_property": "metadata_value" + }, + } + }, + } + + EntryResource(**good_entry_resource) + + bad_entry_resources = [ + good_entry_resource, + copy.deepcopy(good_entry_resource), + copy.deepcopy(good_entry_resource), + copy.deepcopy(good_entry_resource), + ] + bad_entry_resources[0]["meta"]["property_metadata"][ + "_exmpl_database_specific_property" + ] = {"metadata_property": "metadata_value"} + bad_entry_resources[1]["meta"]["property_metadata"][ + "database_specific_property" + ] = {"_exmpl_metadata_property": "metadata_value"} + bad_entry_resources[2]["meta"]["database_specific_property"] = { + "_exmpl_metadata_property": "metadata_value" + } + bad_entry_resources[3]["meta"]["_other_database_specific_property"] = { + "_exmpl_metadata_property": "metadata_value" + } + + for bad_entry in bad_entry_resources: + with pytest.raises(ValueError): + EntryResource(**bad_entry) diff --git a/tests/models/test_jsonapi.py b/tests/models/test_jsonapi.py index b2e09cb4a..4623e79e5 100644 --- a/tests/models/test_jsonapi.py +++ b/tests/models/test_jsonapi.py @@ -8,7 +8,7 @@ def test_hashability(): from optimade.models.jsonapi import Error error = Error(id="test") - assert set([error]) + assert {error} def test_toplevel_links(): diff --git a/tests/models/test_optimade_json.py b/tests/models/test_optimade_json.py index 49c91d985..f96b88aa1 100644 --- a/tests/models/test_optimade_json.py +++ b/tests/models/test_optimade_json.py @@ -31,7 +31,7 @@ def test_convert_python_types(): test_none = None python_types_as_objects = [ - str("Test"), + "Test", 42, 42.42, ["Test", 42], diff --git a/tests/models/test_partialdata.py b/tests/models/test_partialdata.py new file mode 100644 index 000000000..e6f1b3582 --- /dev/null +++ b/tests/models/test_partialdata.py @@ -0,0 +1,38 @@ +from optimade.models.partial_data import PartialDataFormat + + +def test_partial_data_object_generation(): + test_object = { + "header": { + "optimade_partial_data": {"version": "1.2.0"}, + "layout": "dense", + "returned_ranges": [{"start": 10, "stop": 20, "step": 2}], + }, + "data": [ + 123, + 345, + -12.6, + ["PARTIAL-DATA-NEXT", ["https://example.db.org/value4"]], + ], + } + + PartialDataFormat(**test_object) + + +# todo finish test below +# def test_json_object_generation(): +# test_object = { +# "header": { +# "optimade_partial_data": {"version": "1.2.0"}, +# "layout": "dense", +# "returned_ranges": [{"start": 10, "stop": 20, "step": 2}], +# }, +# "data": [ +# 123, +# 345, +# -12.6, +# ["PARTIAL-DATA-NEXT", ["https://example.db.org/value4"]], +# ], +# } +# +# PartialDataResource(**test_object) diff --git a/tests/models/test_structures.py b/tests/models/test_structures.py index 213dfd5eb..c36e0ca1c 100644 --- a/tests/models/test_structures.py +++ b/tests/models/test_structures.py @@ -194,7 +194,7 @@ def test_structure_fatal_deformities(good_structure, deformity): minor_deformities = ( - {f: None} for f in set(f for _ in CORRELATED_STRUCTURE_FIELDS for f in _) + {f: None} for f in {f for _ in CORRELATED_STRUCTURE_FIELDS for f in _} ) diff --git a/tests/models/test_utils.py b/tests/models/test_utils.py index e5b3f7502..57de92030 100644 --- a/tests/models/test_utils.py +++ b/tests/models/test_utils.py @@ -1,4 +1,4 @@ -from typing import Callable, List +from typing import Callable import pytest from pydantic import BaseModel, Field, ValidationError @@ -46,7 +46,7 @@ def test_compatible_strict_optimade_field() -> None: class CorrectModelWithStrictField(BaseModel): # check that unit and uniqueItems are passed through - good_field: List[str] = StrictField( + good_field: list[str] = StrictField( ..., support=SupportLevel.MUST, queryable=SupportLevel.OPTIONAL, @@ -58,7 +58,7 @@ class CorrectModelWithStrictField(BaseModel): ) class CorrectModelWithOptimadeField(BaseModel): - good_field: List[str] = OptimadeField( + good_field: list[str] = OptimadeField( ..., # Only difference here is that OptimadeField allows case-insensitive # strings to be passed instead of support levels directly diff --git a/tests/server/conftest.py b/tests/server/conftest.py index a943b36ca..4fd7567c5 100644 --- a/tests/server/conftest.py +++ b/tests/server/conftest.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Optional, Union import pytest @@ -124,7 +124,6 @@ def check_response(get_good_response): server: The type of server to test, or the actual test client class. """ - from typing import List from optimade.server.config import CONFIG @@ -132,11 +131,11 @@ def check_response(get_good_response): def inner( request: str, - expected_ids: Union[str, List[str]], + expected_ids: Union[str, list[str]], page_limit: int = CONFIG.page_limit, expected_return: Optional[int] = None, expected_as_is: bool = False, - expected_warnings: Optional[List[Dict[str, str]]] = None, + expected_warnings: Optional[list[dict[str, str]]] = None, server: Union[str, OptimadeTestClient] = "regular", ): if expected_warnings: diff --git a/tests/server/query_params/conftest.py b/tests/server/query_params/conftest.py index ce11819be..e79e60754 100644 --- a/tests/server/query_params/conftest.py +++ b/tests/server/query_params/conftest.py @@ -12,13 +12,13 @@ def structures(): @pytest.fixture def check_include_response(get_good_response): """Fixture to check "good" `include` response""" - from typing import List, Optional, Set, Union + from typing import Optional, Union def inner( request: str, - expected_included_types: Union[List, Set], - expected_included_resources: Union[List, Set], - expected_relationship_types: Optional[Union[List, Set]] = None, + expected_included_types: Union[list, set], + expected_included_resources: Union[list, set], + expected_relationship_types: Optional[Union[list, set]] = None, server: str = "regular", ): response = get_good_response(request, server) @@ -78,11 +78,13 @@ def inner( response = get_good_response(request, server) expected_fields.add("attributes") - + expected_fields.discard("meta") response_fields = set() for entry in response["data"]: response_fields.update(set(entry.keys())) response_fields.update(set(entry["attributes"].keys())) + # As "meta" is an optional field the response may or may not have it, so we remove it here to prevent problems in the assert below. + response_fields.discard("meta") assert sorted(expected_fields) == sorted(response_fields) return inner diff --git a/tests/server/routers/test_partial_data.py b/tests/server/routers/test_partial_data.py new file mode 100644 index 000000000..01e44a477 --- /dev/null +++ b/tests/server/routers/test_partial_data.py @@ -0,0 +1,52 @@ +import pytest + +from optimade.models import PartialDataResponse +from optimade.server.config import CONFIG + +from ..utils import NoJsonEndpointTests + + +@pytest.mark.skipif( + CONFIG.database_backend.value not in ("mongomock", "mongodb"), + reason="At the moment partial data is only supported for the MongoDB backend", +) +class TestPartialDataEndpoint(NoJsonEndpointTests): + """Tests for /partial_data/""" + + test_id = "mpf_551" + params = "response_fields=cartesian_site_positions" + request_str = f"/partial_data/{test_id}?{params}" + response_cls = PartialDataResponse + + +@pytest.mark.skipif( + CONFIG.database_backend.value not in ("mongomock", "mongodb"), + reason="At the moment partial data is only supported for the MongoDB backend", +) +def test_property_ranges_link(get_good_response, client): + test_id = "mpf_551" + params = "response_fields=cartesian_site_positions&property_ranges=dim_sites:2:74:1,dim_cartesian_dimensions:1:3:1&response_format=json" + request = f"/partial_data/{test_id}?{params}" + get_good_response( + request, server=client + ) # todo expand test to check content better. + + +@pytest.mark.skipif( + CONFIG.database_backend.value not in ("mongomock", "mongodb"), + reason="At the moment partial data is only supported for the MongoDB backend", +) +def test_wrong_id_partial_data(check_error_response, client): + """ + A specific JSON response should also occur. + """ + test_id = "mpf_486" + params = "response_fields=cartesian_site_positions" + request = f"/partial_data/{test_id}?{params}" + check_error_response( + request, + expected_status=404, + expected_title="Not Found", + expected_detail="No data available for the combination of entry mpf_486 and property cartesian_site_positions", + server=client, + ) diff --git a/tests/server/routers/test_structures.py b/tests/server/routers/test_structures.py index a0e089f2d..7f8bfa726 100644 --- a/tests/server/routers/test_structures.py +++ b/tests/server/routers/test_structures.py @@ -69,6 +69,12 @@ def test_structures_endpoint_data(self): assert self.json_response["data"]["type"] == "structures" assert "attributes" in self.json_response["data"] assert "_exmpl_chemsys" in self.json_response["data"]["attributes"] + assert ( + self.json_response["data"]["meta"]["property_metadata"]["elements_ratios"][ + "_exmpl_originates_from_project" + ] + == "Pure Metals" + ) def test_check_response_single_structure(check_response): diff --git a/tests/server/routers/test_utils.py b/tests/server/routers/test_utils.py index 37b1f8ee5..7a94bfc2d 100644 --- a/tests/server/routers/test_utils.py +++ b/tests/server/routers/test_utils.py @@ -1,5 +1,6 @@ """Tests specifically for optimade.servers.routers.utils.""" -from typing import Mapping, Optional, Tuple, Union +from collections.abc import Mapping +from typing import Optional, Union from unittest import mock import pytest @@ -8,7 +9,7 @@ def mocked_providers_list_response( url: Union[str, bytes] = "", - param: Optional[Union[Mapping[str, str], Tuple[str, str]]] = None, + param: Optional[Union[Mapping[str, str], tuple[str, str]]] = None, **kwargs, ): """This function will be used to mock requests.get diff --git a/tests/server/test_client.py b/tests/server/test_client.py index 732385bcc..406366f84 100644 --- a/tests/server/test_client.py +++ b/tests/server/test_client.py @@ -5,7 +5,7 @@ import warnings from functools import partial from pathlib import Path -from typing import Dict, Optional +from typing import Optional import httpx import pytest @@ -337,7 +337,7 @@ def test_command_line_client_write_to_file( assert 'Performing query structures/?filter=elements HAS "Ag"' in captured.err assert not captured.out assert Path(test_filename).is_file() - with open(test_filename, "r") as f: + with open(test_filename) as f: results = json.load(f) for url in TEST_URLS: assert len(results["structures"]['elements HAS "Ag"'][url]["data"]) == 11 @@ -360,9 +360,9 @@ def test_strict_async(async_http_client, http_client, use_async): @pytest.mark.parametrize("use_async", [True, False]) def test_client_global_data_callback(async_http_client, http_client, use_async): - container: Dict[str, str] = {} + container: dict[str, str] = {} - def global_database_callback(_: str, results: Dict): + def global_database_callback(_: str, results: dict): """A test callback that creates a flat dictionary of results via global state""" for structure in results["data"]: @@ -386,7 +386,7 @@ def global_database_callback(_: str, results: Dict): @pytest.mark.parametrize("use_async", [True, False]) def test_client_page_skip_callback(async_http_client, http_client, use_async): - def page_skip_callback(_: str, results: Dict) -> Optional[Dict]: + def page_skip_callback(_: str, results: dict) -> Optional[dict]: """A test callback that skips to the final page of results.""" if len(results["data"]) > 16: return {"next": f"{TEST_URL}/structures?page_offset=16"} @@ -407,10 +407,10 @@ def page_skip_callback(_: str, results: Dict) -> Optional[Dict]: @pytest.mark.parametrize("use_async", [True, False]) def test_client_mutable_data_callback(async_http_client, http_client, use_async): - container: Dict[str, str] = {} + container: dict[str, str] = {} def mutable_database_callback( - _: str, results: Dict, db: Optional[Dict[str, str]] = None + _: str, results: dict, db: Optional[dict[str, str]] = None ) -> None: """A test callback that creates a flat dictionary of results via mutable args.""" @@ -436,7 +436,7 @@ def mutable_database_callback( def test_client_asynchronous_write_callback( async_http_client, http_client, use_async, tmp_path ): - def write_to_file(_: str, results: Dict): + def write_to_file(_: str, results: dict): """A test callback that creates a flat dictionary of results via global state""" with open(tmp_path / "formulae.csv", "a") as f: @@ -458,7 +458,7 @@ def write_to_file(_: str, results: Dict): cli.get(response_fields=["chemical_formula_reduced"]) - with open(tmp_path / "formulae.csv", "r") as f: + with open(tmp_path / "formulae.csv") as f: lines = f.readlines() assert len(lines) == 17 * len(TEST_URLS) + 1 diff --git a/tests/server/test_config.py b/tests/server/test_config.py index db0175b3f..6c8d70b28 100644 --- a/tests/server/test_config.py +++ b/tests/server/test_config.py @@ -33,7 +33,7 @@ def test_default_config_path(top_dir): org_env_var = os.getenv("OPTIMADE_CONFIG_FILE") - with open(top_dir.joinpath("tests/test_config.json"), "r") as config_file: + with open(top_dir.joinpath("tests/test_config.json")) as config_file: config = json.load(config_file) different_base_url = "http://something_you_will_never_think_of.com" diff --git a/tests/server/utils.py b/tests/server/utils.py index 6a046c36b..bbba9f615 100644 --- a/tests/server/utils.py +++ b/tests/server/utils.py @@ -1,7 +1,8 @@ import json import re import warnings -from typing import Iterable, Optional, Type, Union +from collections.abc import Iterable +from typing import Optional, Union from urllib.parse import urlparse import httpx @@ -31,7 +32,7 @@ def __init__( root_path: str = "", version: str = "", ) -> None: - super(OptimadeTestClient, self).__init__( + super().__init__( app=app, base_url=base_url, raise_server_exceptions=raise_server_exceptions, @@ -64,7 +65,7 @@ def request( # pylint: disable=too-many-locals while url.startswith("/"): url = url[1:] url = f"{self.version}/{url}" - return super(OptimadeTestClient, self).request( + return super().request( method=method, url=url, **kwargs, @@ -75,7 +76,7 @@ class BaseEndpointTests: """Base class for common tests of endpoints""" request_str: Optional[str] = None - response_cls: Optional[Type[jsonapi.Response]] = None + response_cls: Optional[type[jsonapi.Response]] = None response: Optional[httpx.Response] = None json_response: Optional[dict] = None @@ -223,14 +224,14 @@ class NoJsonEndpointTests: """A simplified mixin class for tests on non-JSON endpoints.""" request_str: Optional[str] = None - response_cls: Optional[Type] = None + response_cls: Optional[type] = None response: Optional[httpx.Response] = None @pytest.fixture(autouse=True) - def get_response(self, both_clients): + def get_response(self, client): """Get response from client""" - self.response = both_clients.get(self.request_str) + self.response = client.get(self.request_str) yield self.response = None