diff --git a/Makefile b/Makefile
index 026a684..828475f 100644
--- a/Makefile
+++ b/Makefile
@@ -23,7 +23,7 @@ testcaches = .hypothesis .pytest_cache .pytype coverage.xml htmlcov .coverage
 
 all: version test build
 
-develop: devversion package
+develop: devversion package test
 	python3 setup.py develop --uninstall
 	python3 setup.py develop
 
@@ -33,7 +33,9 @@ publish: distclean version package test
 	@git push origin `cat VERSION`
 
 $(generatedcode): VERSION
-	python3 setup.py donothing
+	# this will generate the version subpackage inside clams package
+	python3 setup.py --help 2>/dev/null || echo "Ignore setuptools import error for now"
+	ls $(generatedcode)*
 
 # generating jsonschema depends on mmif-python and pydantic
 docs: mmif := $(shell grep mmif-python requirements.txt)
diff --git a/clams/app/__init__.py b/clams/app/__init__.py
index d873204..d6c6dcf 100644
--- a/clams/app/__init__.py
+++ b/clams/app/__init__.py
@@ -404,7 +404,13 @@ def cast(self, args: Dict[str, List[str]]) \
                         if valuetype == dict:
                             casted.setdefault(k, {}).update(v)
                         else:
-                            casted.setdefault(k, []).append(v)
+                            # pytype will complain about the next line, but it is actually correct
+                            # casted.setdefault(k, []).append(v)
+                            # so doing it in a more explicit way
+                            if k in casted and isinstance(casted[k], list):
+                                casted[k].append(v)
+                            else:
+                                casted[k] = [v]
                     else:
                         casted[k] = v
                 # when an empty value is passed (usually as a default value)
diff --git a/clams/appmetadata/__init__.py b/clams/appmetadata/__init__.py
index 2265f2e..4a7bd3c 100644
--- a/clams/appmetadata/__init__.py
+++ b/clams/appmetadata/__init__.py
@@ -3,7 +3,7 @@
 import subprocess
 import sys
 from pathlib import Path
-from typing import Union, Dict, List, Optional, Literal
+from typing import Union, Dict, List, Optional, Literal, Any
 
 import mmif
 import pydantic
@@ -31,8 +31,8 @@ def get_clams_pyver():
         import clams
         return clams.__version__
     except ImportError:
-        version_fname = os.path.join(os.path.dirname(__file__), '..', '..', 'VERSION')
-        if os.path.exists(version_fname):
+        version_fname = Path(__file__).parent.joinpath('../../VERSION')  # start from this file's directory (as os.path.dirname did), not from the file itself
+        if version_fname.exists():
             with open(version_fname) as version_f:
                 return version_f.read().strip()
         else:
@@ -59,13 +59,21 @@ def get_mmif_specver():
     return mmif.__specver__
 
 
+def pop_titles(js):
+    for prop in js.get('properties', {}).values():
+        prop.pop('title', None)
+
+
+def jsonschema_versioning(js):
+    js['$schema'] = pydantic.json_schema.GenerateJsonSchema.schema_dialect
+    js['$comment'] = f"clams-python SDK {get_clams_pyver()} was used to generate this schema"
+
+
 class _BaseModel(pydantic.BaseModel):
 
-    class Config:
-        @staticmethod
-        def json_schema_extra(schema, model) -> None:
-            for prop in schema.get('properties', {}).values():
-                prop.pop('title', None)
+    model_config = {
+        "json_schema_extra": pop_titles
+    }
 
 
 class Output(_BaseModel):
@@ -93,21 +101,53 @@ class Output(_BaseModel):
                     "and also can be used as a expansion specification for the type definition beyond the base "
                     "vocabulary."
     )
-    properties: Dict[str, real_valued_primitives] = pydantic.Field(
-        {},
+    # TODO (krim @ 5/12/21): currently there's no way to validate the property
+    # types based on vocabulary specification of an annotation type. As a result,
+    # we allow "any" type and do some basic validation below, but we need a
+    # better way for validation.
+    properties: Dict[str, Any] = pydantic.Field(
+        {},
         description="(optional) Specification for type properties, if any. ``\"*\"`` indicates any value."
     )
+
+    @pydantic.field_validator('properties', mode='before')
+    @classmethod
+    def validate_properties(cls, value):
+        if not isinstance(value, dict):
+            raise ValueError("Properties must be a dictionary.")
+        for key, val in value.items():
+            if not isinstance(key, str):
+                raise ValueError(f"Property key '{key}' must be a string.")
+            if isinstance(val, list):
+                if not all(isinstance(item, type(val[0])) for item in val):
+                    raise ValueError(f"All elements in the list for key '{key}' must be of the same type.")
+            elif isinstance(val, dict):
+                if not all(isinstance(k, str) for k in val.keys()):
+                    raise ValueError(f"All keys in the dictionary for key '{key}' must be strings.")
+                if not all(isinstance(v, type(next(iter(val.values())))) for v in val.values()):
+                    raise ValueError(f"All values in the dictionary for key '{key}' must be of the same type.")
+        return value
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    @pydantic.field_validator('at_type', mode='after')  # because pydantic v2 doesn't auto-convert url to string
+    @classmethod
+    def stringify(cls, val):
+        return str(val)
 
-    @pydantic.validator('at_type', pre=True)
+    @pydantic.field_validator('at_type', mode='before')
+    @classmethod
     def at_type_must_be_str(cls, v):
         if not isinstance(v, str):
             return str(v)
         return v
 
-    class Config:
-        title = 'CLAMS Output Specification'
-        extra = 'forbid'
-        allow_population_by_field_name = True
+    model_config = {
+        'title': 'CLAMS Output Specification',
+        'extra': 'forbid',
+        'validate_by_name': True,
+    }
 
     def add_description(self, description: str):
         """
@@ -127,20 +167,21 @@ class Input(Output):
 
     Developers should take diligent care to include all input types and their properties in the app metadata.
     """
-    required: bool = pydantic.Field(
+    required: Optional[bool] = pydantic.Field(
         None,
         description="(optional, True by default) Indicating whether this input type is mandatory or optional."
     )
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
         if self.required is None:
             self.required = True
 
-    class Config:
-        title = 'CLAMS Input Specification'
-        extra = 'forbid'
-        allow_population_by_field_name = True
+    model_config = {
+        'title': 'CLAMS Input Specification',
+        'extra': 'forbid',
+        'validate_by_name': True,
+    }
 
 
 class RuntimeParameter(_BaseModel):
@@ -178,12 +219,13 @@ class RuntimeParameter(_BaseModel):
                     "desired dictionary is ``{'key1': 'value1', 'key2': 'value2'}``, the default value (used when "
                     "initializing a parameter) should be ``['key1:value1','key2:value2']``\n."
     )
-    choices: List[real_valued_primitives] = pydantic.Field(
+    choices: Optional[List[real_valued_primitives]] = pydantic.Field(
         None,
         description="(optional) List of string values that can be accepted."
     )
-    default: Union[real_valued_primitives, List[real_valued_primitives]] = pydantic.Field(
+    default: Optional[Union[real_valued_primitives, List[real_valued_primitives]]] = pydantic.Field(
         None,
+        union_mode='left_to_right',
         description="(optional) Default value for the parameter.\n\n"
                     "Notes for developers: \n\n"
                     "Setting a default value makes a parameter `optional`. \n\n"
@@ -208,9 +250,10 @@ def __init__(self, **kwargs):
         if self.multivalued and self.default is not None and not isinstance(self.default, list):
             self.default = [self.default]
 
-    class Config:
-        title = 'CLAMS App Runtime Parameter'
-        extra = 'forbid'
+    model_config = {
+        'title': 'CLAMS App Runtime Parameter',
+        'extra': 'forbid',
+    }
 
 
 class AppMetadata(pydantic.BaseModel):
@@ -236,18 +279,19 @@ class AppMetadata(pydantic.BaseModel):
         description="A longer description of the app (what it does, how to use, etc.)."
     )
     app_version: str = pydantic.Field(
-        default_factory=generate_app_version,
+        '',  # instead of using default_factory, I will use model_validator to set the default value
+        # this will work around the limitation of exclude_defaults=True condition when serializing
         description="(AUTO-GENERATED, DO NOT SET MANUALLY)\n\n"
                     "Version of the app.\n\n"
                     "When the metadata is generated using clams-python SDK, this field is automatically filled in"
     )
     mmif_version: str = pydantic.Field(
-        default_factory=get_mmif_specver,
+        '',  # same as above
         description="(AUTO-GENERATED, DO NOT SET MANUALLY)\n\n"
                     "Version of MMIF specification the app.\n\n"
                     "When the metadata is generated using clams-python SDK, this field is automatically filled in."
     )
-    analyzer_version: str = pydantic.Field(
+    analyzer_version: Optional[str] = pydantic.Field(
         None,
         description="(optional) Version of an analyzer software, if the app is working as a wrapper for one. "
     )
@@ -255,7 +299,7 @@ class AppMetadata(pydantic.BaseModel):
         ...,
         description="License information of the app."
     )
-    analyzer_license: str = pydantic.Field(
+    analyzer_license: Optional[str] = pydantic.Field(
         None,
         description="(optional) License information of an analyzer software, if the app works as a wrapper for one. "
     )
@@ -298,7 +342,7 @@ class AppMetadata(pydantic.BaseModel):
         [],
         description="List of runtime parameters. Can be empty."
     )
-    dependencies: List[str] = pydantic.Field(
+    dependencies: Optional[List[str]] = pydantic.Field(
         None,
         description="(optional) List of software dependencies of the app. \n\n"
                     "This list is completely optional, as in most cases such dependencies are specified in a separate "
@@ -307,36 +351,38 @@ class AppMetadata(pydantic.BaseModel):
                     "List items must be strings, not any kind of structured data. Thus, it is recommended to include "
                     "a package name and its version in the string value at the minimum (e.g., ``clams-python==1.2.3``)."
     )
-    more: Dict[str, str] = pydantic.Field(
+    more: Optional[Dict[str, str]] = pydantic.Field(
         None,
         description="(optional) A string-to-string map that can be used to store any additional metadata of the app."
     )
 
-    class Config:
-        title = "CLAMS AppMetadata"
-        extra = 'forbid'
-        allow_population_by_field_name = True
-
-        @staticmethod
-        def json_schema_extra(schema, model) -> None:
-            for prop in schema.get('properties', {}).values():
-                prop.pop('title', None)
-            schema['$schema'] = "http://json-schema.org/draft-07/schema#"  # currently pydantic doesn't natively support the $schema field. See https://github.com/samuelcolvin/pydantic/issues/1478
-            schema['$comment'] = f"clams-python SDK {get_clams_pyver()} was used to generate this schema"  # this is only to hold version information
-
-    @pydantic.validator('identifier', pre=True)
+    model_config = {
+        'title': 'CLAMS AppMetadata',
+        'extra': 'forbid',
+        'validate_by_name': True,
+        'json_schema_extra': lambda schema, model: [adjust(schema) for adjust in [pop_titles, jsonschema_versioning]],
+    }
+
+    @pydantic.model_validator(mode='after')
+    @classmethod
+    def assign_versions(cls, data):
+        if data.app_version == '':
+            data.app_version = generate_app_version()
+        if data.mmif_version == '':
+            data.mmif_version = get_mmif_specver()
+        return data
+
+    @pydantic.field_validator('identifier', mode='before')
+    @classmethod
     def append_version(cls, val):
         prefix = f'{app_directory_baseurl if "/" not in val else""}'
         suffix = generate_app_version()
         return '/'.join(map(lambda x: x.strip('/'), filter(None, (prefix, val, suffix))))
 
-    @pydantic.validator('mmif_version', pre=True)
-    def auto_mmif_version(cls, val):
-        return get_mmif_specver()
-
-    @pydantic.validator('app_version', pre=True)
-    def auto_app_version(cls, val):
-        return generate_app_version()
+    @pydantic.field_validator('url', 'identifier', mode='after')  # because pydantic v2 doesn't auto-convert url to string
+    @classmethod
+    def stringify(cls, val):
+        return str(val)
 
     def _check_input_duplicate(self, a_input):
         for elem in self.input:
@@ -400,9 +446,7 @@ def add_output(self, at_type: Union[str, vocabulary.ThingTypesBase], **propertie
         :param properties: additional property specifications
         :return: the newly added Output object
         """
-        new = Output(at_type=at_type)
-        if len(properties) > 0:
-            new.properties = properties
+        new = Output(at_type=at_type, properties=properties)
         if new not in self.output:
             self.output.append(new)
         else:
@@ -412,7 +456,7 @@ def add_output(self, at_type: Union[str, vocabulary.ThingTypesBase], **propertie
     def add_parameter(self, name: str, description: str, type: param_value_types,
                       choices: Optional[List[real_valued_primitives]] = None,
                       multivalued: bool = False,
-                      default: Union[real_valued_primitives, List[real_valued_primitives]] = None):
+                      default: Union[None, real_valued_primitives, List[real_valued_primitives]] = None):
         """
         Helper method to add an element to the ``parameters`` list.
         """
@@ -456,10 +500,7 @@ def add_more(self, key: str, value: str):
             raise ValueError("Key and value should not be empty!")
 
     def jsonify(self, pretty=False):
-        if pretty:
-            return self.json(exclude_defaults=True, by_alias=True, indent=2)
-        else:
-            return self.json(exclude_defaults=True, by_alias=True)
+        return self.model_dump_json(exclude_defaults=True, by_alias=True, indent=2 if pretty else None)
 
 
 if __name__ == '__main__':
diff --git a/requirements.dev b/requirements.dev
index 242b1ac..e5d9f9d 100644
--- a/requirements.dev
+++ b/requirements.dev
@@ -9,4 +9,4 @@ sphinx-autobuild
 autodoc
 m2r2
 pillow
-setuptools>=62
+setuptools
diff --git a/requirements.txt b/requirements.txt
index 9efc988..6e38737 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-mmif-python==1.0.20
+mmif-python==1.1.1
 
 Flask>=2
 Flask-RESTful>=0.3.9
 gunicorn>=20
 lapps>=0.0.2
-pydantic>=1.8,<2
+pydantic>=2
 jsonschema>=3
diff --git a/setup.py b/setup.py
index ba4758f..b96a0ea 100644
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,8 @@
 #! /usr/bin/env python3
-import distutils.cmd
 import os
 from os import path
 import shutil
 
-import setuptools
-
 name = "clams-python"
 cmdclass = {}
 
@@ -25,22 +22,7 @@
 init_mod.write(f'__version__ = "{version}"')
 init_mod.close()
 
-
-class DoNothing(distutils.cmd.Command):
-    description = "run base code until `setuptools.setup()` line and exits 0."
-    user_options = []
-
-    def initialize_options(self) -> None:
-        pass
-
-    def finalize_options(self) -> None:
-        pass
-
-    def run(self):
-        pass
-
-
-cmdclass['donothing'] = DoNothing
+import setuptools
 
 setuptools.setup(
     name=name,
@@ -51,12 +33,12 @@ def run(self):
     long_description=long_desc,
     long_description_content_type="text/markdown",
     url="https://clams.ai",
+    license="Apache-2.0",
     classifiers=[
-        'Development Status :: 2 - Pre-Alpha',
+        'Development Status :: 5 - Production/Stable',
         'Framework :: Flask',
         'Framework :: Pytest',
-        'Intended Audience :: Developers ',
-        'License :: OSI Approved :: Apache Software License',
+        'Intended Audience :: Developers',
         'Programming Language :: Python :: 3 :: Only',
     ],
     cmdclass=cmdclass,
@@ -66,7 +48,7 @@ def run(self):
         'clams': ['develop/templates/**/*', 'develop/templates/**/.*']
     },
     install_requires=requires,
-    python_requires='>=3.8',
+    python_requires='>=3.10',
     packages=setuptools.find_packages(),
     entry_points={
         'console_scripts': [
diff --git a/tests/metadata.py b/tests/metadata.py
index 5cb7610..454e2fa 100644
--- a/tests/metadata.py
+++ b/tests/metadata.py
@@ -15,6 +15,5 @@ def appmetadata() -> AppMetadata:
     )
     metadata.add_input(DocumentTypes.TextDocument)
     metadata.add_input_oneof(DocumentTypes.AudioDocument, str(DocumentTypes.VideoDocument))
-    metadata.add_parameter(name='raise_error', description='force raise a ValueError',
-                           type='boolean', default='false')
+    metadata.add_parameter(name='raise_error', description='force raise a ValueError', type='boolean', default='false')
     return metadata