Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Limits(BaseModel):
class Activity(BaseModel):
stats: Stats
limits: Limits
queued: bool | None = None # TODO: review since it in NOT filled
queued: bool | None = None


ActivityStatusDict: TypeAlias = dict[NodeID, Activity]
71 changes: 36 additions & 35 deletions packages/models-library/src/models_library/basic_regex.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,57 @@
"""Regular expression patterns to build pydantic constrained strings

- Variants of the patterns with 'Named Groups' captured are suffixed with NG_RE

SEE tests_basic_regex.py for examples
"""

# TODO: for every pattern we should have a formatter function
# NOTE: some sites to manually check ideas
# https://regex101.com/
# https://pythex.org/
#

# Universally unique Identifier. Pattern taken from https://stackoverflow.com/questions/136505/searching-for-uuids-in-text-with-regex
import re
from typing import Final

UUID_RE_BASE = (
# Universally unique Identifier. Pattern taken from https://stackoverflow.com/questions/136505/searching-for-uuids-in-text-with-regex
r"[0-9a-fA-F]{8}-?[0-9a-fA-F]{4}-?[0-9a-fA-F]{4}-?[0-9a-fA-F]{4}-?[0-9a-fA-F]{12}"
)
UUID_RE = rf"^{UUID_RE_BASE}$"

# Formatted timestamps with date and time
DATE_RE = r"\d{4}-(12|11|10|0?[1-9])-(31|30|[0-2]?\d)T(2[0-3]|1\d|0?[0-9])(:(\d|[0-5]\d)){2}(\.\d{3})?Z"
DATE_RE = (
# Formatted timestamps with date and time
r"\d{4}-(12|11|10|0?[1-9])-(31|30|[0-2]?\d)T(2[0-3]|1\d|0?[0-9])(:(\d|[0-5]\d)){2}(\.\d{3})?Z"
)


# python-like version
SIMPLE_VERSION_RE = r"^(0|[1-9]\d*)(\.(0|[1-9]\d*)){2}(-(0|[1-9]\d*|\d*[-a-zA-Z][-\da-zA-Z]*)(\.(0|[1-9]\d*|\d*[-a-zA-Z][-\da-zA-Z]*))*)?(\+[-\da-zA-Z]+(\.[-\da-zA-Z-]+)*)?$"
SIMPLE_VERSION_RE = (
# python-like version
r"^(0|[1-9]\d*)(\.(0|[1-9]\d*)){2}(-(0|[1-9]\d*|\d*[-a-zA-Z][-\da-zA-Z]*)(\.(0|[1-9]\d*|\d*[-a-zA-Z][-\da-zA-Z]*))*)?(\+[-\da-zA-Z]+(\.[-\da-zA-Z-]+)*)?$"
)

# Semantic version
# SEE https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
#
# with capture groups: cg1 = major, cg2 = minor, cg3 = patch, cg4 = prerelease and cg5 = buildmetadata
SEMANTIC_VERSION_RE_W_CAPTURE_GROUPS = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
# with named groups: major, minor, patch, prerelease and buildmetadata
SEMANTIC_VERSION_RE_W_NAMED_GROUPS = r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"

SEMANTIC_VERSION_RE_W_CAPTURE_GROUPS = (
# with capture groups: cg1 = major, cg2 = minor, cg3 = patch, cg4 = prerelease and cg5 = buildmetadata
r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
)
SEMANTIC_VERSION_RE_W_NAMED_GROUPS = (
# with named groups: major, minor, patch, prerelease and buildmetadata
r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
)


# Regex to detect whether a string can be used as a variable identifier (see tests)
# - cannot start with spaces, _ (we only want public) or numbers
# https://docs.python.org/3/reference/lexical_analysis.html#identifiers
PUBLIC_VARIABLE_NAME_RE = r"^[^_\W0-9]\w*$"
PUBLIC_VARIABLE_NAME_RE = (
# Regex to detect whether a string can be used as a variable identifier (see tests)
# - cannot start with spaces, _ (we only want public) or numbers
# https://docs.python.org/3/reference/lexical_analysis.html#identifiers
r"^[^_\W0-9]\w*$"
)

MIME_TYPE_RE = (
r"([\w\*]*)\/(([\w\-\*]+\.)+)?([\w\-\*]+)(\+([\w\-\.]+))?(; ([\w+-\.=]+))?"
)

# Storage basic file ID
SIMCORE_S3_FILE_ID_RE = rf"^(exports\/\d+\/{UUID_RE_BASE}\.zip)|((api|({UUID_RE_BASE}))\/({UUID_RE_BASE})\/(.+)$)"
SIMCORE_S3_FILE_ID_RE = (
# Storage basic file ID
rf"^(exports\/\d+\/{UUID_RE_BASE}\.zip)|((api|({UUID_RE_BASE}))\/({UUID_RE_BASE})\/(.+)$)"
)


SIMCORE_S3_DIRECTORY_ID_RE = rf"^({UUID_RE_BASE})\/({UUID_RE_BASE})\/(.+)\/$"

# S3 - AWS bucket names [https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html]
S3_BUCKET_NAME_RE = re.compile(
# S3 - AWS bucket names [https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html]
r"^(?!xn--)[a-z0-9][a-z0-9-]{1,61}[a-z0-9]$(?<!-s3alias)"
)

Expand All @@ -62,11 +61,13 @@
DATCORE_COLLECTION_NAME_RE = rf"^N:collection:{UUID_RE_BASE}$"


TWILIO_ALPHANUMERIC_SENDER_ID_RE = r"(?!^\d+$)^[a-zA-Z0-9\s]{2,11}$"
# Alphanumeric Sender IDs may be up to 11 characters long.
# Accepted characters include both upper- and lower-case Ascii letters,
# the digits 0 through 9, and the space character.
# They may not be only numerals.
TWILIO_ALPHANUMERIC_SENDER_ID_RE = (
# Alphanumeric Sender IDs may be up to 11 characters long.
# Accepted characters include both upper- and lower-case Ascii letters,
# the digits 0 through 9, and the space character.
# They may not be only numerals.
r"(?!^\d+$)^[a-zA-Z0-9\s]{2,11}$"
)


# Docker
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import inspect
import logging
from collections.abc import Callable, Iterator
from dataclasses import dataclass
from typing import Any, get_args, get_origin
from urllib.parse import quote

from ..services import Author, ServiceKey, ServiceMetaDataPublished, ServiceVersion
from ..services_io import ServiceInput, ServiceOutput
from ._settings import AUTHORS, FunctionServiceSettings

_logger = logging.getLogger(__name__)
Expand All @@ -24,7 +27,7 @@ def create_fake_thumbnail_url(label: str) -> str:
return f"https://fakeimg.pl/100x100/ff0000%2C128/000%2C255/?text={quote(label)}"


class ServiceNotFound(KeyError):
class ServiceNotFoundError(KeyError):
pass


Expand All @@ -35,6 +38,135 @@ class _Record:
is_under_development: bool = False


_TYPE_MAPPING = {
"number": float,
"integer": int,
"boolean": bool,
"string": str,
"data:*/*": str,
"ref_contentSchema": type[Any],
}


def _service_type_to_python_type(property_type: str) -> type[Any]:
"""Convert service property type to Python type"""
# Fast lookup for exact matches
if mapped_type := _TYPE_MAPPING.get(property_type):
return mapped_type

# Handle data: prefix patterns
if property_type.startswith("data:"):
return str

# Default to Any for unknown types
return type[Any]


def _is_unconstrained(type_hint: Any) -> bool:
    """Return True when *type_hint* places no restriction (``Any`` or ``type[Any]``)."""
    return type_hint is Any or type_hint == type[Any]


def _annotation_accepts(annotation: Any, expected_type: Any) -> bool:
    """Return True when *annotation* equals *expected_type* or lists it among its type arguments."""
    if _is_unconstrained(annotation) or _is_unconstrained(expected_type):
        return True
    if get_origin(annotation) is not None:
        # Parametrized hint (e.g. ``int | None``): look through its arguments
        return expected_type in get_args(annotation)
    return annotation == expected_type


def validate_callable_signature(
    implementation: Callable | None,
    service_inputs: dict[str, ServiceInput] | None,
    service_outputs: dict[str, ServiceOutput] | None,
) -> None:
    """
    Validates that the callable signature matches the service inputs and outputs.

    The number of positional parameters must equal the number of service inputs.
    Type-hint mismatches (parameters and return type) are only reported as
    warnings in the log; they never raise.

    Args:
        implementation: The callable to validate (nothing is checked if None)
        service_inputs: Dictionary of service input specifications
        service_outputs: Dictionary of service output specifications

    Raises:
        ValueError: If the number of positional parameters differs from the
            number of service inputs
    """
    if implementation is None:
        return

    sig = inspect.signature(implementation)
    service_inputs = service_inputs or {}
    service_outputs = service_outputs or {}

    # Validate input parameters: only positional parameters map to service inputs
    positional_params = [
        p
        for p in sig.parameters.values()
        if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
    ]
    expected_input_count = len(service_inputs)
    actual_input_count = len(positional_params)

    if actual_input_count != expected_input_count:
        msg = f"Function has {actual_input_count} parameters but service expects {expected_input_count} inputs"
        raise ValueError(msg)

    # Check parameter types if type hints are available.
    # Inputs are matched to parameters by position (dict order of service_inputs)
    for param, input_spec in zip(positional_params, service_inputs.values()):
        if param.annotation is inspect.Parameter.empty:
            continue

        expected_type = _service_type_to_python_type(input_spec.property_type)
        if not _annotation_accepts(param.annotation, expected_type):
            _logger.warning(
                "Parameter '%s' type hint %s doesn't match expected service input type %s",
                param.name,
                param.annotation,
                expected_type,
            )

    # Validate return type (only when outputs are declared and a hint exists)
    return_annotation = sig.return_annotation
    if not service_outputs or return_annotation is inspect.Signature.empty:
        return

    if len(service_outputs) == 1:
        # Single output: return type should match directly
        output_spec = next(iter(service_outputs.values()))
        expected_return_type = _service_type_to_python_type(output_spec.property_type)

        if not _annotation_accepts(return_annotation, expected_return_type):
            _logger.warning(
                "Return type %s doesn't match expected service output type %s",
                return_annotation,
                expected_return_type,
            )
    else:
        # Multiple outputs: expect tuple or dict return type.
        # get_origin() is None for the bare ``tuple``/``dict`` classes, so fall
        # back to the annotation itself to accept un-parametrized hints as well
        container = get_origin(return_annotation) or return_annotation
        if container not in (tuple, dict):
            _logger.warning(
                "Multiple outputs expected but return type %s is not tuple or dict",
                return_annotation,
            )


class FunctionServices:
"""Used to register a collection of function services"""

Expand All @@ -46,21 +178,23 @@ def add(
self,
meta: ServiceMetaDataPublished,
implementation: Callable | None = None,
*,
is_under_development: bool = False,
):
"""
raises ValueError
"""
if not isinstance(meta, ServiceMetaDataPublished):
msg = f"Expected ServiceDockerData, got {type(meta)}"
raise ValueError(msg)
raise TypeError(msg)

# ensure unique
if (meta.key, meta.version) in self._functions:
msg = f"{meta.key, meta.version} is already registered"
raise ValueError(msg)

# TODO: ensure callable signature fits metadata
# Validate callable signature matches metadata
validate_callable_signature(implementation, meta.inputs, meta.outputs)

# register
self._functions[(meta.key, meta.version)] = _Record(
Expand All @@ -71,8 +205,10 @@ def add(

def extend(self, other: "FunctionServices"):
# pylint: disable=protected-access
for f in other._functions.values():
self.add(f.meta, f.implementation, f.is_under_development)
for f in other._functions.values(): # noqa: SLF001
self.add(
f.meta, f.implementation, is_under_development=f.is_under_development
)

def _skip_dev(self):
skip = True
Expand Down Expand Up @@ -110,7 +246,7 @@ def get_implementation(
func = self._functions[(service_key, service_version)]
except KeyError as err:
msg = f"{service_key}:{service_version} not found in registry"
raise ServiceNotFound(msg) from err
raise ServiceNotFoundError(msg) from err
return func.implementation

def get_metadata(
Expand All @@ -121,7 +257,7 @@ def get_metadata(
func = self._functions[(service_key, service_version)]
except KeyError as err:
msg = f"{service_key}:{service_version} not found in registry"
raise ServiceNotFound(msg) from err
raise ServiceNotFoundError(msg) from err
return func.meta

def __len__(self):
Expand Down
Loading
Loading