From ed69056450fcf0ee140ef0c395902ab496e29a9d Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Thu, 18 Jul 2024 10:04:43 +0200 Subject: [PATCH 1/3] feat: Allow arbitrary values as categoricals --- src/ConfigSpace/api/types/categorical.py | 9 +- src/ConfigSpace/conditions.py | 10 +- src/ConfigSpace/configuration.py | 3 +- .../hyperparameters/categorical.py | 168 +++++++++++---- .../hyperparameters/hp_components.py | 30 ++- .../hyperparameters/hyperparameter.py | 32 +-- src/ConfigSpace/hyperparameters/ordinal.py | 111 ++++++++-- src/ConfigSpace/util.py | 5 +- test/test_hyperparameters.py | 193 +++++++++++++++++- 9 files changed, 456 insertions(+), 105 deletions(-) diff --git a/src/ConfigSpace/api/types/categorical.py b/src/ConfigSpace/api/types/categorical.py index d2c183bb..1273f402 100644 --- a/src/ConfigSpace/api/types/categorical.py +++ b/src/ConfigSpace/api/types/categorical.py @@ -5,6 +5,7 @@ from typing_extensions import TypeAlias from ConfigSpace.hyperparameters import CategoricalHyperparameter, OrdinalHyperparameter +from ConfigSpace.types import NotSet, _NotSet # We only accept these types in `items` T: TypeAlias = Union[str, int, float] @@ -16,7 +17,7 @@ def Categorical( name: str, items: Sequence[T], *, - default: T | None = None, + default: T | _NotSet = NotSet, weights: Sequence[float] | None = None, ordered: Literal[False], meta: dict | None = None, @@ -29,7 +30,7 @@ def Categorical( name: str, items: Sequence[T], *, - default: T | None = None, + default: T | _NotSet = NotSet, weights: Sequence[float] | None = None, ordered: Literal[True], meta: dict | None = None, @@ -42,7 +43,7 @@ def Categorical( name: str, items: Sequence[T], *, - default: T | None = None, + default: T | _NotSet = NotSet, weights: Sequence[float] | None = None, ordered: bool = ..., meta: dict | None = None, @@ -53,7 +54,7 @@ def Categorical( name: str, items: Sequence[T], *, - default: T | None = None, + default: T | _NotSet = NotSet, weights: Sequence[float] | None = None, 
ordered: bool = False, meta: dict | None = None, diff --git a/src/ConfigSpace/conditions.py b/src/ConfigSpace/conditions.py index 1e4d72b1..f7bd20dc 100644 --- a/src/ConfigSpace/conditions.py +++ b/src/ConfigSpace/conditions.py @@ -37,21 +37,13 @@ import numpy as np from more_itertools import all_equal, unique_everseen -from ConfigSpace.types import f64 +from ConfigSpace.types import NotSet, f64 if TYPE_CHECKING: from ConfigSpace.hyperparameters.hyperparameter import Hyperparameter from ConfigSpace.types import Array, Mask -class _NotSet: - def __repr__(self) -> str: - return "ValueNotSetObject" - - -NotSet = _NotSet() # Sentinal value for unset values - - class Condition(ABC): def __init__( self, diff --git a/src/ConfigSpace/configuration.py b/src/ConfigSpace/configuration.py index b3f339ac..9c22c367 100644 --- a/src/ConfigSpace/configuration.py +++ b/src/ConfigSpace/configuration.py @@ -6,10 +6,9 @@ import numpy as np -from ConfigSpace.conditions import NotSet from ConfigSpace.exceptions import IllegalValueError from ConfigSpace.hyperparameters import FloatHyperparameter -from ConfigSpace.types import f64 +from ConfigSpace.types import NotSet, f64 if TYPE_CHECKING: from ConfigSpace.configuration_space import ConfigurationSpace diff --git a/src/ConfigSpace/hyperparameters/categorical.py b/src/ConfigSpace/hyperparameters/categorical.py index a102ce93..df0a1fc2 100644 --- a/src/ConfigSpace/hyperparameters/categorical.py +++ b/src/ConfigSpace/hyperparameters/categorical.py @@ -3,6 +3,7 @@ from collections import Counter from collections.abc import Hashable, Mapping, Sequence from dataclasses import dataclass, field +from itertools import product from typing import TYPE_CHECKING, Any, ClassVar, Set from typing_extensions import deprecated, override @@ -15,7 +16,7 @@ ) from ConfigSpace.hyperparameters.hp_components import Neighborhood, TransformerSeq from ConfigSpace.hyperparameters.hyperparameter import Hyperparameter -from ConfigSpace.types import Array, f64 +from 
ConfigSpace.types import Array, NotSet, _NotSet, f64 if TYPE_CHECKING: from ConfigSpace.types import Array @@ -127,12 +128,13 @@ class CategoricalHyperparameter(Hyperparameter[Any, Any]): """The number of possible values for the categorical hyperparameter.""" probabilities: Array[f64] = field(repr=False) + _contains_sequence_as_value: bool def __init__( self, name: str, choices: Sequence[Any], - default_value: Any | None = None, + default_value: Any | _NotSet = NotSet, meta: Mapping[Hashable, Any] | None = None, weights: Sequence[float] | Array[np.number] | None = None, ) -> None: @@ -155,28 +157,33 @@ def __init__( The length of the weights has to be the same as the length of the choices. """ - # TODO: We can allow for None but we need to be sure it doesn't break - # anything elsewhere. - if any(choice is None for choice in choices): - raise TypeError("Choice 'None' is not supported") - if isinstance(choices, Set): raise TypeError( "Using a set of choices is prohibited as it can result in " "non-deterministic behavior. Please use a list or a tuple.", ) - # TODO:For now we assume hashable for choices to make the below check with - # Counter work. We can probably relax this assumption choices = tuple(choices) - counter = Counter(choices) - for choice, count in counter.items(): - if count > 1: - raise ValueError( - f"Choices for categorical hyperparameters {name} contain" - f" choice `{choice}` {count} times, while only a single oocurence" - " is allowed.", - ) + + # We first try the fast route if it's Hashable, otherwise we resort to doing + # an N^2 check. 
+ try: + counter = Counter(choices) + for choice, count in counter.items(): + if count > 1: + raise ValueError( + f"Choices for categorical hyperparameters {name} contain" + f" choice `{choice}` {count} times, while only a single" + " occurence is allowed.", + ) + except TypeError: + for a, b in product(choices, choices): + if a is not b and a == b: + raise ValueError( # noqa: B904 + f"Choices for categorical hyperparameters {name} contain" + f" choice `{a}` multiple times, while only a single occurence" + " is allowed.", + ) if isinstance(weights, set): raise TypeError( @@ -208,7 +215,7 @@ def __init__( else: tupled_weights = None - if default_value is not None and default_value not in choices: + if default_value is not NotSet and default_value not in choices: raise ValueError( "The default value has to be one of the choices. " f"Got {default_value!r} which is not in {choices}.", @@ -221,9 +228,9 @@ def __init__( _weights: Array[f64] = np.asarray(weights, dtype=f64) probabilities = _weights / np.sum(_weights) - if default_value is None and weights is None: + if default_value is NotSet and weights is None: default_value = choices[0] - elif default_value is None: + elif default_value is NotSet: highest_prob_index = np.argmax(probabilities) default_value = choices[highest_prob_index] elif default_value in choices: @@ -241,28 +248,29 @@ def __init__( else: vector_dist = UniformIntegerDistribution(size=size) - # NOTE: Unfortunatly, numpy will promote number types to str - # if there are string types in the array, where we'd rather - # stick to object type in that case. Hence the manual... - seq_choices = np.asarray(choices) - if seq_choices.dtype.kind in {"U", "S"} and not all( - isinstance(choice, str) for choice in choices - ): - seq_choices = np.asarray(choices, dtype=object) - - # We also want to see about value casting, i.e. 
if our numpy array - # does not contain objects, then we should include a `value_cast` - # to transformting `to_value` for a single object type - if seq_choices.dtype.kind == "b": - value_cast = bool - elif seq_choices.dtype.kind in {"i", "u"}: - value_cast = int - elif seq_choices.dtype.kind == "f": - value_cast = float - elif seq_choices.dtype.kind in {"U", "S"}: - value_cast = str - else: - value_cast = None + try: + # This can fail with a ValueError if the choices contain arbitrary objects + # that are list like. + seq_choices = np.asarray(choices) + + # NOTE: Unfortunately, numpy will promote number types to str + # if there are string types in the array, where we'd rather + # stick to object type in that case. Hence the manual... + if seq_choices.dtype.kind in {"U", "S"} and not all( + isinstance(choice, str) for choice in choices + ): + seq_choices = np.array(choices, dtype=object) + + except ValueError: + seq_choices = list(choices) + + # If the Hyperparameter receives a Sequence during legality checks or + # conversions, we need to inform it that one of the values is a Sequence itself, + # i.e. 
we should treat it as a single value and not a list of multiple values + self._contains_sequence_as_value = any( + isinstance(choice, Sequence) and not isinstance(choice, str) + for choice in choices + ) self.probabilities = probabilities self.choices = choices @@ -277,7 +285,7 @@ def __init__( neighborhood=NeighborhoodCat(size=size), neighborhood_size=self._categorical_neighborhood_size, meta=meta, - value_cast=value_cast, + value_cast=None, ) def to_uniform(self) -> CategoricalHyperparameter: @@ -314,11 +322,81 @@ def __eq__(self, other: Any) -> bool: return True - def _categorical_neighborhood_size(self, value: Any | None) -> int: - if value is None or value not in self.choices: + def _categorical_neighborhood_size(self, value: Any | _NotSet) -> int: + if value is NotSet or value not in self.choices: return self.size return self.size - 1 + @override + def to_vector(self, value: Any | Sequence[Any] | Array[Any]) -> f64 | Array[f64]: + if isinstance(value, np.ndarray): + return self._transformer.to_vector(value) + + if isinstance(value, str): + return self._transformer.to_vector(np.array([value]))[0] + + # Got a sequence of things, could be a list of stuff or a single value which is + # itself a list, e.g. 
a tuple (1, 2) indicating a single value + # If we could have single values which are sequences, we need to do some + # magic to get it into an array without numpy flattening it down + if isinstance(value, Sequence): + if self._contains_sequence_as_value: + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(1, dtype=object) + _v[0] = value + return self._transformer.to_vector(_v)[0] + + # A sequence of things containing different values + return self._transformer.to_vector(np.asarray(value)) + + # Single value that is not a sequence + return self._transformer.to_vector(np.array([value]))[0] + + @override + def legal_value(self, value: Any | Sequence[Any] | Array[Any]) -> bool | Mask: + if isinstance(value, np.ndarray): + return self._transformer.legal_value(value) + + if isinstance(value, str): + return self._transformer.legal_value(np.array([value]))[0] + + # Got a sequence of things, could be a list of stuff or a single value which is + # itself a list, e.g. a tuple (1, 2) indicating a single value + # If we could have single values which are sequences, we need to do some + # magic to get it into an array without numpy flattening it down + if isinstance(value, Sequence): + if self._contains_sequence_as_value: + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(1, dtype=object) + _v[0] = value + return self._transformer.legal_value(_v)[0] + + # A sequence of things containing different values + return self._transformer.legal_value(np.asarray(value)) + + # Single value that is not a sequence + return self._transformer.legal_value(np.array([value]))[0] + + @override + def pdf_values(self, values: Sequence[Any] | Array[Any]) -> Array[f64]: + if isinstance(values, np.ndarray): + if values.ndim != 1: + raise ValueError("Method pdf expects a one-dimensional numpy array") + + vector = self.to_vector(values) # type: ignore + return self.pdf_vector(vector) + + if self._contains_sequence_as_value: + # We have to convert it into a numpy array of 
objects carefully + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(len(values), dtype=object) + _v[:] = values + _vector: Array[f64] = self.to_vector(_v) # type: ignore + return self.pdf_vector(_vector) + + vector: Array[f64] = self.to_vector(values) # type: ignore + return self.pdf_vector(vector) + @property @deprecated("Please use `len(hp.choices)` or 'hp.size' instead.") def num_choices(self) -> int: diff --git a/src/ConfigSpace/hyperparameters/hp_components.py b/src/ConfigSpace/hyperparameters/hp_components.py index 1e53101f..183416f5 100644 --- a/src/ConfigSpace/hyperparameters/hp_components.py +++ b/src/ConfigSpace/hyperparameters/hp_components.py @@ -126,7 +126,7 @@ class TransformerSeq(Transformer[Any]): seq: The sequence of values to transform. """ - seq: Array[Any] + seq: Array[Any] | list[Any] # If `list`, assumed to contain sequence objects """The original sequence of values.""" lower_vectorized: f64 = field(init=False) @@ -172,20 +172,40 @@ def to_value(self, vector: Array[f64]) -> Array[Any]: f" representation into a value in {self.seq}." 
f"Expected integers but got {vector} (dtype: {vector.dtype})", ) - indices: Array[np.intp] = np.rint(vector).astype(np.intp) - return self.seq[indices] # type: ignore + + if isinstance(self.seq, np.ndarray): + indices = np.rint(vector).astype(i64) + return self.seq[indices] + + items = [self.seq[int(np.rint(i))] for i in vector] + if isinstance(self.seq, list): + # We have to convert it into a numpy array of objects carefully + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(len(items), dtype=object) + _v[:] = items + return _v + + return np.array(items, dtype=object) @override def to_vector(self, value: Array[Any]) -> Array[f64]: if self._lookup is not None: return np.array([self._lookup[v] for v in value], dtype=f64) - return np.flatnonzero(np.isin(self.seq, value)).astype(f64) + + if isinstance(self.seq, np.ndarray): + return np.flatnonzero(np.isin(self.seq, value)).astype(f64) + + return np.array([self.seq.index(v) for v in value], dtype=f64) @override def legal_value(self, value: Array[Any]) -> Mask: if self._lookup is not None: return np.array([v in self._lookup for v in value], dtype=np.bool_) - return np.isin(value, self.seq) + + if isinstance(self.seq, np.ndarray): + return np.isin(value, self.seq) + + return np.array([v in self.seq for v in value], dtype=np.bool_) @override def legal_vector(self, vector: Array[f64]) -> Mask: diff --git a/src/ConfigSpace/hyperparameters/hyperparameter.py b/src/ConfigSpace/hyperparameters/hyperparameter.py index 43ea29e9..c7896917 100644 --- a/src/ConfigSpace/hyperparameters/hyperparameter.py +++ b/src/ConfigSpace/hyperparameters/hyperparameter.py @@ -17,7 +17,7 @@ import numpy as np -from ConfigSpace.types import DType, Number, ValueT, f64, i64 +from ConfigSpace.types import DType, NotSet, Number, ValueT, _NotSet, f64, i64 if TYPE_CHECKING: from ConfigSpace.hyperparameters.distributions import Distribution @@ -64,7 +64,9 @@ class Hyperparameter(ABC, Generic[ValueT, DType]): _transformer: Transformer[DType] = 
field(repr=False) _neighborhood: Neighborhood = field(repr=False, compare=False) _value_cast: Callable[[DType], ValueT] | None = field(repr=False, compare=False) - _neighborhood_size: float | Callable[[ValueT | DType | None], int | float] = field( + _neighborhood_size: ( + float | Callable[[ValueT | DType | _NotSet | None], int | float] + ) = field( repr=False, compare=False, ) @@ -367,7 +369,7 @@ def to_vector( # type: ignore ) -> Array[f64]: ... @overload - def to_vector(self, value: ValueT | DType) -> f64: ... + def to_vector(self, value: ValueT | DType | Sequence[ValueT | DType]) -> f64: ... def to_vector( self, @@ -516,15 +518,10 @@ def pdf_values( The probability density of the values. Where values are not legal, the probability density is zero. """ - # TODO(eddiebergman): Backwards compatible restriction, why this restriction? - _values = np.asarray(values) - if _values.ndim != 1: - raise ValueError( - "Method pdf expects a one-dimensional numpy array but got" - f" {_values.ndim} dimensions." - f"\n{_values}", - ) - vector = self.to_vector(_values) + if isinstance(values, np.ndarray) and values.ndim != 1: + raise ValueError("Method pdf expects a one-dimensional numpy array") + + vector = self.to_vector(values) return self.pdf_vector(vector) def copy(self, **kwargs: Any) -> Self: @@ -542,7 +539,10 @@ def copy(self, **kwargs: Any) -> Self: # overwrite this. return replace(self, **kwargs) # type: ignore - def get_num_neighbors(self, value: ValueT | DType | None = None) -> int | float: + def get_num_neighbors( + self, + value: ValueT | DType | _NotSet = NotSet, + ) -> int | float: """Get the number of neighbors to sample for a given value. 
Args: @@ -729,11 +729,11 @@ def to_uniform( class IntegerHyperparameter(NumericalHyperparameter[int, i64]): """Base class for integer hyperparameters in the configuration space.""" - def _integer_neighborhood_size(self, value: int | i64 | None) -> int: - if value is None: + def _integer_neighborhood_size(self, value: int | i64 | _NotSet) -> int: + if value is NotSet: return int(self.size) - if self.lower <= value <= self.upper: + if self.lower <= value <= self.upper: # type: ignore return int(self.size) - 1 return int(self.size) diff --git a/src/ConfigSpace/hyperparameters/ordinal.py b/src/ConfigSpace/hyperparameters/ordinal.py index 447b379a..1c9aad31 100644 --- a/src/ConfigSpace/hyperparameters/ordinal.py +++ b/src/ConfigSpace/hyperparameters/ordinal.py @@ -4,7 +4,7 @@ from dataclasses import dataclass, field from functools import partial from typing import Any, ClassVar -from typing_extensions import deprecated +from typing_extensions import deprecated, override import numpy as np @@ -14,7 +14,7 @@ ordinal_neighborhood, ) from ConfigSpace.hyperparameters.hyperparameter import Hyperparameter -from ConfigSpace.types import Array, NotSet, _NotSet, i64 +from ConfigSpace.types import Array, Mask, NotSet, _NotSet, f64, i64 @dataclass(init=False) @@ -51,11 +51,13 @@ class OrdinalHyperparameter(Hyperparameter[Any, Any]): """Size of the hyperparameter, which is the number of possible values the hyperparameter can take on within the specified sequence.""" + _contains_sequence_as_value: bool + def __init__( self, name: str, sequence: Sequence[Any], - default_value: Any | None = None, + default_value: Any | _NotSet = NotSet, meta: Mapping[Hashable, Any] | None = None, ) -> None: """Initialize an ordinal hyperparameter. 
@@ -80,7 +82,7 @@ def __init__( ) size = len(sequence) - if default_value is None: + if default_value is NotSet: default_value = sequence[0] elif default_value not in sequence: raise ValueError( @@ -88,17 +90,32 @@ def __init__( f"Got {default_value!r} which is not in {sequence}.", ) - seq_choices = np.asarray(sequence) - # NOTE: Unfortunatly, numpy will promote number types to str - # if there are string types in the array, where we'd rather - # stick to object type in that case. Hence the manual... - if seq_choices.dtype.kind in {"U", "S"} and not all( - isinstance(item, str) for item in sequence - ): - seq_choices = np.asarray(sequence, dtype=object) + try: + # This can fail with a ValueError if the choices contain arbitrary objects + # that are list like. + seq_choices = np.asarray(sequence) + + # NOTE: Unfortunately, numpy will promote number types to str + # if there are string types in the array, where we'd rather + # stick to object type in that case. Hence the manual... + if seq_choices.dtype.kind in {"U", "S"} and not all( + isinstance(item, str) for item in sequence + ): + seq_choices = np.array(sequence, dtype=object) + + except ValueError: + seq_choices = list(sequence) self.sequence = tuple(sequence) + # If the Hyperparameter receives a Sequence during legality checks or + # conversions, we need to inform it that one of the values is a Sequence itself, + # i.e. 
we should treat it as a single value and not a list of multiple values + self._contains_sequence_as_value = any( + isinstance(item, Sequence) and not isinstance(item, str) + for item in self.sequence + ) + super().__init__( name=name, size=size, @@ -155,6 +172,76 @@ def __str__(self) -> str: ] return ", ".join(parts) + @override + def to_vector(self, value: Any | Sequence[Any] | Array[Any]) -> f64 | Array[f64]: + if isinstance(value, np.ndarray): + return self._transformer.to_vector(value) + + if isinstance(value, str): + return self._transformer.to_vector(np.array([value]))[0] + + # Got a sequence of things, could be a list of stuff or a single value which is + # itself a list, e.g. a tuple (1, 2) indicating a single value + # If we could have single values which are sequences, we need to do some + # magic to get it into an array without numpy flattening it down + if isinstance(value, Sequence): + if self._contains_sequence_as_value: + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(1, dtype=object) + _v[0] = value + return self._transformer.to_vector(_v)[0] + + # A sequence of things containing different values + return self._transformer.to_vector(np.asarray(value)) + + # Single value that is not a sequence + return self._transformer.to_vector(np.array([value]))[0] + + @override + def legal_value(self, value: Any | Sequence[Any] | Array[Any]) -> bool | Mask: + if isinstance(value, np.ndarray): + return self._transformer.legal_value(value) + + if isinstance(value, str): + return self._transformer.legal_value(np.array([value]))[0] + + # Got a sequence of things, could be a list of stuff or a single value which is + # itself a list, e.g. 
a tuple (1, 2) indicating a single value + # If we could have single values which are sequences, we need to do some + # magic to get it into an array without numpy flattening it down + if isinstance(value, Sequence): + if self._contains_sequence_as_value: + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(1, dtype=object) + _v[0] = value + return self._transformer.legal_value(_v)[0] + + # A sequence of things containing different values + return self._transformer.legal_value(np.asarray(value)) + + # Single value that is not a sequence + return self._transformer.legal_value(np.array([value]))[0] + + @override + def pdf_values(self, values: Sequence[Any] | Array[Any]) -> Array[f64]: + if isinstance(values, np.ndarray): + if values.ndim != 1: + raise ValueError("Method pdf expects a one-dimensional numpy array") + + vector = self.to_vector(values) + return self.pdf_vector(vector) + + if self._contains_sequence_as_value: + # We have to convert it into a numpy array of objects carefully + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(len(values), dtype=object) + _v[:] = values + _vector: Array[f64] = self.to_vector(_v) # type: ignore + return self.pdf_vector(_vector) + + vector: Array[f64] = self.to_vector(values) # type: ignore + return self.pdf_vector(vector) + @property @deprecated("Please use 'len(hp.sequence)' or `hp.size` instead.") def num_elements(self) -> int: diff --git a/src/ConfigSpace/util.py b/src/ConfigSpace/util.py index 63a85cb7..f93baab4 100644 --- a/src/ConfigSpace/util.py +++ b/src/ConfigSpace/util.py @@ -50,6 +50,7 @@ UniformFloatHyperparameter, UniformIntegerHyperparameter, ) +from ConfigSpace.types import NotSet if TYPE_CHECKING: from ConfigSpace.configuration_space import ConfigurationSpace @@ -82,8 +83,8 @@ def impute_inactive_values( """ values = {} for hp in configuration.config_space.values(): - value = configuration.get(hp.name) - if value is None: + value = configuration.get(hp.name, NotSet) + if value is 
NotSet: if strategy == "default": new_value = hp.default_value diff --git a/test/test_hyperparameters.py b/test/test_hyperparameters.py index 5be3d9cf..1ddbc87b 100644 --- a/test/test_hyperparameters.py +++ b/test/test_hyperparameters.py @@ -29,11 +29,17 @@ import copy from collections import defaultdict -from typing import Any +from collections.abc import Mapping +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Hashable import numpy as np import pytest +from ConfigSpace.conditions import AndConjunction, EqualsCondition, InCondition +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.forbidden import ForbiddenEqualsClause, ForbiddenInClause from ConfigSpace.hyperparameters import ( BetaFloatHyperparameter, BetaIntegerHyperparameter, @@ -45,8 +51,13 @@ UniformFloatHyperparameter, UniformIntegerHyperparameter, ) +from ConfigSpace.util import get_one_exchange_neighbourhood -META_DATA = {"additional": "meta-data", "useful": "for integrations", "input_id": 42} +META_DATA: Mapping[Hashable, Any] = { + "additional": "meta-data", + "useful": "for integrations", + "input_id": 42, +} def test_constant(): @@ -84,7 +95,7 @@ def test_constant(): Constant(name, "value") # test that meta-data is stored correctly - c1_meta = Constant("value", 1, dict(META_DATA)) + c1_meta = Constant("value", 1, meta=dict(META_DATA)) assert c1_meta.meta == META_DATA # Test getting the size @@ -241,7 +252,7 @@ def test_uniformfloat(): 10, log=True, default_value=1.0, - meta=dict(META_DATA), + meta=META_DATA, ) assert f_meta.meta == META_DATA @@ -2088,8 +2099,10 @@ def test_categorical(): f2 = CategoricalHyperparameter("param", list(range(1000))) f2_ = CategoricalHyperparameter("param", list(range(1000))) assert f2 == f2_ - assert "param, Type: Categorical, Choices: {%s}, Default: 0" % ", ".join( - [str(choice) for choice in range(1000)], + assert "param, Type: Categorical, Choices: {{{}}}, Default: 0".format( + ", ".join( + 
[str(choice) for choice in range(1000)], + ), ) == str(f2) f3 = CategoricalHyperparameter("param", list(range(999))) @@ -2230,13 +2243,10 @@ def test_categorical_choices(): with pytest.raises( ValueError, match="Choices for categorical hyperparameters param contain choice `a` 2 times, " - "while only a single oocurence is allowed.", + "while only a single occurence is allowed.", ): CategoricalHyperparameter("param", ["a", "a"]) - with pytest.raises(TypeError, match="Choice 'None' is not supported"): - CategoricalHyperparameter("param", ["a", None]) - def test_categorical_default(): # Test that the default value is the most probable choice when weights are given @@ -2882,3 +2892,166 @@ def test_hyperparam_representation(): ) c1 = CategoricalHyperparameter("param", [True, False]) assert str(c1) == "param, Type: Categorical, Choices: {True, False}, Default: True" + + +@pytest.mark.parametrize( + "hp, i", + [ + ( + CategoricalHyperparameter("param", [True, False, None], default_value=None), + 2, + ), + ( + CategoricalHyperparameter("param", ["a", "b", None], default_value=None), + 2, + ), + (CategoricalHyperparameter("param", [None]), 0), + (CategoricalHyperparameter("param", [None, 1, 2]), 0), + ( + OrdinalHyperparameter( + "param", + [1, None, 2], + default_value=None, + ), # Essential None is in the middle for tests + 1, + ), + (OrdinalHyperparameter("param", [None]), 0), + ], +) +def test_none_allowed_in_categorical_ordinal( + hp: CategoricalHyperparameter | OrdinalHyperparameter, + i: int, + tmp_path: Path, +) -> None: + assert hp.legal_value(None) + assert hp.to_value(np.float64(i)) is None + assert hp.to_vector(None) == i + + if hp.size != 1: + seq = hp.choices if isinstance(hp, CategoricalHyperparameter) else hp.sequence + first_non_none = next(x for x in seq if x is not None) + assert None in hp.neighbors_values(first_non_none, n=hp.size) + assert 0 < hp.get_num_neighbors(None) < hp.size + + assert hp.pdf_values([None])[0] > 0 + + space = 
ConfigurationSpace({"c": hp}) + + _path = tmp_path / "space.json" + with _path.open("w") as f: + space.to_json(f) + + with _path.open("r") as f: + loaded_space = ConfigurationSpace.from_json(f) + + assert space == loaded_space + + default_config = space.get_default_configuration() + assert dict(default_config) == {"param": None} + + assert default_config._vector[0] == i + + assert None in default_config.values() + default_config["param"] = None # no raise + assert default_config == default_config # noqa: PLR0124 + default_config.check_valid_configuration() # no raise + + _ = list(get_one_exchange_neighbourhood(default_config, seed=1)) # no raise + + +@dataclass +class _DummyClass: + x: int + + +@pytest.mark.parametrize( + "hp", + [ + ( + CategoricalHyperparameter( + "param", + [{"hello": "world"}, _DummyClass(4), (1, 2), None], + default_value=_DummyClass(4), + ) + ), + ( + OrdinalHyperparameter( + "param", + [(0, 0), (10, 20), _DummyClass(10), (30, 40), None], + ) + ), + ], +) +def test_arbitrary_object_allowed_in_categorical_ordinal( + hp: CategoricalHyperparameter | OrdinalHyperparameter, + tmp_path: Path, +) -> None: + assert hp == hp # noqa: PLR0124 + + _seq = hp.choices if isinstance(hp, CategoricalHyperparameter) else hp.sequence + for s in _seq: + assert hp.legal_value(s) + + vector_value = hp.to_vector(s) + assert hp.lower_vectorized <= vector_value <= hp.upper_vectorized + + value_value = hp.to_value(vector_value) + assert value_value == s + + if isinstance(hp, CategoricalHyperparameter): + neighbors = hp.neighbors_values(s, n=hp.size) + for other in _seq: + if other is s: + continue + assert other in neighbors.tolist() + + assert 0 < hp.get_num_neighbors(s) < hp.size + + assert hp.pdf_values([s])[0] > 0 + + space = ConfigurationSpace({"c": hp}) + assert space == space # noqa: PLR0124 + + with pytest.raises(TypeError): + _path = tmp_path / "space.json" + with _path.open("w") as f: + space.to_json(f) + + default_config = space.get_default_configuration() + 
default_config.check_valid_configuration() # no raise + + for s in _seq: + u = UniformFloatHyperparameter("u", 1, 10) + space_with_cond = ConfigurationSpace() + space_with_cond.add( + hp, + u, + AndConjunction( + EqualsCondition(child=u, parent=hp, value=s), + InCondition(child=u, parent=hp, values=[s]), + ), + ) + samples = space_with_cond.sample_configuration(10) + for sample in samples: + ns = list(get_one_exchange_neighbourhood(sample, seed=1)) # no raise + for n in ns: + n.check_valid_configuration() # no raise + + for s in _seq: + # We can't put a forbidden on the default value unfortunatly... + if s == hp.default_value: + continue + + u = UniformFloatHyperparameter("u", 1, 10) + space_with_forb = ConfigurationSpace() + space_with_forb.add( + hp, + u, + ForbiddenEqualsClause(hp, s), + ForbiddenInClause(hp, [s]), + ) + samples = space_with_cond.sample_configuration(10) + for sample in samples: + list(get_one_exchange_neighbourhood(sample, seed=1)) # no raise + for n in ns: + n.check_valid_configuration() # no raise From da491a7c2821306d07d1bfba758865d8fd96fa5b Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Thu, 18 Jul 2024 10:10:36 +0200 Subject: [PATCH 2/3] fix: Allow constants to have arbitrary values --- src/ConfigSpace/configuration_space.py | 8 ++------ src/ConfigSpace/hyperparameters/constant.py | 7 ------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/src/ConfigSpace/configuration_space.py b/src/ConfigSpace/configuration_space.py index 2216bb2a..0821c1f1 100644 --- a/src/ConfigSpace/configuration_space.py +++ b/src/ConfigSpace/configuration_space.py @@ -111,12 +111,8 @@ def _parse_hyperparameters_from_dict( yield CategoricalHyperparameter(name, hp) - # If it's an allowed type, it's a constant - elif isinstance(hp, (int, str, float)): - yield Constant(name, hp) - - else: - raise ValueError(f"Unknown value '{hp}' for '{name}'") + # It's a constant + yield Constant(name, hp) class ConfigurationSpace(Mapping[str, Hyperparameter]): diff 
--git a/src/ConfigSpace/hyperparameters/constant.py b/src/ConfigSpace/hyperparameters/constant.py index 8f82ed03..aea4b0c3 100644 --- a/src/ConfigSpace/hyperparameters/constant.py +++ b/src/ConfigSpace/hyperparameters/constant.py @@ -64,13 +64,6 @@ def __init__( Field for holding meta data provided by the user. Not used by the configuration space. """ - # TODO: This should be changed and allowed... - if not isinstance(value, (int, float, str)) or isinstance(value, bool): - raise TypeError( - f"Constant hyperparameter '{name}' must be of type int, float or str, " - f"but got {type(value).__name__}.", - ) - self.value = value super().__init__( From 7f1508d1347947829d39008b4133d7a80471dd6c Mon Sep 17 00:00:00 2001 From: eddiebergman Date: Thu, 18 Jul 2024 10:59:13 +0200 Subject: [PATCH 3/3] feat(Constant): Fix transformation issues with `to_vector` --- changelog.md | 34 +++++--- docs/index.md | 22 +++++ src/ConfigSpace/configuration_space.py | 6 +- .../hyperparameters/categorical.py | 2 +- src/ConfigSpace/hyperparameters/constant.py | 85 ++++++++++++++++++- .../hyperparameters/hp_components.py | 11 ++- test/test_hyperparameters.py | 35 +++++--- 7 files changed, 156 insertions(+), 39 deletions(-) diff --git a/changelog.md b/changelog.md index efd58abb..d65a7988 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,9 @@ +# Version 1.1.0 + +* FEAT #376: Allow arbitrary values for `Categorical`, `Ordinal`, and `Constant` hyperparameters. +* FIX #375: Use `object` dtype for `Constant` np.array of values to prevent numpy type conversions of values. + + # Version 1.0.1 * FIX #373: Fix `ForbiddenEqualsRelation` when evaluating on vectors of values. 
@@ -92,7 +98,7 @@ # Version 0.4.15 -* Add `pyproject.toml` to support wheel installation as required in +* Add `pyproject.toml` to support wheel installation as required in [PEP518](https://medium.com/@grassfedcode/pep-517-and-518-in-plain-english-47208ca8b7a6) # Version 0.4.14 @@ -120,7 +126,7 @@ * ADD #135: Add weights to the sampling of categorical hyperparameters. * MAINT #129: Performance improvements for the generation of neighbor configurations. * MAINT #130: Test the installability of a distribution on travis-ci. -* FIX #140: Fixes a bug which led to samples lower than the lower bound of +* FIX #140: Fixes a bug which led to samples lower than the lower bound of `UniformFloatHyperparemeter` if the lower bound was larger than zero and quantization was used. * FIX # 138: Fixes a bug in which the readme wasn't read correctly on systems not using UTF8 as their default encoding. @@ -145,7 +151,7 @@ # Version 0.4.9 -* Fixes an issue where adding a new forbidden for an unknown hyperparameter +* Fixes an issue where adding a new forbidden for an unknown hyperparameter did not result in an immediate exception. * Add a new argument `vector` to `util.deactivate_inactive_hyperparameters` * Make the number of categories a public variable for categorical and @@ -153,21 +159,21 @@ # Version 0.4.8 -* Fixes an issue which made serialization of `ForbiddenInCondition` to json +* Fixes an issue which made serialization of `ForbiddenInCondition` to json fail. -* MAINT #101: Improved error message on setting illegal value in a +* MAINT #101: Improved error message on setting illegal value in a configuration. * DOC #91: Added a documentation to automl.github.io/ConfigSpace # Version 0.4.7 * Tests Python3.7. -* Fixes #87: better handling of Conjunctions when adding them to the +* Fixes #87: better handling of Conjunctions when adding them to the configuration space. 
* MAINT: Improved type annotation in `util.py` which results in improved performance (due to better Cython optimization). -* MAINT: `util.get_one_exchange_neighborhood` now accepts two arguments - `num_neighbors` and `stdev` which govern the neighborhood creation behaviour +* MAINT: `util.get_one_exchange_neighborhood` now accepts two arguments + `num_neighbors` and `stdev` which govern the neighborhood creation behaviour of several continuous hyperparameters. * NEW #85: Add function to obtain active hyperparameters * NEW #84: Add field for meta-data to the configuration space object. @@ -291,21 +297,21 @@ # Version 0.2.1 -* FIX: bug which changed order of hyperparameters when adding new - hyperparameter. This was non-deterministic due to the use of dict instead +* FIX: bug which changed order of hyperparameters when adding new + hyperparameter. This was non-deterministic due to the use of dict instead of OrderedDict. * FIX: compare configurations with == instead of numpy.allclose. * FIX: issue 2, syntax error no longer present during installation * FIX: json serialization of configurations and their hyperparameters can now - be deserialized by json and still compare equal + be deserialized by json and still compare equal # Version 0.2 -* FIX: bug which made integer values have different float values in the +* FIX: bug which made integer values have different float values in the underlying vector representation. 
-* FIX: bug which could make two configuration spaces compare unequal due to +* FIX: bug which could make two configuration spaces compare unequal due to the use of defaultdict -* FEATURE: new feature add_configuration_space, which allows to add a whole +* FEATURE: new feature add_configuration_space, which allows to add a whole configuration space into an existing configuration space * FEATURE: python3.5 support * FIX: add function get_parent() to Conjunctions (issue #1) diff --git a/docs/index.md b/docs/index.md index 6417354b..0d2e8e47 100644 --- a/docs/index.md +++ b/docs/index.md @@ -26,6 +26,28 @@ Those are introduced in the [user guide](./guide.md) * You can now use your editor to jump to definition and see the source code. * Contribute more easily! + There is now also better support in Categorical, Ordinal and Constant hyperparameters, + for arbitrary values, for example: + + ```python + from dataclasses import dataclass + from ConfigSpace import ConfigurationSpace, Constant + + @dataclass + class A: + a: int + + def f() -> None: + return None + + cs = ConfigurationSpace({ + "cat": [True, False, None], + "othercat": [A(1), f], + "constant": Constant("constant", (24, 25)), + }) + ``` + + With this, we have also deprecated many of the previous functions, simplifying the API where possible or improving it's clarity. We have tried hard to keep everything backwards compatible, and also recommend the new functionality to use!
diff --git a/src/ConfigSpace/configuration_space.py b/src/ConfigSpace/configuration_space.py index 0821c1f1..6615916d 100644 --- a/src/ConfigSpace/configuration_space.py +++ b/src/ConfigSpace/configuration_space.py @@ -110,9 +110,9 @@ def _parse_hyperparameters_from_dict( raise ValueError(f"Can't have empty list for categorical {name}") yield CategoricalHyperparameter(name, hp) - - # It's a constant - yield Constant(name, hp) + else: + # It's a constant + yield Constant(name, hp) class ConfigurationSpace(Mapping[str, Hyperparameter]): diff --git a/src/ConfigSpace/hyperparameters/categorical.py b/src/ConfigSpace/hyperparameters/categorical.py index df0a1fc2..d697f137 100644 --- a/src/ConfigSpace/hyperparameters/categorical.py +++ b/src/ConfigSpace/hyperparameters/categorical.py @@ -16,7 +16,7 @@ ) from ConfigSpace.hyperparameters.hp_components import Neighborhood, TransformerSeq from ConfigSpace.hyperparameters.hyperparameter import Hyperparameter -from ConfigSpace.types import Array, NotSet, _NotSet, f64 +from ConfigSpace.types import Array, Mask, NotSet, _NotSet, f64 if TYPE_CHECKING: from ConfigSpace.types import Array diff --git a/src/ConfigSpace/hyperparameters/constant.py b/src/ConfigSpace/hyperparameters/constant.py index aea4b0c3..0fe8a584 100644 --- a/src/ConfigSpace/hyperparameters/constant.py +++ b/src/ConfigSpace/hyperparameters/constant.py @@ -1,15 +1,16 @@ from __future__ import annotations -from collections.abc import Hashable, Mapping +from collections.abc import Hashable, Mapping, Sequence from dataclasses import dataclass from typing import Any, ClassVar +from typing_extensions import override import numpy as np from ConfigSpace.hyperparameters.distributions import ConstantVectorDistribution from ConfigSpace.hyperparameters.hp_components import TransformerConstant from ConfigSpace.hyperparameters.hyperparameter import Hyperparameter -from ConfigSpace.types import Array, f64 +from ConfigSpace.types import Array, Mask, f64 CONSTANT_VECTOR_VALUE_YES = 
f64(1) """Vectorized value for constant when set.""" @@ -44,6 +45,8 @@ class Constant(Hyperparameter[Any, Any]): size: int """Size of the hyperparameter, which is always 1 for a constant hyperparameter.""" + _contains_sequence_as_value: bool = False + def __init__( self, name: str, @@ -64,7 +67,15 @@ def __init__( Field for holding meta data provided by the user. Not used by the configuration space. """ + if isinstance(value, np.ndarray): + raise ValueError( + "Constant hyperparameter does not support numpy arrays as values", + ) self.value = value + self._contains_sequence_as_value = isinstance( + value, + Sequence, + ) and not isinstance(value, str) super().__init__( name=name, @@ -93,5 +104,75 @@ def __str__(self) -> str: return ", ".join(parts) + @override + def legal_value(self, value: Any | Sequence[Any] | Array[Any]) -> bool | Mask: + if isinstance(value, np.ndarray): + return self._transformer.legal_value(value) + + if isinstance(value, str): + return self._transformer.legal_value(np.array([value]))[0] + + # Got a sequence of things, could be a list of stuff or a single value which is + # itself a list, e.g. 
a tuple (1, 2) indicating a single value + # If we could have single values which are sequences, we need to do some + # magic to get it into an array without numpy flattening it down + if isinstance(value, Sequence): + if self._contains_sequence_as_value: + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(1, dtype=object) + _v[0] = value + return self._transformer.legal_value(_v)[0] + + # A sequence of things containing different values + return self._transformer.legal_value(np.asarray(value)) + + # Single value that is not a sequence + return self._transformer.legal_value(np.array([value]))[0] + + @override + def pdf_values(self, values: Sequence[Any] | Array[Any]) -> Array[f64]: + if isinstance(values, np.ndarray): + if values.ndim != 1: + raise ValueError("Method pdf expects a one-dimensional numpy array") + + vector = self.to_vector(values) # type: ignore + return self.pdf_vector(vector) + + if self._contains_sequence_as_value: + # We have to convert it into a numpy array of objects carefully + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(len(values), dtype=object) + _v[:] = values + _vector: Array[f64] = self.to_vector(_v) # type: ignore + return self.pdf_vector(_vector) + + vector: Array[f64] = self.to_vector(values) # type: ignore + return self.pdf_vector(vector) + + @override + def to_vector(self, value: Any | Sequence[Any] | Array[Any]) -> f64 | Array[f64]: + if isinstance(value, np.ndarray): + return self._transformer.to_vector(value) + + if isinstance(value, str): + return self._transformer.to_vector(np.array([value]))[0] + + # Got a sequence of things, could be a list of stuff or a single value which is + # itself a list, e.g. 
a tuple (1, 2) indicating a single value + # If we could have single values which are sequences, we need to do some + # magic to get it into an array without numpy flattening it down + if isinstance(value, Sequence): + if self._contains_sequence_as_value: + # https://stackoverflow.com/a/47389566/5332072 + _v = np.empty(1, dtype=object) + _v[0] = value + return self._transformer.to_vector(_v)[0] + + # A sequence of things containing different values + return self._transformer.to_vector(np.asarray(value)) + + # Single value that is not a sequence + return self._transformer.to_vector(np.array([value]))[0] + UnParametrizedHyperparameter = Constant # Legacy diff --git a/src/ConfigSpace/hyperparameters/hp_components.py b/src/ConfigSpace/hyperparameters/hp_components.py index 183416f5..c4e11ac8 100644 --- a/src/ConfigSpace/hyperparameters/hp_components.py +++ b/src/ConfigSpace/hyperparameters/hp_components.py @@ -503,11 +503,10 @@ def __post_init__(self) -> None: @override def to_vector(self, value: ObjectArray) -> Array[f64]: - return np.where( - value == self.value, - self.vector_value_yes, - self.vector_value_no, - ) + if isinstance(self.value, np.ndarray): + return np.flatnonzero(np.equal(self.value, value)).astype(f64) + + return np.array([v == self.value for v in value], dtype=f64) @override def to_value(self, vector: Array[f64]) -> ObjectArray: @@ -515,7 +514,7 @@ def to_value(self, vector: Array[f64]) -> ObjectArray: @override def legal_value(self, value: ObjectArray) -> Mask: - return value == self.value # type: ignore + return np.array([v == self.value for v in value], dtype=np.bool_) @override def legal_vector(self, vector: Array[f64]) -> Mask: diff --git a/test/test_hyperparameters.py b/test/test_hyperparameters.py index 1ddbc87b..3236dcd9 100644 --- a/test/test_hyperparameters.py +++ b/test/test_hyperparameters.py @@ -60,6 +60,10 @@ } +def f() -> None: + pass + + def test_constant(): # Test construction c1 = Constant("value", 1) @@ -67,10 +71,15 @@ def 
test_constant(): c3 = Constant("value", 2) c4 = Constant("valuee", 1) c5 = Constant("valueee", 2) + c6 = Constant("valueee", [1, 2]) + _c6 = Constant("valueee", [1, 2]) + c7 = Constant("valueee", f) + c8 = Constant("valueee", Path("path")) # Test attributes are accessible assert c5.name == "valueee" assert c5.value == 2 + assert c7.value == f # Test the string representation assert str(c1) == "value, Type: Constant, Value: 1" @@ -81,27 +90,19 @@ def test_constant(): assert c1 != c3 assert c1 != c4 assert c1 != c5 - - # Test that only string, integers and floats are allowed - # TODO: This should be changed and allowed... - v: Any - for v in [{}, None, True]: - with pytest.raises(TypeError): - Constant("value", v) - - # Test that only string names are allowed - for name in [1, {}, None, True]: - with pytest.raises(TypeError): - Constant(name, "value") + assert c6 == _c6 # test that meta-data is stored correctly c1_meta = Constant("value", 1, meta=dict(META_DATA)) assert c1_meta.meta == META_DATA # Test getting the size - for constant in (c1, c2, c3, c4, c5, c1_meta): + for constant in (c1, c2, c3, c4, c5, c1_meta, c6, c7, c8): assert constant.size == 1 + with pytest.raises(ValueError): + _ = Constant("value", np.array([1, 2])) + def test_constant_pdf(): c1 = Constant("valuee", 1) @@ -146,6 +147,14 @@ def test_constant_pdf(): ): c1.pdf_values(wrong_shape_3) + c3 = Constant("valueee", [1, 2]) + c4 = Constant("valueee", f) + c5 = Constant("valueee", Path("path")) + + assert c3.pdf_values([[1, 2]]) == np.array(1.0) + assert c4.pdf_values([f]) == np.array(1.0) + assert c5.pdf_values([Path("path")]) == np.array(1.0) + def test_constant__pdf(): c1 = Constant("valuee", 1)