From 8d5308db5cb8ee751e1ab65327e5e690b97dacc2 Mon Sep 17 00:00:00 2001
From: eddiebergman <eddiebergmanhs@gmail.com>
Date: Wed, 22 Nov 2023 18:25:36 +0100
Subject: [PATCH 1/4] feat(HEBO): Added initial support for single/multi
 objective HEBO

---
 src/amltk/optimization/optimizers/hebo.py | 239 ++++++++++++++++++++++
 src/amltk/pipeline/parsers/hebo.py        | 142 +++++++++++++
 tests/optimizers/test_optimizers.py       |  28 +++
 3 files changed, 409 insertions(+)
 create mode 100644 src/amltk/optimization/optimizers/hebo.py
 create mode 100644 src/amltk/pipeline/parsers/hebo.py

diff --git a/src/amltk/optimization/optimizers/hebo.py b/src/amltk/optimization/optimizers/hebo.py
new file mode 100644
index 00000000..21c32f29
--- /dev/null
+++ b/src/amltk/optimization/optimizers/hebo.py
@@ -0,0 +1,239 @@
+"""An optimizer from HEBO to optimize a HEBO design space.
+
+### In progress
+"""
+# TODO
+# Constraints
+# Parallel ask/suggest
+# I imagine iterative tell is fine, we don't need concurrent.
+# Figure out what other features there are
+# Doc of course
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from itertools import count
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, TypeAlias, overload
+from typing_extensions import override
+
+import numpy as np
+import pandas as pd
+from hebo.design_space.design_space import DesignSpace
+from hebo.optimizers.general import GeneralBO
+from hebo.optimizers.hebo import HEBO
+
+import amltk.randomness
+from amltk.optimization.metric import Metric
+from amltk.optimization.optimizer import Optimizer
+from amltk.optimization.trial import Trial
+from amltk.pipeline.parsers.hebo import parser
+from amltk.store import PathBucket
+
+if TYPE_CHECKING:
+    from typing import Protocol
+
+    from hebo.optimizers.abstract_optimizer import AbstractOptimizer
+
+    from amltk.pipeline import Node
+    from amltk.types import Seed
+
+    class HEBOParser(Protocol):
+        """A protocol for HEBO design space parser."""
+
+        def __call__(
+            self,
+            node: Node,
+            *,
+            flat: bool = False,
+            delim: str = ":",
+        ) -> DesignSpace:
+            """See [`hebo`][amltk.pipeline.parsers.hebo.parser]."""
+            ...
+
+
+HEBOTrial: TypeAlias = pd.DataFrame
+"""HEBO uses dataframes internally."""
+
+
+class HEBOOptimizer(Optimizer[HEBOTrial]):
+    """An optimizer that uses HEBO to optimize a HEBO design space."""
+
+    def __init__(
+        self,
+        optimizer: AbstractOptimizer,
+        metrics: Metric | Sequence[Metric],
+        bucket: PathBucket | None = None,
+        seed: Seed | None = None,
+    ) -> None:
+        """Initialize the optimizer.
+
+        Args:
+            optimizer: The HEBO optimizer.
+            metrics: The metrics to optimize.
+            bucket: The bucket to store results of individual trials from this
+                optimizer.
+            seed: The seed to use for trials generated from this optimizer.
+        """
+        metrics = metrics if isinstance(metrics, Sequence) else [metrics]
+        super().__init__(metrics=metrics, bucket=bucket)
+        self.optimizer = optimizer
+        self.seed = seed
+
+        # TODO: If HEBO does multi-fidelity or some other kinds of optimization,
+        # this may not be sufficient
+        self.name_generator = iter(f"trial_{i}" for i in count())
+
+    @override
+    def tell(self, report: Trial.Report[HEBOTrial]) -> None:
+        """Tell the optimizer the report for an asked trial.
+
+        Args:
+            report: The report for a trial
+        """
+        raw_x = report.trial.info
+        assert raw_x is not None
+
+        # NOTE: Given a trial fail/crashed, we will have inf/worst or None for each
+        # metric. As long as we fill in any missing metrics and maintain metric order,
+        # than HEBO should be fine with these reported.
+        # Either way, we don't actually have to look at the status of the trial to give
+        # the info to hebo.
+
+        # Make sure we have a value for each
+        _lookup: dict[str, Metric.Value] = {
+            v.metric.name: v for v in report.metric_values
+        }
+        metric_values = [
+            _lookup.get(metric.name, metric.worst) for metric in self.metrics
+        ]
+        raw_y = np.array([[v.value for v in metric_values]])
+        self.optimizer.observe(raw_x, raw_y)
+
+    @override
+    @overload
+    def ask(self, *, n_suggestions: int) -> list[Trial[HEBOTrial]]:
+        ...
+
+    @override
+    @overload
+    def ask(self, *, n_suggestions: None = None) -> Trial[HEBOTrial]:
+        ...
+
+    @override
+    def ask(
+        self,
+        *,
+        n_suggestions: int | None = None,
+        fix_input: dict[str, Any] | None = None,
+    ) -> Trial[HEBOTrial] | list[Trial[HEBOTrial]]:
+        """Ask the optimizer for a trial to evaluate.
+
+        Returns:
+            A Trial
+        """
+        if fix_input is not None:
+            # TODO: Probably fine to implement but not a priority
+            # right now.
+            raise NotImplementedError(
+                "fix_input not yet supported for HEBOOptimizer",
+            )
+
+        match n_suggestions:
+            # TODO: Allow multiple suggestions per iteration
+            case int():
+                raise NotImplementedError(
+                    "Multiple suggestions per iteration not yet supported",
+                )
+            case None:
+                # NOTE: Assuming for now that if I suggest without
+                # anything, i.e. `n_suggestions = 1`, then I get a
+                # single row dataframe.
+                df_config: pd.DataFrame = self.optimizer.suggest()  # type: ignore
+                assert isinstance(df_config, pd.DataFrame)
+                assert len(df_config) == 1
+
+                config: dict[str, Any] = df_config.iloc[0].to_dict()
+
+                return Trial(
+                    name=next(self.name_generator),
+                    config=config,
+                    bucket=self.bucket,
+                    metrics=self.metrics,
+                    info=df_config,
+                    seed=amltk.randomness.as_int(self.seed),
+                )
+            case _:  # type: ignore
+                raise ValueError(f"{n_suggestions=} must be `None` or `int > 0`")
+
+    @classmethod
+    def create(
+        cls,
+        *,
+        space: Node | DesignSpace,
+        metrics: Metric | Sequence[Metric],
+        seed: Seed | None = None,
+        bucket: PathBucket | str | Path | None = None,
+        **optimizer_kwargs: Any,
+    ) -> HEBOOptimizer:
+        """Create an optimizer from HEBO.
+
+        Args:
+            space: The space to search over
+            metrics: The metrics to optimize.
+
+                * If `Metric`, then this is a single objective optimization with `HEBO`.
+                * If `Sequence[Metric]`, then this is a multi-objective optimization
+                    with `GeneralBO`.
+
+            seed: The seed to use for trials generated from this optimizer.
+            bucket: The bucket to store results of individual trials from this
+                optimizer.
+            **optimizer_kwargs: Keyword arguments to pass to the optimizer constructed.
+
+        Returns:
+            A HEBOOptimizer.
+        """
+        # TODO: Since hebo in its observe will ignore anything that's inf, we can
+        # not have metrics that have an unbounded best, as these would get ignored...
+        # Probably need to raise an issue.
+        # Not really sure how to report or handle that case though as it's only a
+        # theoretical problem.
+        _check_metrics = [metrics] if isinstance(metrics, Metric) else metrics
+        if any(np.isinf(metric.optimal.value) for metric in _check_metrics):
+            raise ValueError(
+                "HEBO doesn't support metrics with an unbounded optimal value i.e. inf",
+            )
+
+        scramble_seed = amltk.randomness.as_int(seed)
+
+        if isinstance(bucket, str | Path):
+            bucket = PathBucket(bucket)
+
+        space = space if isinstance(space, DesignSpace) else parser(space)
+
+        match metrics:
+            case Metric() | [Metric()]:
+                optimizer = HEBO(
+                    space=space,
+                    scramble_seed=scramble_seed,
+                    **optimizer_kwargs,
+                )
+            case Sequence():
+                assert len(metrics) > 1
+                # TODO: Not really sure if I should give a ref point or not, especially
+                # if there are unbounded metrics.
+                ref_point = np.array([metric.worst.value for metric in metrics])
+                optimizer = GeneralBO(
+                    space=space,
+                    num_obj=len(metrics),
+                    ref_point=ref_point,
+                    **optimizer_kwargs,
+                )
+
+        return cls(optimizer=optimizer, metrics=metrics, bucket=bucket, seed=seed)
+
+    @override
+    @classmethod
+    def preferred_parser(cls) -> HEBOParser:
+        return parser
diff --git a/src/amltk/pipeline/parsers/hebo.py b/src/amltk/pipeline/parsers/hebo.py
new file mode 100644
index 00000000..d43dc3fc
--- /dev/null
+++ b/src/amltk/pipeline/parsers/hebo.py
@@ -0,0 +1,142 @@
+"""Parser for turning a pipeline `Node` into a HEBO `DesignSpace`."""
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Any, TypeAlias
+
+import numpy as np
+from hebo.design_space.design_space import DesignSpace
+
+if TYPE_CHECKING:
+    from amltk.pipeline import Node
+
+HP: TypeAlias = dict[str, Any]
+
+PAIR = 2
+
+
+def _parse_hp(
+    node_name: str,
+    *,
+    hp_name: str,
+    hp: tuple | list | Mapping,
+    delim: str = ":",
+) -> HP:
+    new_hp_name = f"{node_name}{delim}{hp_name}"
+    match hp:
+        # If the name in a dict does not match what we see in the `space` dict, raise
+        case {"name": _name_in_dict} if _name_in_dict != hp_name:
+            raise ValueError(
+                f'Can\'t have "name" in {hp=} as it is already given the {hp_name=}.',
+            )
+        # Otherwise it's a dictionary with either the same name or no name, either case
+        # we give it a new name prefixed by the nodes name
+        case Mapping():
+            return {**hp, "name": new_hp_name}
+        # Bounded int/float
+        case tuple() as tup if len(tup) == PAIR:
+            match tup:
+                case (int() | np.integer(), int() | np.integer()):
+                    x, y = tup
+                    return {
+                        "name": new_hp_name,
+                        "type": "int",
+                        "lb": int(x),
+                        "ub": int(y),
+                    }
+                case (float() | np.floating(), float() | np.floating()):
+                    x, y = tup
+                    return {
+                        "name": new_hp_name,
+                        "type": "num",
+                        "lb": float(x),
+                        "ub": float(y),
+                    }
+                case (x, y):
+                    raise ValueError(
+                        f"Expected {hp_name} to have same type for lower/upper bound,"
+                        f"got lower: {type(x)}, upper: {type(y)}.",
+                    )
+        # Bool param
+        case (one, two) if isinstance(one, bool) and isinstance(two, bool):
+            return {"name": hp_name, "type": "bool"}
+        # Categorical param
+        case list() if all(isinstance(item, str) for item in hp):
+            return {"name": hp_name, "type": "cat", "categories": hp}
+        # Constant value
+        case (one,) if isinstance(one, int | float | str | bool):
+            return {"name": hp_name, "type": "cat", "categories": [one]}
+        case _:
+            raise ValueError(
+                f"Could not parse {hp_name} as a valid HEBO distribution.\n{hp=}",
+            )
+
+    raise ValueError(f"Could not parse {hp_name} as a valid HEBO distribution.\n{hp=}")
+
+
+def _parse_space(node: Node, *, flat: bool = False, delim: str = ":") -> dict[str, HP]:
+    match node.space:
+        case None:
+            space = {}
+        case list():
+            space = {hp["name"]: hp for hp in node.space}
+        case Mapping():
+            space = {
+                name: _parse_hp(node_name=node.name, hp_name=name, hp=hp, delim=delim)
+                for name, hp in node.space.items()
+            }
+        case _:
+            raise ValueError(
+                f"Can't parse {node.space=} as a HEBO space for node {node.name=}.",
+            )
+
+    for child in node.nodes:
+        subspace: dict[str, HP] = _parse_space(child)
+        if not flat:
+            _prefix = lambda _hp_name: f"{node.name}{delim}{_hp_name}"
+            subspace = {
+                _prefix(hp_name): {**hp, "name": _prefix(hp_name)}
+                for hp_name, hp in subspace.items()
+            }
+
+        for hp_name, hp in subspace.items():
+            if hp_name in space:
+                raise ValueError(
+                    f"Duplicate name {hp_name} already in space from space of "
+                    f"{node.name}\nCurrently parsed space: {space}",
+                )
+            space[hp_name] = hp
+
+    return space
+
+
+def parser(
+    node: Node,
+    *,
+    flat: bool = False,
+    conditionals: bool = False,
+    delim: str = ":",
+) -> DesignSpace:
+    """Parse a Node and its children into a hebo DesignSpace.
+
+    Args:
+        node: The Node to parse
+        flat: If `False`, use a hierarchical naming scheme for nodes and their children.
+        conditionals: Whether to include conditionals in the space from a
+            [`Choice`][amltk.pipeline.Choice]. If this is `False`, this will
+            also remove all forbidden clauses and other conditional clauses.
+            The primary use of this functionality is that some optimizers do not
+            support these features.
+
+            !!! warning "Not yet supported"
+
+                This functionality is not yet supported in HEBO
+
+        delim: The delimiter to use for the names of the hyperparameters
+    """
+    if conditionals:
+        raise NotImplementedError("Conditionals are not yet supported with HEBO.")
+
+    space = _parse_space(node=node, flat=flat, delim=delim)
+    hp_values = list(space.values())
+    return DesignSpace().parse(hp_values)
diff --git a/tests/optimizers/test_optimizers.py b/tests/optimizers/test_optimizers.py
index 478c7d16..005c66bc 100644
--- a/tests/optimizers/test_optimizers.py
+++ b/tests/optimizers/test_optimizers.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+import numpy as np
 import pytest
 from pytest_cases import case, parametrize, parametrize_with_cases
 
@@ -12,6 +13,7 @@
 from amltk.profiling import Timer
 
 if TYPE_CHECKING:
+    from amltk.optimization.optimizers.hebo import HEBOOptimizer
     from amltk.optimization.optimizers.neps import NEPSOptimizer
     from amltk.optimization.optimizers.optuna import OptunaOptimizer
     from amltk.optimization.optimizers.smac import SMACOptimizer
@@ -86,6 +88,32 @@ def opt_optuna(metric: Metric, tmp_path: Path) -> OptunaOptimizer:
     )
 
 
+# NOTE: HEBO does not support unbounded optimals in metrics
+hebo_metrics = [
+    Metric("score_bounded", minimize=False, bounds=(0, 1)),
+    Metric("score_unbounded", minimize=False, bounds=(-np.inf, 10)),
+    Metric("loss_unbounded", minimize=True, bounds=(-10, np.inf)),
+    Metric("loss_bounded", minimize=True, bounds=(-1, 1)),
+]
+
+
+@case
+@parametrize("metric", [*hebo_metrics, hebo_metrics])  # Single obj and multi
+def opt_hebo(metric: Metric, tmp_path: Path) -> HEBOOptimizer:
+    try:
+        from amltk.optimization.optimizers.hebo import HEBOOptimizer
+    except ImportError:
+        pytest.skip("HEBO is not installed")
+
+    pipeline = Component(_A, name="hi", space={"a": (1, 10)})
+    return HEBOOptimizer.create(
+        space=pipeline,
+        metrics=metric,
+        seed=42,
+        bucket=tmp_path,
+    )
+
+
 @case
 @parametrize("metric", [*metrics])  # Single obj
 def opt_neps(metric: Metric, tmp_path: Path) -> NEPSOptimizer:

From 2fa5f79f900e5d9d2c2df4bd3d950f1c0acc4764 Mon Sep 17 00:00:00 2001
From: eddiebergman <eddiebergmanhs@gmail.com>
Date: Wed, 22 Nov 2023 18:35:22 +0100
Subject: [PATCH 2/4] fix(HEBO): Make sure to report costs always

---
 src/amltk/optimization/optimizers/hebo.py | 45 ++++++++++++++++++++---
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/amltk/optimization/optimizers/hebo.py b/src/amltk/optimization/optimizers/hebo.py
index 21c32f29..d7683120 100644
--- a/src/amltk/optimization/optimizers/hebo.py
+++ b/src/amltk/optimization/optimizers/hebo.py
@@ -99,7 +99,6 @@ def tell(self, report: Trial.Report[HEBOTrial]) -> None:
         # than HEBO should be fine with these reported.
         # Either way, we don't actually have to look at the status of the trial to give
         # the info to hebo.
-
         # Make sure we have a value for each
         _lookup: dict[str, Metric.Value] = {
             v.metric.name: v for v in report.metric_values
@@ -107,7 +106,9 @@ def tell(self, report: Trial.Report[HEBOTrial]) -> None:
         metric_values = [
             _lookup.get(metric.name, metric.worst) for metric in self.metrics
         ]
-        raw_y = np.array([[v.value for v in metric_values]])
+
+        costs = [self.cost(v) for v in metric_values]
+        raw_y = np.array([costs])  # Yep, it needs 2d, for single report tells
         self.optimizer.observe(raw_x, raw_y)
 
     @override
@@ -221,13 +222,12 @@ def create(
                 )
             case Sequence():
                 assert len(metrics) > 1
-                # TODO: Not really sure if I should give a ref point or not, especially
-                # if there are unbounded metrics.
-                ref_point = np.array([metric.worst.value for metric in metrics])
                 optimizer = GeneralBO(
                     space=space,
                     num_obj=len(metrics),
-                    ref_point=ref_point,
+                    # TODO: Not really sure if I should give a ref point or not,
+                    # especially if there are unbounded metrics.
+                    ref_point=np.array(cls.worst_possible_cost(metrics)),
                     **optimizer_kwargs,
                 )
 
@@ -237,3 +237,36 @@ def create(
     @classmethod
     def preferred_parser(cls) -> HEBOParser:
         return parser
+
+    @overload
+    @classmethod
+    def worst_possible_cost(cls, metric: Metric) -> float:
+        ...
+
+    @overload
+    @classmethod
+    def worst_possible_cost(cls, metric: Sequence[Metric]) -> list[float]:
+        ...
+
+    @classmethod
+    def worst_possible_cost(
+        cls,
+        metric: Metric | Sequence[Metric],
+    ) -> float | list[float]:
+        """Get the crash cost for a metric for HEBO."""
+        match metric:
+            case Metric(bounds=(lower, upper)):  # Bounded metrics
+                return abs(upper - lower)
+            case Metric():  # Unbounded metric
+                return np.inf
+            case metrics:
+                return [cls.worst_possible_cost(m) for m in metrics]
+
+    @classmethod
+    def cost(cls, value: Metric.Value) -> float:
+        """Get the cost for a metric value for HEBO."""
+        match value.distance_to_optimal:
+            case None:  # If we can't compute the distance, use the loss
+                return value.loss
+            case distance:  # If we can compute the distance, use that
+                return distance

From 74d9c7f591a211923f71063e3fecf9127c032358 Mon Sep 17 00:00:00 2001
From: eddiebergman <eddiebergmanhs@gmail.com>
Date: Wed, 22 Nov 2023 18:25:36 +0100
Subject: [PATCH 3/4] feat(HEBO): Added initial support for single/multi
 objective HEBO

---
 src/amltk/optimization/optimizers/hebo.py | 239 ++++++++++++++++++++++
 src/amltk/pipeline/parsers/hebo.py        | 142 +++++++++++++
 tests/optimizers/test_optimizers.py       |  28 +++
 3 files changed, 409 insertions(+)
 create mode 100644 src/amltk/optimization/optimizers/hebo.py
 create mode 100644 src/amltk/pipeline/parsers/hebo.py

diff --git a/src/amltk/optimization/optimizers/hebo.py b/src/amltk/optimization/optimizers/hebo.py
new file mode 100644
index 00000000..21c32f29
--- /dev/null
+++ b/src/amltk/optimization/optimizers/hebo.py
@@ -0,0 +1,239 @@
+"""An optimizer from HEBO to optimize a HEBO design space.
+
+### In progress
+"""
+# TODO
+# Constraints
+# Parallel ask/suggest
+# I imagine iterative tell is fine, we don't need concurrent.
+# Figure out what other features there are
+# Doc of course
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from itertools import count
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, TypeAlias, overload
+from typing_extensions import override
+
+import numpy as np
+import pandas as pd
+from hebo.design_space.design_space import DesignSpace
+from hebo.optimizers.general import GeneralBO
+from hebo.optimizers.hebo import HEBO
+
+import amltk.randomness
+from amltk.optimization.metric import Metric
+from amltk.optimization.optimizer import Optimizer
+from amltk.optimization.trial import Trial
+from amltk.pipeline.parsers.hebo import parser
+from amltk.store import PathBucket
+
+if TYPE_CHECKING:
+    from typing import Protocol
+
+    from hebo.optimizers.abstract_optimizer import AbstractOptimizer
+
+    from amltk.pipeline import Node
+    from amltk.types import Seed
+
+    class HEBOParser(Protocol):
+        """A protocol for HEBO design space parser."""
+
+        def __call__(
+            self,
+            node: Node,
+            *,
+            flat: bool = False,
+            delim: str = ":",
+        ) -> DesignSpace:
+            """See [`hebo`][amltk.pipeline.parsers.hebo.parser]."""
+            ...
+
+
+HEBOTrial: TypeAlias = pd.DataFrame
+"""HEBO uses dataframes internally."""
+
+
+class HEBOOptimizer(Optimizer[HEBOTrial]):
+    """An optimizer that uses HEBO to optimize a HEBO design space."""
+
+    def __init__(
+        self,
+        optimizer: AbstractOptimizer,
+        metrics: Metric | Sequence[Metric],
+        bucket: PathBucket | None = None,
+        seed: Seed | None = None,
+    ) -> None:
+        """Initialize the optimizer.
+
+        Args:
+            optimizer: The HEBO optimizer.
+            metrics: The metrics to optimize.
+            bucket: The bucket to store results of individual trials from this
+                optimizer.
+            seed: The seed to use for trials generated from this optimizer.
+        """
+        metrics = metrics if isinstance(metrics, Sequence) else [metrics]
+        super().__init__(metrics=metrics, bucket=bucket)
+        self.optimizer = optimizer
+        self.seed = seed
+
+        # TODO: If HEBO does multi-fidelity or some other kinds of optimization,
+        # this may not be sufficient
+        self.name_generator = iter(f"trial_{i}" for i in count())
+
+    @override
+    def tell(self, report: Trial.Report[HEBOTrial]) -> None:
+        """Tell the optimizer the report for an asked trial.
+
+        Args:
+            report: The report for a trial
+        """
+        raw_x = report.trial.info
+        assert raw_x is not None
+
+        # NOTE: Given a trial fail/crashed, we will have inf/worst or None for each
+        # metric. As long as we fill in any missing metrics and maintain metric order,
+        # than HEBO should be fine with these reported.
+        # Either way, we don't actually have to look at the status of the trial to give
+        # the info to hebo.
+
+        # Make sure we have a value for each
+        _lookup: dict[str, Metric.Value] = {
+            v.metric.name: v for v in report.metric_values
+        }
+        metric_values = [
+            _lookup.get(metric.name, metric.worst) for metric in self.metrics
+        ]
+        raw_y = np.array([[v.value for v in metric_values]])
+        self.optimizer.observe(raw_x, raw_y)
+
+    @override
+    @overload
+    def ask(self, *, n_suggestions: int) -> list[Trial[HEBOTrial]]:
+        ...
+
+    @override
+    @overload
+    def ask(self, *, n_suggestions: None = None) -> Trial[HEBOTrial]:
+        ...
+
+    @override
+    def ask(
+        self,
+        *,
+        n_suggestions: int | None = None,
+        fix_input: dict[str, Any] | None = None,
+    ) -> Trial[HEBOTrial] | list[Trial[HEBOTrial]]:
+        """Ask the optimizer for a trial to evaluate.
+
+        Returns:
+            A Trial
+        """
+        if fix_input is not None:
+            # TODO: Probably fine to implement but not a priority
+            # right now.
+            raise NotImplementedError(
+                "fix_input not yet supported for HEBOOptimizer",
+            )
+
+        match n_suggestions:
+            # TODO: Allow multiple suggestions per iteration
+            case int():
+                raise NotImplementedError(
+                    "Multiple suggestions per iteration not yet supported",
+                )
+            case None:
+                # NOTE: Assuming for now that if I suggest without
+                # anything, i.e. `n_suggestions = 1`, then I get a
+                # single row dataframe.
+                df_config: pd.DataFrame = self.optimizer.suggest()  # type: ignore
+                assert isinstance(df_config, pd.DataFrame)
+                assert len(df_config) == 1
+
+                config: dict[str, Any] = df_config.iloc[0].to_dict()
+
+                return Trial(
+                    name=next(self.name_generator),
+                    config=config,
+                    bucket=self.bucket,
+                    metrics=self.metrics,
+                    info=df_config,
+                    seed=amltk.randomness.as_int(self.seed),
+                )
+            case _:  # type: ignore
+                raise ValueError(f"{n_suggestions=} must be `None` or `int > 0`")
+
+    @classmethod
+    def create(
+        cls,
+        *,
+        space: Node | DesignSpace,
+        metrics: Metric | Sequence[Metric],
+        seed: Seed | None = None,
+        bucket: PathBucket | str | Path | None = None,
+        **optimizer_kwargs: Any,
+    ) -> HEBOOptimizer:
+        """Create an optimizer from HEBO.
+
+        Args:
+            space: The space to search over
+            metrics: The metrics to optimize.
+
+                * If `Metric`, then this is a single objective optimization with `HEBO`.
+                * If `Sequence[Metric]`, then this is a multi-objective optimization
+                    with `GeneralBO`.
+
+            seed: The seed to use for trials generated from this optimizer.
+            bucket: The bucket to store results of individual trials from this
+                optimizer.
+            **optimizer_kwargs: Keyword arguments to pass to the optimizer constructed.
+
+        Returns:
+            A HEBOOptimizer.
+        """
+        # TODO: Since hebo in its observe will ignore anything that's inf, we can
+        # not have metrics that have an unbounded best, as these would get ignored...
+        # Probably need to raise an issue.
+        # Not really sure how to report or handle that case though as it's only a
+        # theoretical problem.
+        _check_metrics = [metrics] if isinstance(metrics, Metric) else metrics
+        if any(np.isinf(metric.optimal.value) for metric in _check_metrics):
+            raise ValueError(
+                "HEBO doesn't support metrics with an unbounded optimal value i.e. inf",
+            )
+
+        scramble_seed = amltk.randomness.as_int(seed)
+
+        if isinstance(bucket, str | Path):
+            bucket = PathBucket(bucket)
+
+        space = space if isinstance(space, DesignSpace) else parser(space)
+
+        match metrics:
+            case Metric() | [Metric()]:
+                optimizer = HEBO(
+                    space=space,
+                    scramble_seed=scramble_seed,
+                    **optimizer_kwargs,
+                )
+            case Sequence():
+                assert len(metrics) > 1
+                # TODO: Not really sure if I should give a ref point or not, especially
+                # if there are unbounded metrics.
+                ref_point = np.array([metric.worst.value for metric in metrics])
+                optimizer = GeneralBO(
+                    space=space,
+                    num_obj=len(metrics),
+                    ref_point=ref_point,
+                    **optimizer_kwargs,
+                )
+
+        return cls(optimizer=optimizer, metrics=metrics, bucket=bucket, seed=seed)
+
+    @override
+    @classmethod
+    def preferred_parser(cls) -> HEBOParser:
+        return parser
diff --git a/src/amltk/pipeline/parsers/hebo.py b/src/amltk/pipeline/parsers/hebo.py
new file mode 100644
index 00000000..d43dc3fc
--- /dev/null
+++ b/src/amltk/pipeline/parsers/hebo.py
@@ -0,0 +1,142 @@
+"""Parser for turning a pipeline `Node` into a HEBO `DesignSpace`."""
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Any, TypeAlias
+
+import numpy as np
+from hebo.design_space.design_space import DesignSpace
+
+if TYPE_CHECKING:
+    from amltk.pipeline import Node
+
+HP: TypeAlias = dict[str, Any]
+
+PAIR = 2
+
+
+def _parse_hp(
+    node_name: str,
+    *,
+    hp_name: str,
+    hp: tuple | list | Mapping,
+    delim: str = ":",
+) -> HP:
+    new_hp_name = f"{node_name}{delim}{hp_name}"
+    match hp:
+        # If the name in a dict does not match what we see in the `space` dict, raise
+        case {"name": _name_in_dict} if _name_in_dict != hp_name:
+            raise ValueError(
+                f'Can\'t have "name" in {hp=} as it is already given the {hp_name=}.',
+            )
+        # Otherwise it's a dictionary with either the same name or no name, either case
+        # we give it a new name prefixed by the nodes name
+        case Mapping():
+            return {**hp, "name": new_hp_name}
+        # Bounded int/float
+        case tuple() as tup if len(tup) == PAIR:
+            match tup:
+                case (int() | np.integer(), int() | np.integer()):
+                    x, y = tup
+                    return {
+                        "name": new_hp_name,
+                        "type": "int",
+                        "lb": int(x),
+                        "ub": int(y),
+                    }
+                case (float() | np.floating(), float() | np.floating()):
+                    x, y = tup
+                    return {
+                        "name": new_hp_name,
+                        "type": "num",
+                        "lb": float(x),
+                        "ub": float(y),
+                    }
+                case (x, y):
+                    raise ValueError(
+                        f"Expected {hp_name} to have same type for lower/upper bound,"
+                        f"got lower: {type(x)}, upper: {type(y)}.",
+                    )
+        # Bool param
+        case (one, two) if isinstance(one, bool) and isinstance(two, bool):
+            return {"name": hp_name, "type": "bool"}
+        # Categorical param
+        case list() if all(isinstance(item, str) for item in hp):
+            return {"name": hp_name, "type": "cat", "categories": hp}
+        # Constant value
+        case (one,) if isinstance(one, int | float | str | bool):
+            return {"name": hp_name, "type": "cat", "categories": [one]}
+        case _:
+            raise ValueError(
+                f"Could not parse {hp_name} as a valid HEBO distribution.\n{hp=}",
+            )
+
+    raise ValueError(f"Could not parse {hp_name} as a valid HEBO distribution.\n{hp=}")
+
+
+def _parse_space(node: Node, *, flat: bool = False, delim: str = ":") -> dict[str, HP]:
+    match node.space:
+        case None:
+            space = {}
+        case list():
+            space = {hp["name"]: hp for hp in node.space}
+        case Mapping():
+            space = {
+                name: _parse_hp(node_name=node.name, hp_name=name, hp=hp, delim=delim)
+                for name, hp in node.space.items()
+            }
+        case _:
+            raise ValueError(
+                f"Can't parse {node.space=} as a HEBO space for node {node.name=}.",
+            )
+
+    for child in node.nodes:
+        subspace: dict[str, HP] = _parse_space(child)
+        if not flat:
+            _prefix = lambda _hp_name: f"{node.name}{delim}{_hp_name}"
+            subspace = {
+                _prefix(hp_name): {**hp, "name": _prefix(hp_name)}
+                for hp_name, hp in subspace.items()
+            }
+
+        for hp_name, hp in subspace.items():
+            if hp_name in space:
+                raise ValueError(
+                    f"Duplicate name {hp_name} already in space from space of "
+                    f"{node.name}\nCurrently parsed space: {space}",
+                )
+            space[hp_name] = hp
+
+    return space
+
+
+def parser(
+    node: Node,
+    *,
+    flat: bool = False,
+    conditionals: bool = False,
+    delim: str = ":",
+) -> DesignSpace:
+    """Parse a Node and its children into a hebo DesignSpace.
+
+    Args:
+        node: The Node to parse
+        flat: If `False`, use a hierarchical naming scheme for nodes and their children.
+        conditionals: Whether to include conditionals in the space from a
+            [`Choice`][amltk.pipeline.Choice]. If this is `False`, this will
+            also remove all forbidden clauses and other conditional clauses.
+            The primary use of this functionality is that some optimizers do not
+            support these features.
+
+            !!! warning "Not yet supported"
+
+                This functionality is not yet supported in HEBO
+
+        delim: The delimiter to use for the names of the hyperparameters
+    """
+    if conditionals:
+        raise NotImplementedError("Conditionals are not yet supported with HEBO.")
+
+    space = _parse_space(node=node, flat=flat, delim=delim)
+    hp_values = list(space.values())
+    return DesignSpace().parse(hp_values)
diff --git a/tests/optimizers/test_optimizers.py b/tests/optimizers/test_optimizers.py
index 478c7d16..005c66bc 100644
--- a/tests/optimizers/test_optimizers.py
+++ b/tests/optimizers/test_optimizers.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+import numpy as np
 import pytest
 from pytest_cases import case, parametrize, parametrize_with_cases
 
@@ -12,6 +13,7 @@
 from amltk.profiling import Timer
 
 if TYPE_CHECKING:
+    from amltk.optimization.optimizers.hebo import HEBOOptimizer
     from amltk.optimization.optimizers.neps import NEPSOptimizer
     from amltk.optimization.optimizers.optuna import OptunaOptimizer
     from amltk.optimization.optimizers.smac import SMACOptimizer
@@ -86,6 +88,32 @@ def opt_optuna(metric: Metric, tmp_path: Path) -> OptunaOptimizer:
     )
 
 
+# NOTE: HEBO does not support metrics whose optimal value is unbounded
+hebo_metrics = [
+    Metric("score_bounded", minimize=False, bounds=(0, 1)),
+    Metric("score_unbounded", minimize=False, bounds=(-np.inf, 10)),
+    Metric("loss_unbounded", minimize=True, bounds=(-10, np.inf)),
+    Metric("loss_bounded", minimize=True, bounds=(-1, 1)),
+]
+
+
+@case
+@parametrize("metric", [*hebo_metrics, hebo_metrics])  # One per metric, then multi-obj
+def opt_hebo(metric: Metric, tmp_path: Path) -> HEBOOptimizer:
+    try:
+        from amltk.optimization.optimizers.hebo import HEBOOptimizer
+    except ImportError:
+        pytest.skip("HEBO is not installed")
+
+    pipeline = Component(_A, name="hi", space={"a": (1, 10)})
+    return HEBOOptimizer.create(
+        space=pipeline,
+        metrics=metric,
+        seed=42,
+        bucket=tmp_path,
+    )
+
+
 @case
 @parametrize("metric", [*metrics])  # Single obj
 def opt_neps(metric: Metric, tmp_path: Path) -> NEPSOptimizer:

From 5375b32c8e1a1e6fbb80bd51c2c02c48da82b5f3 Mon Sep 17 00:00:00 2001
From: eddiebergman <eddiebergmanhs@gmail.com>
Date: Wed, 22 Nov 2023 18:35:22 +0100
Subject: [PATCH 4/4] fix(HEBO): Make sure to report costs always

---
 src/amltk/optimization/optimizers/hebo.py | 45 ++++++++++++++++++++---
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/amltk/optimization/optimizers/hebo.py b/src/amltk/optimization/optimizers/hebo.py
index 21c32f29..d7683120 100644
--- a/src/amltk/optimization/optimizers/hebo.py
+++ b/src/amltk/optimization/optimizers/hebo.py
@@ -99,7 +99,6 @@ def tell(self, report: Trial.Report[HEBOTrial]) -> None:
         # than HEBO should be fine with these reported.
         # Either way, we don't actually have to look at the status of the trial to give
         # the info to hebo.
-
         # Make sure we have a value for each
         _lookup: dict[str, Metric.Value] = {
             v.metric.name: v for v in report.metric_values
@@ -107,7 +106,9 @@ def tell(self, report: Trial.Report[HEBOTrial]) -> None:
         metric_values = [
             _lookup.get(metric.name, metric.worst) for metric in self.metrics
         ]
-        raw_y = np.array([[v.value for v in metric_values]])
+
+        costs = [self.cost(v) for v in metric_values]
+        raw_y = np.array([costs])  # Yep, it needs 2d, for single report tells
         self.optimizer.observe(raw_x, raw_y)
 
     @override
@@ -221,13 +222,12 @@ def create(
                 )
             case Sequence():
                 assert len(metrics) > 1
-                # TODO: Not really sure if I should give a ref point or not, especially
-                # if there are unbounded metrics.
-                ref_point = np.array([metric.worst.value for metric in metrics])
                 optimizer = GeneralBO(
                     space=space,
                     num_obj=len(metrics),
-                    ref_point=ref_point,
+                    # TODO: Not really sure if I should give a ref point or not,
+                    # especially if there are unbounded metrics.
+                    ref_point=np.array(cls.worst_possible_cost(metrics)),
                     **optimizer_kwargs,
                 )
 
@@ -237,3 +237,36 @@ def create(
     @classmethod
     def preferred_parser(cls) -> HEBOParser:
         return parser
+
+    @overload
+    @classmethod
+    def worst_possible_cost(cls, metric: Metric) -> float:
+        ...
+
+    @overload
+    @classmethod
+    def worst_possible_cost(cls, metric: Sequence[Metric]) -> list[float]:
+        ...
+
+    @classmethod
+    def worst_possible_cost(
+        cls,
+        metric: Metric | Sequence[Metric],
+    ) -> float | list[float]:
+        """Get the worst possible cost of a metric (or of each metric) for HEBO."""
+        match metric:
+            case Metric(bounds=(lower, upper)):  # Bounded metrics
+                return abs(upper - lower)
+            case Metric():  # Unbounded metric
+                return np.inf
+            case metrics:
+                return [cls.worst_possible_cost(m) for m in metrics]
+
+    @classmethod
+    def cost(cls, value: Metric.Value) -> float:
+        """Get the HEBO cost: the distance to optimal if available, else the loss."""
+        match value.distance_to_optimal:
+            case None:  # If we can't compute the distance, use the loss
+                return value.loss
+            case distance:  # If we can compute the distance, use that
+                return distance