From 13ee01dad0bb55bfdfd12c61e393329622db670f Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Tue, 4 Nov 2025 17:58:42 +0100
Subject: [PATCH 01/10] feat: add evolutionary algorithm for categoricals in
 acqfn optim

---
 neps/optimizers/models/gp.py | 50 ++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/neps/optimizers/models/gp.py b/neps/optimizers/models/gp.py
index 586ba371e..a65203135 100644
--- a/neps/optimizers/models/gp.py
+++ b/neps/optimizers/models/gp.py
@@ -11,6 +11,7 @@
 from typing import TYPE_CHECKING, Any
 
 import gpytorch.constraints
+import numpy as np
 import torch
 from botorch.fit import fit_gpytorch_mll
 from botorch.models import SingleTaskGP
@@ -207,7 +208,6 @@ def optimize_acq(
         ]
         for name, transformer in cat_transformers.items()
     }
-
     # Second, generate all possible combinations
     fixed_cats: list[dict[int, float]]
     if len(cats) == 1:
@@ -226,15 +226,43 @@ def optimize_acq(
     with warning_context:
         # TODO: we should deterministically shuffle the fixed_categoricals
         # as the underlying function does not.
-        return optimize_acqf_mixed(  # type: ignore
-            acq_function=acq_fn,
-            bounds=bounds,
-            num_restarts=min(num_restarts // n_combos, 2),
-            raw_samples=n_intial_start_points,
-            q=n_candidates_required,
-            fixed_features_list=fixed_cats,
-            **acq_options,
-        )
+
+        # Sample a subset of the fixed cat combinations to form initial population
+        population = np.random.choice(
+            fixed_cats,
+            size=min(len(fixed_cats), 20),
+            replace=False,
+        ).tolist()
+
+        # Randomly shuffle the population
+        best_score = -np.inf
+        best_candidates = None
+        for _ in range(10):
+            np.random.shuffle(population)
+            candidates, scores = optimize_acqf_mixed(  # type: ignore
+                acq_function=acq_fn,
+                bounds=bounds,
+                num_restarts=2,
+                raw_samples=n_intial_start_points,
+                q=n_candidates_required,
+                fixed_features_list=population,
+                **acq_options,
+            )
+
+            # Randomly mutate one of the cats in the returned candidate
+            mutated_candidate = candidates[0].clone()
+            mutated_cat_idx = np.random.choice(list(cats.keys()))
+            mutate_value = np.random.choice(cats[mutated_cat_idx])
+            mutated_candidate[mutated_cat_idx] = mutate_value
+
+            # Randomly replace a candidate in the population with the mutated candidate
+            population.append({k: mutated_candidate[k].item() for k in cats})
+
+            # Keep best candidates and scores
+            if scores.item() > best_score:
+                best_score = scores.item()
+                best_candidates = mutated_candidate.unsqueeze(0)
+        return best_candidates, best_score
 
 
 def encode_trials_for_gp(
@@ -318,7 +346,7 @@ def fit_and_acquire_from_gp(
     n_candidates_required: int | None = None,
     num_restarts: int = 20,
     n_initial_start_points: int = 256,
-    maximum_allowed_categorical_combinations: int = 30,
+    maximum_allowed_categorical_combinations: int = 300000,
     fixed_acq_features: dict[str, Any] | None = None,
     acq_options: Mapping[str, Any] | None = None,
     hide_warnings: bool = False,

From 7bc41e701300ed766afa617d84efb0cd591b688b Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Sun, 9 Nov 2025 14:38:57 +0100
Subject: [PATCH 02/10] fix: stricter ifbo version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9611a16e9..ad8552c6c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,7 +64,7 @@ dependencies = [
     "torchvision>=0.8.0",
     "botorch>=0.12",
     "gpytorch==1.13.0",
-    "ifbo",
+    "ifbo>=0.3.10",
 ]
 
 [project.urls]

From 36b01dda6964fa8bd3f32af3a2207a14fcce1151 Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Tue, 11 Nov 2025 19:05:13 +0100
Subject: [PATCH 03/10] fix: Add pymoo dependency

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index ad8552c6c..d873bfb1e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,6 +65,7 @@ dependencies = [
     "botorch>=0.12",
     "gpytorch==1.13.0",
     "ifbo>=0.3.10",
+    "pymoo"
 ]
 
 [project.urls]

From e63efe113d8c82521c3b8f088a760fdaff87a038 Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Thu, 20 Nov 2025 23:55:39 +0100
Subject: [PATCH 04/10] feat: Add discrete local search for categoricals and
 sequential search for mixed spaces

---
 neps/optimizers/models/gp.py | 145 ++++++++++++++++++++++++++---------
 1 file changed, 107 insertions(+), 38 deletions(-)

diff --git a/neps/optimizers/models/gp.py b/neps/optimizers/models/gp.py
index a65203135..c59093a02 100644
--- a/neps/optimizers/models/gp.py
+++ b/neps/optimizers/models/gp.py
@@ -18,7 +18,10 @@
 from botorch.models.gp_regression import Log, get_covar_module_with_dim_scaled_prior
 from botorch.models.gp_regression_mixed import CategoricalKernel, OutcomeTransform
 from botorch.models.transforms.outcome import ChainedOutcomeTransform, Standardize
-from botorch.optim import optimize_acqf, optimize_acqf_mixed
+from botorch.optim import (
+    optimize_acqf,
+    optimize_acqf_discrete_local_search,
+)
 from gpytorch import ExactMarginalLogLikelihood
 from gpytorch.kernels import ScaleKernel
 from gpytorch.utils.warnings import NumericalWarning
@@ -124,7 +127,7 @@ def make_default_single_obj_gp(
     )
 
 
-def optimize_acq(
+def optimize_acq(  # noqa: C901, PLR0915
     acq_fn: AcquisitionFunction,
     encoder: ConfigEncoder,
     *,
@@ -162,6 +165,8 @@ def optimize_acq(
         )
     }
 
+    num_numericals = len(encoder.domains) - len(cat_transformers)
+
     # Proceed with regular numerical acquisition
     if not any(cat_transformers):
         # Small heuristic to increase the number of candidates as our
@@ -198,6 +203,8 @@ def optimize_acq(
             "dimensions or consider encoding your categoricals in some other format."
         )
 
+    # For a large number of categoricals, we need to generate a subset of all possible
+    # combinations to use as fixed features during acquisition function optimization.
     # Right, now we generate all possible combinations
     # First, just collect the possible values per cat column
     # {hp_name: [v1, v2], hp_name2: [v1, v2, v3], ...}
@@ -208,9 +215,16 @@ def optimize_acq(
         ]
         for name, transformer in cat_transformers.items()
     }
-    # Second, generate all possible combinations
-    fixed_cats: list[dict[int, float]]
-    if len(cats) == 1:
+    fixed_cats: list[dict[int, float]] = []
+    if n_combos > 1000:
+        # randomly sample 1000 combinations if n_combos is too large
+        keys = list(cats.keys())
+        for _ in range(1000):
+            combo = {key: float(np.random.choice(cats[key])) for key in keys}
+            fixed_cats.append(combo)
+
+    # generate all possible combinations if n_combos is small enough
+    elif len(cats) == 1:
         col, choice_indices = next(iter(cats.items()))
         fixed_cats = [{col: i} for i in choice_indices]
     else:
@@ -223,46 +237,101 @@ def optimize_acq(
     if len(_fixed_features) > 0:
         fixed_cats = [{**cat, **_fixed_features} for cat in fixed_cats]
 
-    with warning_context:
-        # TODO: we should deterministically shuffle the fixed_categoricals
-        # as the underlying function does not.
-
-        # Sample a subset of the fixed cat combinations to form initial population
-        population = np.random.choice(
-            fixed_cats,
-            size=min(len(fixed_cats), 20),
-            replace=False,
-        ).tolist()
-
-        # Randomly shuffle the population
-        best_score = -np.inf
-        best_candidates = None
-        for _ in range(10):
-            np.random.shuffle(population)
-            candidates, scores = optimize_acqf_mixed(  # type: ignore
+    if num_numericals > 0:
+        with warning_context:
+            # cats: dict[int, list[float]] as before
+            cat_keys = list(cats.keys())
+            choices = [torch.tensor(cats[k], dtype=torch.float) for k in cat_keys]
+
+            # Sample a random categorical combination and keep it fixed during
+            # the continuous optimization step
+            random_fixed_cat = fixed_cats[np.random.randint(len(fixed_cats))]
+
+            # --- Step 1: Optimize acquisition function over the continuous space ---
+            best_x_continuous, _ = optimize_acqf(
                 acq_function=acq_fn,
                 bounds=bounds,
-                num_restarts=2,
-                raw_samples=n_intial_start_points,
                 q=n_candidates_required,
-                fixed_features_list=population,
+                num_restarts=num_restarts,
+                raw_samples=n_intial_start_points,
+                fixed_features=_fixed_features or random_fixed_cat,
                 **acq_options,
             )
 
-            # Randomly mutate one of the cats in the returned candidate
-            mutated_candidate = candidates[0].clone()
-            mutated_cat_idx = np.random.choice(list(cats.keys()))
-            mutate_value = np.random.choice(cats[mutated_cat_idx])
-            mutated_candidate[mutated_cat_idx] = mutate_value
+            # Extract the numerical dims from the optimized continuous vector
+            cont_dims = [i for i in range(len(encoder.domains)) if i not in cat_keys]
+
+            # --- Step 2: Wrap acquisition function for discrete search ---
+            def acq_discrete_only(cat_tensor: torch.Tensor) -> torch.Tensor:
+                """
+                Evaluate the acquisition function at the optimized continuous vector
+                for a given categorical vector.
+                cat_tensor: shape [q, 1, num_cats]
+                """
+
+                # cat_tensor is of shape [q, 1, num_cats]
+                # insert in each q dimension the continuous dims from
+                # best_x_continuous to form the full x
+
+                cat_tensor = cat_tensor.reshape(-1, len(cat_keys))  # [q, num_cats]
+                x_full: list[torch.Tensor] = []
+                for candidate in cat_tensor:
+                    combo = {k: float(v.item()) for k, v in zip(cat_keys, candidate)}  # noqa: B905
+                    combo.update(
+                        {i: float(best_x_continuous[0, i].item()) for i in cont_dims}
+                    )
+                    x_candidate = torch.tensor(
+                        [combo[i] for i in range(len(encoder.domains))],
+                        dtype=torch.float,
+                    )
+                    x_full.append(x_candidate)
+                x_full_tensor: torch.Tensor = torch.stack(x_full, dim=0)  # [q, num_dims]
+                # Expand to original dim
+                x_full_tensor = x_full_tensor.unsqueeze(1)  # [q, 1, num_dims]
+                return acq_fn(x_full_tensor)
+
+            # --- Step 3: Run BoTorch discrete local search over categorical space ---
+            best_cat_tensor, _ = optimize_acqf_discrete_local_search(
+                acq_function=acq_discrete_only,
+                discrete_choices=choices,
+                q=n_candidates_required,
+                num_restarts=num_restarts,
+                raw_samples=n_intial_start_points,
+            )
+
+            best_cat_dict = {
+                k: float(v.item())
+                for k, v in zip(cat_keys, best_cat_tensor[0], strict=False)
+            }
+
+            # --- Step 4: Return the final combined candidate ---
+            best_x_full = torch.tensor(
+                [
+                    best_cat_dict.get(i, float(best_x_continuous[0, i].item()))
+                    for i in range(len(encoder.domains))
+                ],
+                dtype=torch.float,
+            ).unsqueeze(0)
+
+            # Optional: evaluate the final acquisition value
+            with torch.no_grad():
+                best_val_final = acq_fn(best_x_full)
 
-            # Randomly replace a candidate in the population with the mutated candidate
-            population.append({k: mutated_candidate[k].item() for k in cats})
+            return best_x_full, best_val_final
 
-            # Keep best candidates and scores
-            if scores.item() > best_score:
-                best_score = scores.item()
-                best_candidates = mutated_candidate.unsqueeze(0)
-        return best_candidates, best_score
+    else:
+        with warning_context:
+            torch_cats: list[torch.Tensor] = [
+                torch.tensor(v, dtype=torch.float64) for v in cats.values()
+            ]
+            return optimize_acqf_discrete_local_search(  # type: ignore
+                acq_function=acq_fn,
+                discrete_choices=torch_cats,
+                q=n_candidates_required,
+                num_restarts=num_restarts,
+                raw_samples=n_intial_start_points,
+                **acq_options,
+            )
 
 
 def encode_trials_for_gp(
@@ -346,7 +415,7 @@ def fit_and_acquire_from_gp(
     n_candidates_required: int | None = None,
     num_restarts: int = 20,
     n_initial_start_points: int = 256,
-    maximum_allowed_categorical_combinations: int = 300000,
+    maximum_allowed_categorical_combinations: int = np.inf,
     fixed_acq_features: dict[str, Any] | None = None,
     acq_options: Mapping[str, Any] | None = None,
     hide_warnings: bool = False,

From 08ec062043d4ac9f03f5b58530303f47fc01393b Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Fri, 21 Nov 2025 02:42:00 +0100
Subject: [PATCH 05/10] fix: Filter out BoTorch InputDataWarning

---
 neps/optimizers/models/gp.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/neps/optimizers/models/gp.py b/neps/optimizers/models/gp.py
index c59093a02..56811977c 100644
--- a/neps/optimizers/models/gp.py
+++ b/neps/optimizers/models/gp.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import warnings
 from collections.abc import Mapping, Sequence
 from contextlib import nullcontext
 from dataclasses import dataclass
@@ -13,6 +14,7 @@
 import gpytorch.constraints
 import numpy as np
 import torch
+from botorch.exceptions.warnings import InputDataWarning
 from botorch.fit import fit_gpytorch_mll
 from botorch.models import SingleTaskGP
 from botorch.models.gp_regression import Log, get_covar_module_with_dim_scaled_prior
@@ -39,6 +41,8 @@
 
 logger = logging.getLogger(__name__)
 
+warnings.filterwarnings("ignore", category=InputDataWarning)
+
 
 @dataclass
 class GPEncodedData:
@@ -243,11 +247,11 @@ def optimize_acq(  # noqa: C901, PLR0915
             cat_keys = list(cats.keys())
             choices = [torch.tensor(cats[k], dtype=torch.float) for k in cat_keys]
 
+            # Step 1: Optimize acquisition function over the continuous space
             # Sample a random categorical combination and keep it fixed during
             # the continuous optimization step
             random_fixed_cat = fixed_cats[np.random.randint(len(fixed_cats))]
 
-            # --- Step 1: Optimize acquisition function over the continuous space ---
             best_x_continuous, _ = optimize_acqf(
                 acq_function=acq_fn,
                 bounds=bounds,
@@ -261,7 +265,7 @@ def optimize_acq(  # noqa: C901, PLR0915
             # Extract the numerical dims from the optimized continuous vector
             cont_dims = [i for i in range(len(encoder.domains)) if i not in cat_keys]
 
-            # --- Step 2: Wrap acquisition function for discrete search ---
+            # Step 2: Wrap acquisition function for discrete search
             def acq_discrete_only(cat_tensor: torch.Tensor) -> torch.Tensor:
                 """
                 Evaluate the acquisition function at the optimized continuous vector
@@ -290,7 +294,7 @@ def acq_discrete_only(cat_tensor: torch.Tensor) -> torch.Tensor:
                 x_full_tensor = x_full_tensor.unsqueeze(1)  # [q, 1, num_dims]
                 return acq_fn(x_full_tensor)
 
-            # --- Step 3: Run BoTorch discrete local search over categorical space ---
+            # Step 3: Run BoTorch discrete local search over categorical space
             best_cat_tensor, _ = optimize_acqf_discrete_local_search(
                 acq_function=acq_discrete_only,
                 discrete_choices=choices,
@@ -304,7 +308,7 @@ def acq_discrete_only(cat_tensor: torch.Tensor) -> torch.Tensor:
                 for k, v in zip(cat_keys, best_cat_tensor[0], strict=False)
             }
 
-            # --- Step 4: Return the final combined candidate ---
+            # Step 4: Final combined candidate
             best_x_full = torch.tensor(
                 [
                     best_cat_dict.get(i, float(best_x_continuous[0, i].item()))
@@ -313,7 +317,7 @@ def acq_discrete_only(cat_tensor: torch.Tensor) -> torch.Tensor:
                 dtype=torch.float,
             ).unsqueeze(0)
 
-            # Optional: evaluate the final acquisition value
+            # Evaluate the final acquisition value
             with torch.no_grad():
                 best_val_final = acq_fn(best_x_full)
 

From 1f60dfeead29de16fa764c906147475826d7408e Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Thu, 27 Nov 2025 16:50:07 +0100
Subject: [PATCH 06/10] Update pymoo dep version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d873bfb1e..b91aa8cf6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,7 +65,7 @@ dependencies = [
     "botorch>=0.12",
     "gpytorch==1.13.0",
     "ifbo>=0.3.10",
-    "pymoo"
+    "pymoo>=0.6.1.5"
 ]
 
 [project.urls]

From 6927807ec191be7c3bcd3abad531ab3246a29568 Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Tue, 25 Nov 2025 19:01:26 +0100
Subject: [PATCH 07/10] feat: update randomly chosen categoricals with fixed
 cats passed in BO

---
 neps/optimizers/models/gp.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/neps/optimizers/models/gp.py b/neps/optimizers/models/gp.py
index 56811977c..b766f31be 100644
--- a/neps/optimizers/models/gp.py
+++ b/neps/optimizers/models/gp.py
@@ -237,28 +237,28 @@ def optimize_acq(  # noqa: C901, PLR0915
             for combo in product(*cats.values())
         ]
 
-    # Make sure to include caller's fixed features if provided
-    if len(_fixed_features) > 0:
-        fixed_cats = [{**cat, **_fixed_features} for cat in fixed_cats]
+    cat_keys = list(cats.keys())
+    choices = [torch.tensor(cats[k], dtype=torch.float) for k in cat_keys]
 
     if num_numericals > 0:
         with warning_context:
             # cats: dict[int, list[float]] as before
-            cat_keys = list(cats.keys())
-            choices = [torch.tensor(cats[k], dtype=torch.float) for k in cat_keys]
 
             # Step 1: Optimize acquisition function over the continuous space
             # Sample a random categorical combination and keep it fixed during
             # the continuous optimization step
             random_fixed_cat = fixed_cats[np.random.randint(len(fixed_cats))]
 
+            if len(_fixed_features) > 0:
+                random_fixed_cat.update(_fixed_features)
+
             best_x_continuous, _ = optimize_acqf(
                 acq_function=acq_fn,
                 bounds=bounds,
                 q=n_candidates_required,
                 num_restarts=num_restarts,
                 raw_samples=n_intial_start_points,
-                fixed_features=_fixed_features or random_fixed_cat,
+                fixed_features=random_fixed_cat,
                 **acq_options,
             )
 
@@ -325,12 +325,9 @@ def acq_discrete_only(cat_tensor: torch.Tensor) -> torch.Tensor:
 
     else:
         with warning_context:
-            torch_cats: list[torch.Tensor] = [
-                torch.tensor(v, dtype=torch.float64) for v in cats.values()
-            ]
             return optimize_acqf_discrete_local_search(  # type: ignore
                 acq_function=acq_fn,
-                discrete_choices=torch_cats,
+                discrete_choices=choices,
                 q=n_candidates_required,
                 num_restarts=num_restarts,
                 raw_samples=n_intial_start_points,

From c7484583b3b07a0989ae92757c878a449c2022a6 Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Thu, 27 Nov 2025 18:50:33 +0100
Subject: [PATCH 08/10] fix: clean up acqf optim code

---
 neps/optimizers/models/gp.py | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/neps/optimizers/models/gp.py b/neps/optimizers/models/gp.py
index b766f31be..7255a1acc 100644
--- a/neps/optimizers/models/gp.py
+++ b/neps/optimizers/models/gp.py
@@ -8,7 +8,6 @@
 from contextlib import nullcontext
 from dataclasses import dataclass
 from functools import reduce
-from itertools import product
 from typing import TYPE_CHECKING, Any
 
 import gpytorch.constraints
@@ -131,7 +130,7 @@ def make_default_single_obj_gp(
     )
 
 
-def optimize_acq(  # noqa: C901, PLR0915
+def optimize_acq(
     acq_fn: AcquisitionFunction,
     encoder: ConfigEncoder,
     *,
@@ -219,38 +218,22 @@ def optimize_acq(  # noqa: C901, PLR0915
         ]
         for name, transformer in cat_transformers.items()
     }
-    fixed_cats: list[dict[int, float]] = []
-    if n_combos > 1000:
-        # randomly sample 1000 combinations if n_combos is too large
-        keys = list(cats.keys())
-        for _ in range(1000):
-            combo = {key: float(np.random.choice(cats[key])) for key in keys}
-            fixed_cats.append(combo)
-
-    # generate all possible combinations if n_combos is small enough
-    elif len(cats) == 1:
-        col, choice_indices = next(iter(cats.items()))
-        fixed_cats = [{col: i} for i in choice_indices]
-    else:
-        fixed_cats = [
-            dict(zip(cats.keys(), combo, strict=False))
-            for combo in product(*cats.values())
-        ]
-
     cat_keys = list(cats.keys())
     choices = [torch.tensor(cats[k], dtype=torch.float) for k in cat_keys]
+    fixed_cat: dict[int, float] = {}
 
     if num_numericals > 0:
         with warning_context:
+            fixed_cat = {key: float(np.random.choice(cats[key])) for key in cat_keys}
+
             # cats: dict[int, list[float]] as before
 
             # Step 1: Optimize acquisition function over the continuous space
             # Sample a random categorical combination and keep it fixed during
             # the continuous optimization step
-            random_fixed_cat = fixed_cats[np.random.randint(len(fixed_cats))]
 
             if len(_fixed_features) > 0:
-                random_fixed_cat.update(_fixed_features)
+                fixed_cat.update(_fixed_features)
 
             best_x_continuous, _ = optimize_acqf(
                 acq_function=acq_fn,
@@ -258,7 +241,7 @@ def optimize_acq(  # noqa: C901, PLR0915
                 q=n_candidates_required,
                 num_restarts=num_restarts,
                 raw_samples=n_intial_start_points,
-                fixed_features=random_fixed_cat,
+                fixed_features=fixed_cat,
                 **acq_options,
             )
 

From 9709ce59631bcd8af2db91a26cb1d2b8388e77ea Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Thu, 27 Nov 2025 21:13:42 +0100
Subject: [PATCH 09/10] feat: Add WrappedAcquisition module and adapt the mixed
 spaces optimization script accordingly

---
 neps/optimizers/acquisition/__init__.py       |   8 +-
 .../acquisition/wrapped_acquisition.py        |  94 +++++++++++++
 neps/optimizers/models/gp.py                  | 128 +++++++++++-------
 3 files changed, 177 insertions(+), 53 deletions(-)
 create mode 100644 neps/optimizers/acquisition/wrapped_acquisition.py

diff --git a/neps/optimizers/acquisition/__init__.py b/neps/optimizers/acquisition/__init__.py
index 0d2d27efa..d8e4a4771 100644
--- a/neps/optimizers/acquisition/__init__.py
+++ b/neps/optimizers/acquisition/__init__.py
@@ -1,5 +1,11 @@
 from neps.optimizers.acquisition.cost_cooling import cost_cooled_acq
 from neps.optimizers.acquisition.pibo import pibo_acquisition
 from neps.optimizers.acquisition.weighted_acquisition import WeightedAcquisition
+from neps.optimizers.acquisition.wrapped_acquisition import WrappedAcquisition
 
-__all__ = ["WeightedAcquisition", "cost_cooled_acq", "pibo_acquisition"]
+__all__ = [
+    "WeightedAcquisition",
+    "WrappedAcquisition",
+    "cost_cooled_acq",
+    "pibo_acquisition",
+]
diff --git a/neps/optimizers/acquisition/wrapped_acquisition.py b/neps/optimizers/acquisition/wrapped_acquisition.py
new file mode 100644
index 000000000..21abe872c
--- /dev/null
+++ b/neps/optimizers/acquisition/wrapped_acquisition.py
@@ -0,0 +1,94 @@
+"""Module to wrap the existing acquisition function to account for mixed search spaces.
+
+For mixed search spaces, we first keep the categorical dimensions fixed to some randomly
+chosen values and perform optimization over the continuous dimensions.
+Next, we select the numerical dimensions from the returned best candidate and keep them
+fixed, while we use `optimize_acqf_discrete_local_search` over the categorical dimensions.
+
+For this, we need to wrap the existing acquisition function to accept tensors containing
+only the categorical dimensions since BoTorch does not natively support keeping numerical
+dimensions fixed in `optimize_acqf_discrete_local_search`.
+
+Inside `WrappedAcquisition`, we concatenate the fixed numerical dimensions to the tensor
+containing only the categoricals before passing it to the original acquisition function.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import torch
+from botorch.acquisition import AcquisitionFunction
+from botorch.acquisition.analytic import t_batch_mode_transform
+from botorch.acquisition.monte_carlo import concatenate_pending_points
+
+if TYPE_CHECKING:
+    from torch import Tensor
+
+    from neps.space.encoding import ConfigEncoder
+
+
+class WrappedAcquisition(AcquisitionFunction):
+    """Acquisition function wrapper for mixed search spaces."""
+
+    def __init__(
+        self,
+        acq: AcquisitionFunction,
+        encoder: ConfigEncoder,
+        fixed_numericals: dict[int, float],
+    ) -> None:
+        """Initialize the wrapped acquisition function.
+
+        Args:
+            acq: The base acquisition function.
+            encoder: The encoder used for encoding the configurations.
+            fixed_numericals: Maps numerical dimension indices to their fixed values.
+        """
+        super().__init__(model=acq.model)
+        # NOTE: Remove X_pending from the base acquisition function.
+        # See similar note in WeightedAcquisition.
+        if (X_pending := getattr(acq, "X_pending", None)) is not None:
+            acq.set_X_pending(None)
+            self.set_X_pending(X_pending)
+        else:
+            acq.set_X_pending(None)
+            self.set_X_pending(None)
+
+        self.acq = acq
+        self.encoder = encoder
+        self.fixed_numericals = fixed_numericals
+        self.fixed_numericals = fixed_numericals
+
+    @concatenate_pending_points  # type: ignore
+    @t_batch_mode_transform()  # type: ignore
+    def forward(self, X: Tensor) -> Tensor:
+        """Evaluate the wrapped acquisition function on the candidate set X
+        after concatenating the fixed numerical dimensions.
+
+        Args:
+            X: A `batch_shape x q x d_categorical`-dim tensor of candidates, where
+                `d_categorical` is the number of categorical dimensions.
+
+        Returns:
+            A `batch_shape`-dim tensor of acquisition function values at the input
+            candidates.
+        """
+        batch, q, c_dims = X.shape
+        n_dims = len(self.fixed_numericals)
+        new_X_shape = (batch, q, c_dims + n_dims)
+
+        # Create a new tensor to hold the concatenated dimensions
+        x_full: torch.Tensor = torch.empty(new_X_shape, dtype=X.dtype, device=X.device)
+
+        # Create a mask to identify positions of categorical and numerical dimensions
+        mask = torch.ones(c_dims + n_dims, dtype=torch.bool, device=X.device)
+        insert_idxs = torch.tensor(list(self.fixed_numericals.keys()), device=X.device)
+        mask[insert_idxs] = False
+
+        # Fill in the fixed numerical values and the input categorical values
+        for idx, val in self.fixed_numericals.items():
+            x_full[:, :, idx] = val
+        x_full[:, :, mask] = X
+
+        # Pass the concatenated tensor to the original acquisition function
+        return self.acq(x_full)
diff --git a/neps/optimizers/models/gp.py b/neps/optimizers/models/gp.py
index 7255a1acc..21ed32ffd 100644
--- a/neps/optimizers/models/gp.py
+++ b/neps/optimizers/models/gp.py
@@ -27,7 +27,11 @@
 from gpytorch.kernels import ScaleKernel
 from gpytorch.utils.warnings import NumericalWarning
 
-from neps.optimizers.acquisition import cost_cooled_acq, pibo_acquisition
+from neps.optimizers.acquisition import (
+    WrappedAcquisition,
+    cost_cooled_acq,
+    pibo_acquisition,
+)
 from neps.space.encoding import CategoricalToIntegerTransformer, ConfigEncoder
 from neps.utils.common import disable_warnings
 
@@ -142,7 +146,34 @@ def optimize_acq(
     maximum_allowed_categorical_combinations: int = 30,
     hide_warnings: bool = False,
 ) -> tuple[torch.Tensor, torch.Tensor]:
-    """Optimize the acquisition function."""
+    """Optimize the acquisition function.
+
+    For purely numerical spaces, this uses botorch's `optimize_acqf()`.
+    For purely categorical spaces, this uses `optimize_acqf_discrete_local_search()`.
+    For mixed spaces, this uses a two step sequential optimization:
+    1. Optimize the acquisition function over the continuous dimensions while
+        holding a randomly sampled categorical combination fixed.
+    2. Wrap the acquisition function to fix the numerical dimensions
+        and optimize over the categorical space using
+        `optimize_acqf_discrete_local_search()`
+        NOTE: `optimize_acqf_discrete_local_search()` scales much better than
+        `optimize_acqf_mixed()` for large categorical dimensions in the search space.
+
+
+    Args:
+        acq_fn: The acquisition function to optimize.
+        encoder: The encoder used for encoding the configurations
+        n_candidates_required: The number of candidates to return.
+        num_restarts: The number of restarts to use during optimization.
+        n_intial_start_points: The number of initial start points to use during
+            optimization.
+        acq_options: Additional options to pass to the botorch `optimize_acqf` function.
+        fixed_features: The features to fix to a certain value during acquisition.
+        hide_warnings: Whether to hide numerical warnings issued during GP routines.
+
+    Returns:
+        The (encoded) optimized candidate(s) and corresponding acquisition value(s).
+    """
     warning_context = (
         disable_warnings(NumericalWarning) if hide_warnings else nullcontext()
     )
@@ -206,9 +237,6 @@ def optimize_acq(
             "dimensions or consider encoding your categoricals in some other format."
         )
 
-    # For a large number of categoricals, we need to generate a subset of all possible
-    # combinations to use as fixed features during acquisition function optimization.
-    # Right, now we generate all possible combinations
     # First, just collect the possible values per cat column
     # {hp_name: [v1, v2], hp_name2: [v1, v2, v3], ...}
     cats: dict[int, list[float]] = {
@@ -224,13 +252,11 @@ def optimize_acq(
 
     if num_numericals > 0:
         with warning_context:
+            # Sample a random categorical combination and keep it fixed during
+            # the continuous optimization step
             fixed_cat = {key: float(np.random.choice(cats[key])) for key in cat_keys}
 
-            # cats: dict[int, list[float]] as before
-
             # Step 1: Optimize acquisition function over the continuous space
-            # Sample a random categorical combination and keep it fixed during
-            # the continuous optimization step
 
             if len(_fixed_features) > 0:
                 fixed_cat.update(_fixed_features)
@@ -246,59 +272,57 @@ def optimize_acq(
             )
 
             # Extract the numerical dims from the optimized continuous vector
-            cont_dims = [i for i in range(len(encoder.domains)) if i not in cat_keys]
+            fixed_numericals = {
+                i: float(best_x_continuous[0, i].item())
+                for i in range(len(encoder.domains))
+                if i not in cat_keys
+            }
+
+            # Update fixed_numericals with _fixed_features
+            fixed_numericals.update(_fixed_features)
 
             # Step 2: Wrap acquisition function for discrete search
-            def acq_discrete_only(cat_tensor: torch.Tensor) -> torch.Tensor:
-                """
-                Evaluate the acquisition function at the optimized continuous vector
-                for a given categorical vector.
-                cat_tensor: shape [q, 1, num_cats]
-                """
-
-                # cat_tensor is of shape [q, 1, num_cats]
-                # insert in each q dimension the continuous dims from
-                # best_x_continuous to form the full x
-
-                cat_tensor = cat_tensor.reshape(-1, len(cat_keys))  # [q, num_cats]
-                x_full: list[torch.Tensor] = []
-                for candidate in cat_tensor:
-                    combo = {k: float(v.item()) for k, v in zip(cat_keys, candidate)}  # noqa: B905
-                    combo.update(
-                        {i: float(best_x_continuous[0, i].item()) for i in cont_dims}
-                    )
-                    x_candidate = torch.tensor(
-                        [combo[i] for i in range(len(encoder.domains))],
-                        dtype=torch.float,
-                    )
-                    x_full.append(x_candidate)
-                x_full_tensor: torch.Tensor = torch.stack(x_full, dim=0)  # [q, num_dims]
-                # Expand to original dim
-                x_full_tensor = x_full_tensor.unsqueeze(1)  # [q, 1, num_dims]
-                return acq_fn(x_full_tensor)
-
-            # Step 3: Run BoTorch discrete local search over categorical space
+            wrapped_acq = WrappedAcquisition(
+                acq=acq_fn,
+                encoder=encoder,
+                fixed_numericals=fixed_numericals,
+            )
+
+            # Step 3: Run discrete local search over the categorical space
+            # with the wrapped acquisition function
             best_cat_tensor, _ = optimize_acqf_discrete_local_search(
-                acq_function=acq_discrete_only,
+                acq_function=wrapped_acq,
                 discrete_choices=choices,
                 q=n_candidates_required,
                 num_restarts=num_restarts,
                 raw_samples=n_intial_start_points,
             )
 
-            best_cat_dict = {
-                k: float(v.item())
-                for k, v in zip(cat_keys, best_cat_tensor[0], strict=False)
-            }
+            # Step 4: Concatenate best categorical and numerical dims, along with
+            # any fixed features provided by the caller
+
+            q, c_dims = best_cat_tensor.shape
+            n_dims = len(fixed_numericals)
+            new_X_shape = (q, c_dims + n_dims)
+
+            # Create a new tensor to hold the concatenated dimensions
+            best_x_full: torch.Tensor = torch.empty(
+                new_X_shape, dtype=best_cat_tensor.dtype, device=best_cat_tensor.device
+            )
+
+            # Boolean mask over columns: True = categorical slot, False = fixed value
+            mask = torch.ones(
+                c_dims + n_dims, dtype=torch.bool, device=best_cat_tensor.device
+            )
+            insert_idxs = torch.tensor(
+                list(fixed_numericals.keys()), device=best_cat_tensor.device
+            )
+            mask[insert_idxs] = False
 
-            # Step 4: Final combined candidate
-            best_x_full = torch.tensor(
-                [
-                    best_cat_dict.get(i, float(best_x_continuous[0, i].item()))
-                    for i in range(len(encoder.domains))
-                ],
-                dtype=torch.float,
-            ).unsqueeze(0)
+            # Fill in the fixed numerical values and the input categorical values
+            for idx, val in fixed_numericals.items():
+                best_x_full[:, idx] = val
+            best_x_full[:, mask] = best_cat_tensor
 
             # Evaluate the final acquisition value
             with torch.no_grad():

From 3c5566eab8abc0372dfb7801bd364398e1f08666 Mon Sep 17 00:00:00 2001
From: Sohambasu07 <soham.basu07@gmail.com>
Date: Thu, 27 Nov 2025 21:17:46 +0100
Subject: [PATCH 10/10] fix: Remove

---
 neps/optimizers/models/gp.py | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/neps/optimizers/models/gp.py b/neps/optimizers/models/gp.py
index 21ed32ffd..365caa77e 100644
--- a/neps/optimizers/models/gp.py
+++ b/neps/optimizers/models/gp.py
@@ -7,7 +7,6 @@
 from collections.abc import Mapping, Sequence
 from contextlib import nullcontext
 from dataclasses import dataclass
-from functools import reduce
 from typing import TYPE_CHECKING, Any
 
 import gpytorch.constraints
@@ -143,7 +142,6 @@ def optimize_acq(
     n_intial_start_points: int | None = None,
     acq_options: Mapping[str, Any] | None = None,
     fixed_features: dict[str, Any] | None = None,
-    maximum_allowed_categorical_combinations: int = 30,
     hide_warnings: bool = False,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     """Optimize the acquisition function.
@@ -221,22 +219,6 @@ def optimize_acq(
                 **acq_options,
             )
 
-    # We need to generate the product of all possible combinations of categoricals,
-    # first we do a sanity check
-    n_combos = reduce(
-        lambda x, y: x * y,  # type: ignore
-        [t.domain.cardinality for t in cat_transformers.values()],
-        1,
-    )
-    if n_combos > maximum_allowed_categorical_combinations:
-        raise ValueError(
-            "The number of fixed categorical dimensions is too high. "
-            "This will lead to an explosion in the number of possible "
-            f"combinations. Got: {n_combos} while the setting for the function"
-            f" is: {maximum_allowed_categorical_combinations=}. Consider reducing the "
-            "dimensions or consider encoding your categoricals in some other format."
-        )
-
     # First, just collect the possible values per cat column
     # {hp_name: [v1, v2], hp_name2: [v1, v2, v3], ...}
     cats: dict[int, list[float]] = {
@@ -423,7 +405,6 @@ def fit_and_acquire_from_gp(
     n_candidates_required: int | None = None,
     num_restarts: int = 20,
     n_initial_start_points: int = 256,
-    maximum_allowed_categorical_combinations: int = np.inf,
     fixed_acq_features: dict[str, Any] | None = None,
     acq_options: Mapping[str, Any] | None = None,
     hide_warnings: bool = False,
@@ -467,9 +448,6 @@ def fit_and_acquire_from_gp(
         num_restarts: The number of restarts to use during optimization.
         n_initial_start_points: The number of initial start points to use during
             optimization.
-        maximum_allowed_categorical_combinations: The maximum number of categorical
-            combinations to allow. If the number of combinations exceeds this, an error
-            will be raised.
         acq_options: Additional options to pass to the botorch `optimizer_acqf` function.
         hide_warnings: Whether to hide numerical warnings issued during GP routines.
 
@@ -550,7 +528,6 @@ def fit_and_acquire_from_gp(
         n_intial_start_points=n_initial_start_points,
         fixed_features=fixed_acq_features,
         acq_options=acq_options,
-        maximum_allowed_categorical_combinations=maximum_allowed_categorical_combinations,
         hide_warnings=hide_warnings,
     )
     return candidates