From 134e08f34e796dd80dfe68cc4d4ebe81202a8394 Mon Sep 17 00:00:00 2001 From: Sean Gasiorowski Date: Tue, 12 Aug 2025 16:50:46 -0700 Subject: [PATCH 1/8] Add first draft of MC dropout ensemble models --- xopt/generators/bayesian/bax/acquisition.py | 30 ++- xopt/generators/bayesian/models/ensemble.py | 236 ++++++++++++++++++++ 2 files changed, 257 insertions(+), 9 deletions(-) create mode 100644 xopt/generators/bayesian/models/ensemble.py diff --git a/xopt/generators/bayesian/bax/acquisition.py b/xopt/generators/bayesian/bax/acquisition.py index 39092d48f..bbce05021 100644 --- a/xopt/generators/bayesian/bax/acquisition.py +++ b/xopt/generators/bayesian/bax/acquisition.py @@ -3,6 +3,7 @@ MultiObjectiveAnalyticAcquisitionFunction, ) from botorch.models.model import Model, ModelList +from botorch.posteriors.ensemble import EnsemblePosterior from botorch.utils.transforms import t_batch_mode_transform from torch import Tensor @@ -36,6 +37,7 @@ def __init__(self, model: Model, algorithm: Algorithm, bounds: Tensor) -> None: self.algorithm_results, ) = self.algorithm.get_execution_paths(self.model, bounds) + # Need to call the model on some data before we can condition_on_observations self.model.posterior(*[self.xs_exe[:1, 0:1, 0:] for m in model.models]) @@ -83,17 +85,27 @@ def forward(self, X: Tensor) -> Tensor: # calculcate the variance of the posterior for each input x post = self.model.posterior(X) - var_post = post.variance - - # calculcate the variance of the fantasy posteriors - fantasy_posts = self.fantasy_models.posterior( - ( - X.reshape(*X.shape[:-2], 1, *X.shape[-2:]).expand( - *X.shape[:-2], self.xs_exe.shape[0], *X.shape[-2:] + + # Some different shape handling with EnsemblePosterior -- maybe we can move this upstream? + if isinstance(post, EnsemblePosterior): + var_post = post.variance.unsqueeze(-1) + + # calculcate the variance of the fantasy posteriors + fantasy_posts = self.fantasy_models.posterior(X) + var_fantasy_posts = fantasy_posts.variance.unsqueeze(-1).unsqueeze(-1) + + else: + var_post = post.variance + + # calculcate the variance of the fantasy posteriors + fantasy_posts = self.fantasy_models.posterior( + ( + X.reshape(*X.shape[:-2], 1, *X.shape[-2:]).expand( + *X.shape[:-2], self.xs_exe.shape[0], *X.shape[-2:] + ) ) ) - ) - var_fantasy_posts = fantasy_posts.variance + var_fantasy_posts = fantasy_posts.variance # calculate Shannon entropy for posterior given the current data h_current = 0.5 * torch.log(2 * torch.pi * var_post) + 0.5 diff --git a/xopt/generators/bayesian/models/ensemble.py b/xopt/generators/bayesian/models/ensemble.py new file mode 100644 index 000000000..61eef9306 --- /dev/null +++ b/xopt/generators/bayesian/models/ensemble.py @@ -0,0 +1,236 @@ +from botorch.models.model import Model, ModelList +from botorch.models.transforms import Normalize, Standardize +from botorch.posteriors.ensemble import EnsemblePosterior + +from gpytorch.distributions import MultivariateNormal + +import torch +from torch import Tensor +from torch import nn + +import numpy as np +from copy import deepcopy + +from xopt.generators.bayesian.base_model import ModelConstructor +from xopt.generators.bayesian.utils import get_training_data + +from pydantic import Field +from typing import List, Callable, Dict, Union +import pandas as pd + + +class EnsembleModelConstructor(ModelConstructor): + """ + Model constructor for ensemble estimates + """ + name: str = Field("ensemble") + model: Callable = Field(None, description="Ensemble model") + + def build_model( + self, + input_names: 
List[str], + outcome_names: List[str], + data: pd.DataFrame, + input_bounds: Dict[str, List] = None, + dtype: torch.dtype = torch.float64, + device: Union[torch.device, str] = "cpu", + ): + # get training data -- need to update utils for multivariate case + train_X, train_Y, train_Yvar = get_training_data( + input_names, outcome_names[0], data + ) + self.model = self.model.to(dtype) + + self.model.fit(train_X, train_Y) + self.model = self.model.to(device) + self.model.eval() + + self.model.models = [self.model] + + return self.model + +class MCDropoutMLP(nn.Module): + """ + Generic multi-layer perceptron model with MC dropout + """ + def __init__(self, input_dim, output_dim, dropout_rate=0.5, n_hidden_layers=3, hidden_dim=100, + hidden_activation=nn.ReLU(), output_activation=None): + super().__init__() + + self.layers = nn.ModuleList() + for i in range(n_hidden_layers): + if i == 0: + self.layers.append(nn.Linear(input_dim, hidden_dim)) + else: + self.layers.append(nn.Linear(hidden_dim, hidden_dim)) + + self.layers.append(nn.Dropout(p=dropout_rate)) + self.layers.append(hidden_activation) + + + if n_hidden_layers == 0: + self.layers.append(nn.Linear(input_dim, output_dim)) + else: + self.layers.append(nn.Linear(hidden_dim, output_dim)) + + if output_activation is not None: + self.layers.append(output_activation) + + def forward(self, input_data, seed=None): + # Needed so that dropout works + self.train() + if seed is not None: + torch.random.manual_seed(seed) + for layer in self.layers: + input_data = layer(input_data) + + self.eval() + return input_data + + +class FlattenedEnsemblePosterior(EnsemblePosterior): + """ + Wrapper for EnsemblePosterior to make shapes do similar things + as GPyTorchPosterior. + """ + def rsample(self, sample_shape=torch.Size()): + samples = super().rsample(sample_shape) + # Flatten (q, m) into a single dimension like GP case + q, m = samples.shape[-2:] + return samples.view(*samples.shape[:-2], q * m) + + @property + def mean(self): + # Average over ensemble dimension + return super().mean.mean(dim=1) + + @property + def variance(self): + # Average over ensemble dimension + return super().variance.mean(dim=1) + + @property + def mvn(self): + mean = self.mean + variance = self.variance + + # Assumes diagonal gaussian for simplicity + covar = torch.diag_embed(variance.squeeze(-1).squeeze(-1)) + mvn = MultivariateNormal(mean.squeeze(-1).squeeze(-1), covariance_matrix=covar) + + return mvn + + +class MCDropoutModel(Model): + """ + BoTorch model for MC dropout ensemble + """ + + model: MCDropoutMLP + num_samples: int + input_dim: int + output_dim: int + dropout_rate: float + n_hidden_layers: int + hidden_dim: int + + def __init__(self, input_dim: int, output_dim: int, dropout_rate: float = 0.1, + num_samples: int = 30, n_hidden_layers=3, hidden_dim=100, + hidden_activation=nn.ReLU(), output_activation=None, observables=['y1'], + input_bounds=torch.tensor([[-np.pi], [np.pi]])): + + super(MCDropoutModel, self).__init__() + self.model = MCDropoutMLP(input_dim, output_dim, + dropout_rate=dropout_rate, + n_hidden_layers=n_hidden_layers, + hidden_dim=hidden_dim, + hidden_activation=hidden_activation, + output_activation=output_activation) + + self.num_samples = num_samples + + self.outcome_transform = Standardize(output_dim) + self.input_transform = Normalize(input_dim, bounds = input_bounds) + self.output_indices = None + + def fit(self, X: Tensor, y: Tensor, loss_fn=torch.nn.MSELoss(), + n_epochs = 500, optim_type = torch.optim.Adam, lr = 1e-3, is_fantasy=False) -> 
None: + + self.model.train() + # Seems like this is normalized already in InfoBAX calc? + if not is_fantasy: + X_s = self.input_transform(X) + y_s = self.outcome_transform(y)[0] + else: + X_s = X + y_s = y + + optimizer = optim_type(self.model.parameters(), lr=lr) + + for epoch in range(n_epochs): + pred = self.model(X_s) + loss = loss_fn(pred, y_s) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + + self.train_inputs = (X,) + self.train_targets = y + + + def condition_on_observations(self, X, y, n_cond_epochs=40): + fantasies = [] + for i_batch in range(X.shape[0]): + new_model = deepcopy(self) + train_X = torch.cat([new_model.train_inputs[0], X[i_batch]], dim=0) + train_y = torch.cat([new_model.train_targets, y[i_batch]], dim=0) + + new_model.fit(train_X, train_y, n_epochs=n_cond_epochs, is_fantasy=True) + + fantasies.append(new_model) + + return ModelList(*fantasies) + + def subset_output(self, output_indices): + # Create a deep copy of the model to avoid modifying the original + new_model = deepcopy(self) + new_model.output_indices = output_indices + + return new_model + + + def forward(self, X: Tensor) -> Tensor: + x = self.input_transform(X) + + preds = [] + for i in range(self.num_samples): + out = self.model(x, seed=i) + # Some of this shape logic to be checked -- probably need to modify for n-d + if out.ndim == 2: + out = out.unsqueeze(-1) + elif out.ndim == 1: + out = out.unsqueeze(-1).unsqueeze(-1) + + out = out.unsqueeze(1) + preds.append(out) + + samples = torch.cat(preds, dim=1) + + if self.output_indices is not None: + return samples[..., self.output_indices] + else: + return samples + + + def posterior(self, X: Tensor, **kwargs): + samples = self.forward(X) + + return FlattenedEnsemblePosterior(samples) + + def set_train_data(self, X = None, y = None, strict=False): + if X is None or y is None: + return + self.train_inputs = (X,) + self.train_targets = y \ No newline at end of file From 99b4cb9dfc01b1df885b350f0326f33e0d49c57e Mon Sep 17 00:00:00 2001 From: Sean Gasiorowski Date: Mon, 8 Sep 2025 17:04:54 -0700 Subject: [PATCH 2/8] Change posterior normalization --- xopt/generators/bayesian/models/ensemble.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xopt/generators/bayesian/models/ensemble.py b/xopt/generators/bayesian/models/ensemble.py index 61eef9306..1bc58e6a3 100644 --- a/xopt/generators/bayesian/models/ensemble.py +++ b/xopt/generators/bayesian/models/ensemble.py @@ -166,6 +166,7 @@ def fit(self, X: Tensor, y: Tensor, loss_fn=torch.nn.MSELoss(), y_s = y optimizer = optim_type(self.model.parameters(), lr=lr) + for epoch in range(n_epochs): pred = self.model(X_s) @@ -226,8 +227,12 @@ def forward(self, X: Tensor) -> Tensor: def posterior(self, X: Tensor, **kwargs): samples = self.forward(X) + if hasattr(self, "outcome_transform"): + samples = self.outcome_transform.untransform(samples)[0] + + posterior = FlattenedEnsemblePosterior(samples) - return FlattenedEnsemblePosterior(samples) + return posterior def set_train_data(self, X = None, y = None, strict=False): if X is None or y is None: From e88eee490845190890fc1153f9f635b9d9ce6004 Mon Sep 17 00:00:00 2001 From: Sean Gasiorowski Date: Tue, 9 Sep 2025 13:58:44 -0700 Subject: [PATCH 3/8] Reshuffle some arguments --- xopt/generators/bayesian/models/ensemble.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/xopt/generators/bayesian/models/ensemble.py b/xopt/generators/bayesian/models/ensemble.py index 1bc58e6a3..8f095b079 100644 --- 
a/xopt/generators/bayesian/models/ensemble.py +++ b/xopt/generators/bayesian/models/ensemble.py @@ -137,7 +137,7 @@ class MCDropoutModel(Model): def __init__(self, input_dim: int, output_dim: int, dropout_rate: float = 0.1, num_samples: int = 30, n_hidden_layers=3, hidden_dim=100, hidden_activation=nn.ReLU(), output_activation=None, observables=['y1'], - input_bounds=torch.tensor([[-np.pi], [np.pi]])): + input_bounds=torch.tensor([[-np.pi], [np.pi]]), n_epochs=500, n_cond_epochs=40): super(MCDropoutModel, self).__init__() self.model = MCDropoutMLP(input_dim, output_dim, @@ -148,14 +148,19 @@ def __init__(self, input_dim: int, output_dim: int, dropout_rate: float = 0.1, output_activation=output_activation) self.num_samples = num_samples + self.n_epochs = n_epochs + self.n_cond_epochs = n_cond_epochs self.outcome_transform = Standardize(output_dim) self.input_transform = Normalize(input_dim, bounds = input_bounds) self.output_indices = None def fit(self, X: Tensor, y: Tensor, loss_fn=torch.nn.MSELoss(), - n_epochs = 500, optim_type = torch.optim.Adam, lr = 1e-3, is_fantasy=False) -> None: + n_epochs=None, optim_type = torch.optim.Adam, lr = 1e-3, is_fantasy=False) -> None: + if n_epochs is None: + n_epochs = self.n_epochs + self.model.train() # Seems like this is normalized already in InfoBAX calc? if not is_fantasy: @@ -181,14 +186,14 @@ def fit(self, X: Tensor, y: Tensor, loss_fn=torch.nn.MSELoss(), self.train_targets = y - def condition_on_observations(self, X, y, n_cond_epochs=40): + def condition_on_observations(self, X, y): fantasies = [] for i_batch in range(X.shape[0]): new_model = deepcopy(self) train_X = torch.cat([new_model.train_inputs[0], X[i_batch]], dim=0) train_y = torch.cat([new_model.train_targets, y[i_batch]], dim=0) - new_model.fit(train_X, train_y, n_epochs=n_cond_epochs, is_fantasy=True) + new_model.fit(train_X, train_y, n_epochs=self.n_cond_epochs, is_fantasy=True) fantasies.append(new_model) From 8f25e74b455c29a18f8aeba31af5da22f01c0efa Mon Sep 17 00:00:00 2001 From: Sean Gasiorowski Date: Wed, 10 Sep 2025 18:00:01 -0700 Subject: [PATCH 4/8] Fixes for nd output --- xopt/generators/bayesian/bax/acquisition.py | 3 +++ xopt/generators/bayesian/models/ensemble.py | 7 +++---- xopt/generators/bayesian/utils.py | 19 +++++++++++-------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/xopt/generators/bayesian/bax/acquisition.py b/xopt/generators/bayesian/bax/acquisition.py index bbce05021..509d57e88 100644 --- a/xopt/generators/bayesian/bax/acquisition.py +++ b/xopt/generators/bayesian/bax/acquisition.py @@ -125,5 +125,8 @@ def forward(self, X: Tensor) -> Tensor: # eq (4) of https://arxiv.org/pdf/2104.09460.pdf # (avg_h_fantasy is a Monte-Carlo estimate of the second term on the right) eig = h_current_scalar - avg_h_fantasy + + # mean over properties + eig = eig.mean(dim=-1, keepdim=True) return eig.reshape(X.shape[:-2]) diff --git a/xopt/generators/bayesian/models/ensemble.py b/xopt/generators/bayesian/models/ensemble.py index 8f095b079..5e81e4712 100644 --- a/xopt/generators/bayesian/models/ensemble.py +++ b/xopt/generators/bayesian/models/ensemble.py @@ -37,7 +37,7 @@ def build_model( ): # get training data -- need to update utils for multivariate case train_X, train_Y, train_Yvar = get_training_data( - input_names, outcome_names[0], data + input_names, outcome_names, data ) self.model = self.model.to(dtype) @@ -213,9 +213,8 @@ def forward(self, X: Tensor) -> Tensor: preds = [] for i in range(self.num_samples): out = self.model(x, seed=i) - # Some of this 
shape logic to be checked -- probably need to modify for n-d - if out.ndim == 2: - out = out.unsqueeze(-1) + if out.ndim == 2: # add q dimension + out = out.unsqueeze(-2) elif out.ndim == 1: out = out.unsqueeze(-1).unsqueeze(-1) diff --git a/xopt/generators/bayesian/utils.py b/xopt/generators/bayesian/utils.py index 137099d17..f6825c96b 100644 --- a/xopt/generators/bayesian/utils.py +++ b/xopt/generators/bayesian/utils.py @@ -9,7 +9,7 @@ def get_training_data( - input_names: List[str], outcome_name: str, data: pd.DataFrame + input_names: List[str], outcome_names: str, data: pd.DataFrame ) -> (torch.Tensor, torch.Tensor): """ Creates training data from input data frame. @@ -44,23 +44,26 @@ def get_training_data( """ input_data = data[input_names] - outcome_data = data[outcome_name] + outcome_data = data[outcome_names] # cannot use any rows where any variable values are nans non_nans = ~input_data.isnull().T.any() input_data = input_data[non_nans] outcome_data = outcome_data[non_nans] - train_X = torch.tensor(input_data[~outcome_data.isnull()].to_numpy(dtype="double")) + train_X = torch.tensor(input_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double")) train_Y = torch.tensor( - outcome_data[~outcome_data.isnull()].to_numpy(dtype="double") - ).unsqueeze(-1) + outcome_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double") + ) + if train_Y.ndim < 2: + train_Y = train_Y.unsqueeze(-1) train_Yvar = None - if f"{outcome_name}_var" in data: - variance_data = data[f"{outcome_name}_var"][non_nans] + var_names = [f"{name}_var" for name in outcome_names if f"{name}_var" in data] + if len(var_names) > 0: + variance_data = data[var_names][non_nans] train_Yvar = torch.tensor( - variance_data[~outcome_data.isnull()].to_numpy(dtype="double") + variance_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double") ).unsqueeze(-1) return train_X, train_Y, train_Yvar From d6e33cbb9d72cafe931bc4518590c810429dd1fd Mon Sep 17 00:00:00 2001 From: Sean Gasiorowski Date: Mon, 29 Sep 2025 09:35:20 -0700 Subject: [PATCH 5/8] Fix list comparison, update type hint --- xopt/generators/bayesian/bax/acquisition.py | 2 +- xopt/generators/bayesian/utils.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/xopt/generators/bayesian/bax/acquisition.py b/xopt/generators/bayesian/bax/acquisition.py index 509d57e88..81e6d8df9 100644 --- a/xopt/generators/bayesian/bax/acquisition.py +++ b/xopt/generators/bayesian/bax/acquisition.py @@ -39,7 +39,7 @@ def __init__(self, model: Model, algorithm: Algorithm, bounds: Tensor) -> None: # Need to call the model on some data before we can condition_on_observations - self.model.posterior(*[self.xs_exe[:1, 0:1, 0:] for m in model.models]) + # self.model.posterior(*[self.xs_exe[:1, 0:1, 0:] for m in model.models]) # construct a batch of size n_samples fantasy models, # where each fantasy model is produced by taking the model diff --git a/xopt/generators/bayesian/utils.py b/xopt/generators/bayesian/utils.py index f6825c96b..e11adaaee 100644 --- a/xopt/generators/bayesian/utils.py +++ b/xopt/generators/bayesian/utils.py @@ -1,5 +1,5 @@ from copy import deepcopy -from typing import List +from typing import List, Union import numpy as np import pandas as pd @@ -9,7 +9,7 @@ def get_training_data( - input_names: List[str], outcome_names: str, data: pd.DataFrame + input_names: List[str], outcome_names: Union[str, List[str]], data: pd.DataFrame ) -> (torch.Tensor, torch.Tensor): """ Creates training data from input data frame. 
@@ -43,6 +43,9 @@ def get_training_data( """ + if type(outcome_names) != list: + outcome_names = [outcome_names] + input_data = data[input_names] outcome_data = data[outcome_names] From 920ffb23d2112c096f20f0d4e46748f2afe3a81d Mon Sep 17 00:00:00 2001 From: Sean Gasiorowski Date: Mon, 29 Sep 2025 09:53:38 -0700 Subject: [PATCH 6/8] Add some comments --- xopt/generators/bayesian/bax/acquisition.py | 3 ++- xopt/generators/bayesian/models/ensemble.py | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/xopt/generators/bayesian/bax/acquisition.py b/xopt/generators/bayesian/bax/acquisition.py index 81e6d8df9..5928b32e1 100644 --- a/xopt/generators/bayesian/bax/acquisition.py +++ b/xopt/generators/bayesian/bax/acquisition.py @@ -39,7 +39,8 @@ def __init__(self, model: Model, algorithm: Algorithm, bounds: Tensor) -> None: # Need to call the model on some data before we can condition_on_observations - # self.model.posterior(*[self.xs_exe[:1, 0:1, 0:] for m in model.models]) + # SG: removed this -- I don't think it's needed? + # self.model.posterior(*[self.xs_exe[:1, 0:1, 0:] for m in model.models]) # construct a batch of size n_samples fantasy models, # where each fantasy model is produced by taking the model diff --git a/xopt/generators/bayesian/models/ensemble.py b/xopt/generators/bayesian/models/ensemble.py index 5e81e4712..1e484d9aa 100644 --- a/xopt/generators/bayesian/models/ensemble.py +++ b/xopt/generators/bayesian/models/ensemble.py @@ -35,16 +35,18 @@ def build_model( dtype: torch.dtype = torch.float64, device: Union[torch.device, str] = "cpu", ): - # get training data -- need to update utils for multivariate case + # Get training data train_X, train_Y, train_Yvar = get_training_data( input_names, outcome_names, data ) self.model = self.model.to(dtype) + # Fit model on training data self.model.fit(train_X, train_Y) self.model = self.model.to(device) self.model.eval() + # Expected model list self.model.models = [self.model] return self.model @@ -57,8 +59,10 @@ def __init__(self, input_dim, output_dim, dropout_rate=0.5, n_hidden_layers=3, h hidden_activation=nn.ReLU(), output_activation=None): super().__init__() + # Build up model layers self.layers = nn.ModuleList() for i in range(n_hidden_layers): + # Input layer or hidden layers if i == 0: self.layers.append(nn.Linear(input_dim, hidden_dim)) else: @@ -67,7 +71,7 @@ def __init__(self, input_dim, output_dim, dropout_rate=0.5, n_hidden_layers=3, h self.layers.append(nn.Dropout(p=dropout_rate)) self.layers.append(hidden_activation) - + # Output layer if n_hidden_layers == 0: self.layers.append(nn.Linear(input_dim, output_dim)) else: @@ -79,11 +83,12 @@ def __init__(self, input_dim, output_dim, dropout_rate=0.5, n_hidden_layers=3, h def forward(self, input_data, seed=None): # Needed so that dropout works self.train() + # Seeding makes dropout self-consistent if seed is not None: torch.random.manual_seed(seed) for layer in self.layers: input_data = layer(input_data) - + self.eval() return input_data @@ -114,7 +119,7 @@ def mvn(self): mean = self.mean variance = self.variance - # Assumes diagonal gaussian for simplicity + # Assumes diagonal gaussian for simplicity -- not necessarily true, but probably good enough covar = torch.diag_embed(variance.squeeze(-1).squeeze(-1)) mvn = MultivariateNormal(mean.squeeze(-1).squeeze(-1), covariance_matrix=covar) @@ -140,6 +145,7 @@ def __init__(self, input_dim: int, output_dim: int, dropout_rate: float = 0.1, input_bounds=torch.tensor([[-np.pi], [np.pi]]), n_epochs=500, 
n_cond_epochs=40): super(MCDropoutModel, self).__init__() + # Construct model self.model = MCDropoutMLP(input_dim, output_dim, dropout_rate=dropout_rate, n_hidden_layers=n_hidden_layers, @@ -170,9 +176,9 @@ def fit(self, X: Tensor, y: Tensor, loss_fn=torch.nn.MSELoss(), X_s = X y_s = y + # Generic training loop -- may be worth exposing more/adding more hooks? optimizer = optim_type(self.model.parameters(), lr=lr) - for epoch in range(n_epochs): pred = self.model(X_s) loss = loss_fn(pred, y_s) @@ -202,10 +208,10 @@ def condition_on_observations(self, X, y): def subset_output(self, output_indices): # Create a deep copy of the model to avoid modifying the original new_model = deepcopy(self) + # Update output indices attribute (see forward) new_model.output_indices = output_indices return new_model - def forward(self, X: Tensor) -> Tensor: x = self.input_transform(X) From 12a32e3f53de3e0d94383c1a5780006c05398df6 Mon Sep 17 00:00:00 2001 From: Sean Gasiorowski Date: Tue, 7 Oct 2025 15:14:09 -0700 Subject: [PATCH 7/8] Linting --- xopt/generators/bayesian/bax/acquisition.py | 7 +- xopt/generators/bayesian/models/ensemble.py | 137 ++++++++++++-------- xopt/generators/bayesian/utils.py | 8 +- 3 files changed, 92 insertions(+), 60 deletions(-) diff --git a/xopt/generators/bayesian/bax/acquisition.py b/xopt/generators/bayesian/bax/acquisition.py index 3f11e85dc..b2f23d49d 100644 --- a/xopt/generators/bayesian/bax/acquisition.py +++ b/xopt/generators/bayesian/bax/acquisition.py @@ -37,7 +37,6 @@ def __init__(self, model: Model, algorithm: Algorithm, bounds: Tensor) -> None: self.algorithm_results, ) = self.algorithm.get_execution_paths(self.model, bounds) - # Need to call the model on some data before we can condition_on_observations self.model.posterior(self.xs_exe[:1, 0:1, 0:]) @@ -85,7 +84,7 @@ def forward(self, X: Tensor) -> Tensor: # calculcate the variance of the posterior for each input x post = self.model.posterior(X) - + # Some different shape handling with EnsemblePosterior -- maybe we can move this upstream? 
if isinstance(post, EnsemblePosterior): var_post = post.variance.unsqueeze(-1) @@ -93,7 +92,7 @@ def forward(self, X: Tensor) -> Tensor: # calculcate the variance of the fantasy posteriors fantasy_posts = self.fantasy_models.posterior(X) var_fantasy_posts = fantasy_posts.variance.unsqueeze(-1).unsqueeze(-1) - + else: var_post = post.variance @@ -125,7 +124,7 @@ def forward(self, X: Tensor) -> Tensor: # eq (4) of https://arxiv.org/pdf/2104.09460.pdf # (avg_h_fantasy is a Monte-Carlo estimate of the second term on the right) eig = h_current_scalar - avg_h_fantasy - + # mean over properties eig = eig.mean(dim=-1, keepdim=True) diff --git a/xopt/generators/bayesian/models/ensemble.py b/xopt/generators/bayesian/models/ensemble.py index 1e484d9aa..6341b35d4 100644 --- a/xopt/generators/bayesian/models/ensemble.py +++ b/xopt/generators/bayesian/models/ensemble.py @@ -23,9 +23,10 @@ class EnsembleModelConstructor(ModelConstructor): """ Model constructor for ensemble estimates """ + name: str = Field("ensemble") model: Callable = Field(None, description="Ensemble model") - + def build_model( self, input_names: List[str], @@ -40,23 +41,33 @@ def build_model( input_names, outcome_names, data ) self.model = self.model.to(dtype) - + # Fit model on training data self.model.fit(train_X, train_Y) self.model = self.model.to(device) self.model.eval() - + # Expected model list self.model.models = [self.model] - + return self.model + class MCDropoutMLP(nn.Module): """ Generic multi-layer perceptron model with MC dropout """ - def __init__(self, input_dim, output_dim, dropout_rate=0.5, n_hidden_layers=3, hidden_dim=100, - hidden_activation=nn.ReLU(), output_activation=None): + + def __init__( + self, + input_dim, + output_dim, + dropout_rate=0.5, + n_hidden_layers=3, + hidden_dim=100, + hidden_activation=nn.ReLU(), + output_activation=None, + ): super().__init__() # Build up model layers @@ -67,7 +78,7 @@ def __init__(self, input_dim, output_dim, dropout_rate=0.5, n_hidden_layers=3, h self.layers.append(nn.Linear(input_dim, hidden_dim)) else: self.layers.append(nn.Linear(hidden_dim, hidden_dim)) - + self.layers.append(nn.Dropout(p=dropout_rate)) self.layers.append(hidden_activation) @@ -96,41 +107,42 @@ def forward(self, input_data, seed=None): class FlattenedEnsemblePosterior(EnsemblePosterior): """ Wrapper for EnsemblePosterior to make shapes do similar things - as GPyTorchPosterior. + as GPyTorchPosterior. 
""" + def rsample(self, sample_shape=torch.Size()): samples = super().rsample(sample_shape) # Flatten (q, m) into a single dimension like GP case q, m = samples.shape[-2:] return samples.view(*samples.shape[:-2], q * m) - + @property def mean(self): # Average over ensemble dimension return super().mean.mean(dim=1) - + @property def variance(self): # Average over ensemble dimension return super().variance.mean(dim=1) - + @property def mvn(self): mean = self.mean variance = self.variance - + # Assumes diagonal gaussian for simplicity -- not necessarily true, but probably good enough covar = torch.diag_embed(variance.squeeze(-1).squeeze(-1)) mvn = MultivariateNormal(mean.squeeze(-1).squeeze(-1), covariance_matrix=covar) - + return mvn - + class MCDropoutModel(Model): """ BoTorch model for MC dropout ensemble """ - + model: MCDropoutMLP num_samples: int input_dim: int @@ -139,34 +151,54 @@ class MCDropoutModel(Model): n_hidden_layers: int hidden_dim: int - def __init__(self, input_dim: int, output_dim: int, dropout_rate: float = 0.1, - num_samples: int = 30, n_hidden_layers=3, hidden_dim=100, - hidden_activation=nn.ReLU(), output_activation=None, observables=['y1'], - input_bounds=torch.tensor([[-np.pi], [np.pi]]), n_epochs=500, n_cond_epochs=40): - + def __init__( + self, + input_dim: int, + output_dim: int, + dropout_rate: float = 0.1, + num_samples: int = 30, + n_hidden_layers=3, + hidden_dim=100, + hidden_activation=nn.ReLU(), + output_activation=None, + observables=["y1"], + input_bounds=torch.tensor([[-np.pi], [np.pi]]), + n_epochs=500, + n_cond_epochs=40, + ): super(MCDropoutModel, self).__init__() # Construct model - self.model = MCDropoutMLP(input_dim, output_dim, - dropout_rate=dropout_rate, - n_hidden_layers=n_hidden_layers, - hidden_dim=hidden_dim, - hidden_activation=hidden_activation, - output_activation=output_activation) - + self.model = MCDropoutMLP( + input_dim, + output_dim, + dropout_rate=dropout_rate, + n_hidden_layers=n_hidden_layers, + hidden_dim=hidden_dim, + hidden_activation=hidden_activation, + output_activation=output_activation, + ) + self.num_samples = num_samples self.n_epochs = n_epochs self.n_cond_epochs = n_cond_epochs - + self.outcome_transform = Standardize(output_dim) - self.input_transform = Normalize(input_dim, bounds = input_bounds) + self.input_transform = Normalize(input_dim, bounds=input_bounds) self.output_indices = None - def fit(self, X: Tensor, y: Tensor, loss_fn=torch.nn.MSELoss(), - n_epochs=None, optim_type = torch.optim.Adam, lr = 1e-3, is_fantasy=False) -> None: - + def fit( + self, + X: Tensor, + y: Tensor, + loss_fn=torch.nn.MSELoss(), + n_epochs=None, + optim_type=torch.optim.Adam, + lr=1e-3, + is_fantasy=False, + ) -> None: if n_epochs is None: n_epochs = self.n_epochs - + self.model.train() # Seems like this is normalized already in InfoBAX calc? if not is_fantasy: @@ -175,36 +207,36 @@ def fit(self, X: Tensor, y: Tensor, loss_fn=torch.nn.MSELoss(), else: X_s = X y_s = y - + # Generic training loop -- may be worth exposing more/adding more hooks? 
optimizer = optim_type(self.model.parameters(), lr=lr) for epoch in range(n_epochs): pred = self.model(X_s) loss = loss_fn(pred, y_s) - + optimizer.zero_grad() loss.backward() optimizer.step() - - + self.train_inputs = (X,) self.train_targets = y - - + def condition_on_observations(self, X, y): fantasies = [] for i_batch in range(X.shape[0]): new_model = deepcopy(self) train_X = torch.cat([new_model.train_inputs[0], X[i_batch]], dim=0) train_y = torch.cat([new_model.train_targets, y[i_batch]], dim=0) - - new_model.fit(train_X, train_y, n_epochs=self.n_cond_epochs, is_fantasy=True) - + + new_model.fit( + train_X, train_y, n_epochs=self.n_cond_epochs, is_fantasy=True + ) + fantasies.append(new_model) - + return ModelList(*fantasies) - + def subset_output(self, output_indices): # Create a deep copy of the model to avoid modifying the original new_model = deepcopy(self) @@ -215,16 +247,16 @@ def subset_output(self, output_indices): def forward(self, X: Tensor) -> Tensor: x = self.input_transform(X) - + preds = [] for i in range(self.num_samples): out = self.model(x, seed=i) - if out.ndim == 2: # add q dimension + if out.ndim == 2: # add q dimension out = out.unsqueeze(-2) elif out.ndim == 1: out = out.unsqueeze(-1).unsqueeze(-1) - - out = out.unsqueeze(1) + + out = out.unsqueeze(1) preds.append(out) samples = torch.cat(preds, dim=1) @@ -233,19 +265,18 @@ def forward(self, X: Tensor) -> Tensor: return samples[..., self.output_indices] else: return samples - def posterior(self, X: Tensor, **kwargs): samples = self.forward(X) if hasattr(self, "outcome_transform"): samples = self.outcome_transform.untransform(samples)[0] - + posterior = FlattenedEnsemblePosterior(samples) return posterior - - def set_train_data(self, X = None, y = None, strict=False): + + def set_train_data(self, X=None, y=None, strict=False): if X is None or y is None: return self.train_inputs = (X,) - self.train_targets = y \ No newline at end of file + self.train_targets = y diff --git a/xopt/generators/bayesian/utils.py b/xopt/generators/bayesian/utils.py index 9e1b47ffc..94da4b8fb 100644 --- a/xopt/generators/bayesian/utils.py +++ b/xopt/generators/bayesian/utils.py @@ -51,9 +51,9 @@ def get_training_data( """ - if type(outcome_names) != list: + if type(outcome_names) is not list: outcome_names = [outcome_names] - + input_data = data[input_names] outcome_data = data[outcome_names] @@ -62,7 +62,9 @@ def get_training_data( input_data = input_data[non_nans] outcome_data = outcome_data[non_nans] - train_X = torch.tensor(input_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double")) + train_X = torch.tensor( + input_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double") + ) train_Y = torch.tensor( outcome_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double") ) From 38351a31c91b2400f99567a701c953b61666c4ad Mon Sep 17 00:00:00 2001 From: Sean Gasiorowski Date: Tue, 7 Oct 2025 17:03:41 -0700 Subject: [PATCH 8/8] Remove extraneous unsqueeze --- xopt/generators/bayesian/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xopt/generators/bayesian/utils.py b/xopt/generators/bayesian/utils.py index 94da4b8fb..014447cd7 100644 --- a/xopt/generators/bayesian/utils.py +++ b/xopt/generators/bayesian/utils.py @@ -77,7 +77,7 @@ def get_training_data( variance_data = data[var_names][non_nans] train_Yvar = torch.tensor( variance_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double") - ).unsqueeze(-1) + ) return train_X, train_Y, train_Yvar