30 changes: 22 additions & 8 deletions xopt/generators/bayesian/bax/acquisition.py
@@ -3,6 +3,7 @@
MultiObjectiveAnalyticAcquisitionFunction,
)
from botorch.models.model import Model, ModelList
from botorch.posteriors.ensemble import EnsemblePosterior
from botorch.utils.transforms import t_batch_mode_transform
from torch import Tensor

@@ -83,17 +84,27 @@ def forward(self, X: Tensor) -> Tensor:

# calculate the variance of the posterior for each input x
post = self.model.posterior(X)
var_post = post.variance

# calculate the variance of the fantasy posteriors
fantasy_posts = self.fantasy_models.posterior(
(
X.reshape(*X.shape[:-2], 1, *X.shape[-2:]).expand(
*X.shape[:-2], self.xs_exe.shape[0], *X.shape[-2:]
# Some different shape handling with EnsemblePosterior -- maybe we can move this upstream?
if isinstance(post, EnsemblePosterior):
var_post = post.variance.unsqueeze(-1)

# calculate the variance of the fantasy posteriors
fantasy_posts = self.fantasy_models.posterior(X)
var_fantasy_posts = fantasy_posts.variance.unsqueeze(-1).unsqueeze(-1)

else:
var_post = post.variance

# calculate the variance of the fantasy posteriors
fantasy_posts = self.fantasy_models.posterior(
(
X.reshape(*X.shape[:-2], 1, *X.shape[-2:]).expand(
*X.shape[:-2], self.xs_exe.shape[0], *X.shape[-2:]
)
)
)
)
var_fantasy_posts = fantasy_posts.variance
var_fantasy_posts = fantasy_posts.variance

# calculate Shannon entropy for posterior given the current data
h_current = 0.5 * torch.log(2 * torch.pi * var_post) + 0.5
@@ -114,4 +125,7 @@ def forward(self, X: Tensor) -> Tensor:
# (avg_h_fantasy is a Monte-Carlo estimate of the second term on the right)
eig = h_current_scalar - avg_h_fantasy

# mean over properties
eig = eig.mean(dim=-1, keepdim=True)

return eig.reshape(X.shape[:-2])
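
For reference, both branches above feed the same entropy-based calculation: the differential entropy of a Gaussian with variance sigma^2 is 0.5 * log(2 * pi * sigma^2) + 0.5, and the expected information gain at a candidate point is the entropy of the current posterior minus a Monte-Carlo average of the fantasy-posterior entropies. A minimal standalone sketch of that calculation (toy variances, hypothetical names, not part of the diff):

import torch

def gaussian_entropy(var):
    # differential entropy of a normal distribution with variance `var`
    return 0.5 * torch.log(2 * torch.pi * var) + 0.5

# var_post: posterior variances at n candidate points
# var_fantasy: variances under n_fantasies fantasy models at the same points
var_post = torch.tensor([0.4, 0.9])
var_fantasy = torch.tensor([[0.1, 0.5], [0.2, 0.4], [0.15, 0.45]])

h_current = gaussian_entropy(var_post)
avg_h_fantasy = gaussian_entropy(var_fantasy).mean(dim=0)
eig = h_current - avg_h_fantasy  # largest where the fantasies shrink the variance most
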
282 changes: 282 additions & 0 deletions xopt/generators/bayesian/models/ensemble.py
@@ -0,0 +1,282 @@
from botorch.models.model import Model, ModelList
from botorch.models.transforms import Normalize, Standardize
from botorch.posteriors.ensemble import EnsemblePosterior

from gpytorch.distributions import MultivariateNormal

import torch
from torch import Tensor
from torch import nn

import numpy as np
from copy import deepcopy

from xopt.generators.bayesian.base_model import ModelConstructor
from xopt.generators.bayesian.utils import get_training_data

from pydantic import Field
from typing import List, Callable, Dict, Union
import pandas as pd


class EnsembleModelConstructor(ModelConstructor):
"""
Model constructor for ensemble estimates
"""

name: str = Field("ensemble")
model: Callable = Field(None, description="Ensemble model")

def build_model(
self,
input_names: List[str],
outcome_names: List[str],
data: pd.DataFrame,
input_bounds: Dict[str, List] = None,
dtype: torch.dtype = torch.float64,
device: Union[torch.device, str] = "cpu",
):
# Get training data
train_X, train_Y, train_Yvar = get_training_data(
input_names, outcome_names, data
)
self.model = self.model.to(dtype)

# Fit model on training data
self.model.fit(train_X, train_Y)
self.model = self.model.to(device)
self.model.eval()

# Expected model list
self.model.models = [self.model]

return self.model


class MCDropoutMLP(nn.Module):
"""
Generic multi-layer perceptron model with MC dropout
"""

def __init__(
self,
input_dim,
output_dim,
dropout_rate=0.5,
n_hidden_layers=3,
hidden_dim=100,
hidden_activation=nn.ReLU(),
output_activation=None,
):
super().__init__()

# Build up model layers
self.layers = nn.ModuleList()
for i in range(n_hidden_layers):
# Input layer or hidden layers
if i == 0:
self.layers.append(nn.Linear(input_dim, hidden_dim))
else:
self.layers.append(nn.Linear(hidden_dim, hidden_dim))

self.layers.append(nn.Dropout(p=dropout_rate))
self.layers.append(hidden_activation)

# Output layer
if n_hidden_layers == 0:
self.layers.append(nn.Linear(input_dim, output_dim))
else:
self.layers.append(nn.Linear(hidden_dim, output_dim))

if output_activation is not None:
self.layers.append(output_activation)

def forward(self, input_data, seed=None):
# Needed so that dropout works
self.train()
# Seeding makes dropout self-consistent
if seed is not None:
torch.random.manual_seed(seed)
for layer in self.layers:
input_data = layer(input_data)

self.eval()
return input_data


class FlattenedEnsemblePosterior(EnsemblePosterior):
"""
Wrapper around EnsemblePosterior that makes shapes behave similarly to
GPyTorchPosterior.
"""

def rsample(self, sample_shape=torch.Size()):
samples = super().rsample(sample_shape)
# Flatten (q, m) into a single dimension like GP case
q, m = samples.shape[-2:]
return samples.view(*samples.shape[:-2], q * m)

@property
def mean(self):
# Average over ensemble dimension
return super().mean.mean(dim=1)

@property
def variance(self):
# Average over ensemble dimension
return super().variance.mean(dim=1)

@property
def mvn(self):
mean = self.mean
variance = self.variance

# Assumes diagonal gaussian for simplicity -- not necessarily true, but probably good enough
covar = torch.diag_embed(variance.squeeze(-1).squeeze(-1))
mvn = MultivariateNormal(mean.squeeze(-1).squeeze(-1), covariance_matrix=covar)

return mvn


class MCDropoutModel(Model):
"""
BoTorch model for MC dropout ensemble
"""

model: MCDropoutMLP
num_samples: int
input_dim: int
output_dim: int
dropout_rate: float
n_hidden_layers: int
hidden_dim: int

def __init__(
self,
input_dim: int,
output_dim: int,
dropout_rate: float = 0.1,
num_samples: int = 30,
n_hidden_layers=3,
hidden_dim=100,
hidden_activation=nn.ReLU(),
output_activation=None,
observables=["y1"],
input_bounds=torch.tensor([[-np.pi], [np.pi]]),
n_epochs=500,
n_cond_epochs=40,
):
super(MCDropoutModel, self).__init__()
# Construct model
self.model = MCDropoutMLP(
input_dim,
output_dim,
dropout_rate=dropout_rate,
n_hidden_layers=n_hidden_layers,
hidden_dim=hidden_dim,
hidden_activation=hidden_activation,
output_activation=output_activation,
)

self.num_samples = num_samples
self.n_epochs = n_epochs
self.n_cond_epochs = n_cond_epochs

self.outcome_transform = Standardize(output_dim)
self.input_transform = Normalize(input_dim, bounds=input_bounds)
self.output_indices = None

def fit(
self,
X: Tensor,
y: Tensor,
loss_fn=torch.nn.MSELoss(),
n_epochs=None,
optim_type=torch.optim.Adam,
lr=1e-3,
is_fantasy=False,
) -> None:
if n_epochs is None:
n_epochs = self.n_epochs

self.model.train()
# Seems like this is normalized already in InfoBAX calc?
if not is_fantasy:
X_s = self.input_transform(X)
y_s = self.outcome_transform(y)[0]
else:
X_s = X
y_s = y

# Generic training loop -- may be worth exposing more/adding more hooks?
optimizer = optim_type(self.model.parameters(), lr=lr)

for epoch in range(n_epochs):
pred = self.model(X_s)
loss = loss_fn(pred, y_s)

optimizer.zero_grad()
loss.backward()
optimizer.step()

self.train_inputs = (X,)
self.train_targets = y

def condition_on_observations(self, X, y):
fantasies = []
for i_batch in range(X.shape[0]):
new_model = deepcopy(self)
train_X = torch.cat([new_model.train_inputs[0], X[i_batch]], dim=0)
train_y = torch.cat([new_model.train_targets, y[i_batch]], dim=0)

new_model.fit(
train_X, train_y, n_epochs=self.n_cond_epochs, is_fantasy=True
)

fantasies.append(new_model)

return ModelList(*fantasies)

def subset_output(self, output_indices):
# Create a deep copy of the model to avoid modifying the original
new_model = deepcopy(self)
# Update output indices attribute (see forward)
new_model.output_indices = output_indices

return new_model

def forward(self, X: Tensor) -> Tensor:
x = self.input_transform(X)

preds = []
for i in range(self.num_samples):
out = self.model(x, seed=i)
if out.ndim == 2: # add q dimension
out = out.unsqueeze(-2)
elif out.ndim == 1:
out = out.unsqueeze(-1).unsqueeze(-1)

out = out.unsqueeze(1)
preds.append(out)

samples = torch.cat(preds, dim=1)

if self.output_indices is not None:
return samples[..., self.output_indices]
else:
return samples

def posterior(self, X: Tensor, **kwargs):
samples = self.forward(X)
if hasattr(self, "outcome_transform"):
samples = self.outcome_transform.untransform(samples)[0]

posterior = FlattenedEnsemblePosterior(samples)

return posterior

def set_train_data(self, X=None, y=None, strict=False):
if X is None or y is None:
return
self.train_inputs = (X,)
self.train_targets = y
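
A minimal usage sketch of the new MC dropout model on toy 1-D data (the sine target, sizes, and epoch count are illustrative only, not taken from the PR):

import torch
from xopt.generators.bayesian.models.ensemble import MCDropoutModel

# toy 1-D regression data on the default input bounds [-pi, pi]
train_X = torch.linspace(-torch.pi, torch.pi, 50).unsqueeze(-1)
train_Y = torch.sin(train_X)

model = MCDropoutModel(input_dim=1, output_dim=1, num_samples=30, n_epochs=200)
model.fit(train_X, train_Y)
model.eval()  # mirror what EnsembleModelConstructor.build_model does after fitting

test_X = torch.linspace(-torch.pi, torch.pi, 5).unsqueeze(-1)
post = model.posterior(test_X)
print(post.mean.shape, post.variance.shape)  # ensemble-averaged mean/variance per test point
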
28 changes: 18 additions & 10 deletions xopt/generators/bayesian/utils.py
@@ -1,6 +1,6 @@
from contextlib import nullcontext
from copy import deepcopy
from typing import Any, List
from typing import Any, List, Union

import gpytorch
import numpy as np
@@ -17,7 +17,7 @@


def get_training_data(
input_names: List[str], outcome_name: str, data: pd.DataFrame
input_names: List[str], outcome_names: Union[str, List[str]], data: pd.DataFrame
) -> (torch.Tensor, torch.Tensor):
"""
Creates training data from input data frame.
@@ -51,25 +51,33 @@ def get_training_data(

"""

if type(outcome_names) is not list:
outcome_names = [outcome_names]

input_data = data[input_names]
outcome_data = data[outcome_name]
outcome_data = data[outcome_names]

# cannot use any rows where any variable values are nans
non_nans = ~input_data.isnull().T.any()
input_data = input_data[non_nans]
outcome_data = outcome_data[non_nans]

train_X = torch.tensor(input_data[~outcome_data.isnull()].to_numpy(dtype="double"))
train_X = torch.tensor(
input_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double")
)
train_Y = torch.tensor(
outcome_data[~outcome_data.isnull()].to_numpy(dtype="double")
).unsqueeze(-1)
outcome_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double")
)
if train_Y.ndim < 2:
train_Y = train_Y.unsqueeze(-1)

train_Yvar = None
if f"{outcome_name}_var" in data:
variance_data = data[f"{outcome_name}_var"][non_nans]
var_names = [f"{name}_var" for name in outcome_names if f"{name}_var" in data]
if len(var_names) > 0:
variance_data = data[var_names][non_nans]
train_Yvar = torch.tensor(
variance_data[~outcome_data.isnull()].to_numpy(dtype="double")
).unsqueeze(-1)
variance_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double")
)

return train_X, train_Y, train_Yvar
