diff --git a/xopt/generators/bayesian/bax/acquisition.py b/xopt/generators/bayesian/bax/acquisition.py
index 2672b3023..b2f23d49d 100644
--- a/xopt/generators/bayesian/bax/acquisition.py
+++ b/xopt/generators/bayesian/bax/acquisition.py
@@ -3,6 +3,7 @@
     MultiObjectiveAnalyticAcquisitionFunction,
 )
 from botorch.models.model import Model, ModelList
+from botorch.posteriors.ensemble import EnsemblePosterior
 from botorch.utils.transforms import t_batch_mode_transform
 from torch import Tensor
 
@@ -83,17 +84,27 @@ def forward(self, X: Tensor) -> Tensor:
 
         # calculcate the variance of the posterior for each input x
         post = self.model.posterior(X)
-        var_post = post.variance
 
-        # calculcate the variance of the fantasy posteriors
-        fantasy_posts = self.fantasy_models.posterior(
-            (
-                X.reshape(*X.shape[:-2], 1, *X.shape[-2:]).expand(
-                    *X.shape[:-2], self.xs_exe.shape[0], *X.shape[-2:]
+        # Some different shape handling with EnsemblePosterior -- maybe we can move this upstream?
+        if isinstance(post, EnsemblePosterior):
+            var_post = post.variance.unsqueeze(-1)
+
+            # calculate the variance of the fantasy posteriors
+            fantasy_posts = self.fantasy_models.posterior(X)
+            var_fantasy_posts = fantasy_posts.variance.unsqueeze(-1).unsqueeze(-1)
+
+        else:
+            var_post = post.variance
+
+            # calculate the variance of the fantasy posteriors
+            fantasy_posts = self.fantasy_models.posterior(
+                (
+                    X.reshape(*X.shape[:-2], 1, *X.shape[-2:]).expand(
+                        *X.shape[:-2], self.xs_exe.shape[0], *X.shape[-2:]
+                    )
                 )
             )
-        )
-        var_fantasy_posts = fantasy_posts.variance
+            var_fantasy_posts = fantasy_posts.variance
 
         # calculate Shannon entropy for posterior given the current data
         h_current = 0.5 * torch.log(2 * torch.pi * var_post) + 0.5
@@ -114,4 +125,7 @@ def forward(self, X: Tensor) -> Tensor:
         # (avg_h_fantasy is a Monte-Carlo estimate of the second term on the right)
         eig = h_current_scalar - avg_h_fantasy
 
+        # mean over properties
+        eig = eig.mean(dim=-1, keepdim=True)
+
         return eig.reshape(X.shape[:-2])
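For context, the acquisition above scores candidates by expected information gain under a Gaussian entropy approximation: per point, h = 0.5 * log(2*pi*var) + 0.5, and EIG = h_current minus the Monte-Carlo mean of the fantasy entropies, finally averaged over outcomes. A self-contained toy sketch of that arithmetic (shapes and variances below are made up for illustration; this is not Xopt code):

    import torch

    # hypothetical posterior variances at 5 candidate points for 1 outcome
    var_post = torch.rand(5, 1) + 0.1
    # hypothetical variances under 8 fantasy models at the same points
    var_fantasy = torch.rand(8, 5, 1) + 0.1

    # differential entropy of a Gaussian: h = 0.5 * log(2 * pi * var) + 0.5
    h_current = 0.5 * torch.log(2 * torch.pi * var_post) + 0.5
    h_fantasy = 0.5 * torch.log(2 * torch.pi * var_fantasy) + 0.5

    # EIG = H[current] - E_fantasy[H[fantasy]], estimated by a mean over fantasies,
    # then averaged over outcomes ("mean over properties" in the patch)
    eig = (h_current - h_fantasy.mean(dim=0)).mean(dim=-1)
    print(eig.shape)  # torch.Size([5])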
diff --git a/xopt/generators/bayesian/models/ensemble.py b/xopt/generators/bayesian/models/ensemble.py
new file mode 100644
index 000000000..6341b35d4
--- /dev/null
+++ b/xopt/generators/bayesian/models/ensemble.py
@@ -0,0 +1,282 @@
+from botorch.models.model import Model, ModelList
+from botorch.models.transforms import Normalize, Standardize
+from botorch.posteriors.ensemble import EnsemblePosterior
+
+from gpytorch.distributions import MultivariateNormal
+
+import torch
+from torch import Tensor
+from torch import nn
+
+import numpy as np
+from copy import deepcopy
+
+from xopt.generators.bayesian.base_model import ModelConstructor
+from xopt.generators.bayesian.utils import get_training_data
+
+from pydantic import Field
+from typing import List, Callable, Dict, Union
+import pandas as pd
+
+
+class EnsembleModelConstructor(ModelConstructor):
+    """
+    Model constructor for ensemble estimates
+    """
+
+    name: str = Field("ensemble")
+    model: Callable = Field(None, description="Ensemble model")
+
+    def build_model(
+        self,
+        input_names: List[str],
+        outcome_names: List[str],
+        data: pd.DataFrame,
+        input_bounds: Dict[str, List] = None,
+        dtype: torch.dtype = torch.float64,
+        device: Union[torch.device, str] = "cpu",
+    ):
+        # Get training data
+        train_X, train_Y, train_Yvar = get_training_data(
+            input_names, outcome_names, data
+        )
+        self.model = self.model.to(dtype)
+
+        # Fit model on training data
+        self.model.fit(train_X, train_Y)
+        self.model = self.model.to(device)
+        self.model.eval()
+
+        # Expected model list
+        self.model.models = [self.model]
+
+        return self.model
+
+
+class MCDropoutMLP(nn.Module):
+    """
+    Generic multi-layer perceptron model with MC dropout
+    """
+
+    def __init__(
+        self,
+        input_dim,
+        output_dim,
+        dropout_rate=0.5,
+        n_hidden_layers=3,
+        hidden_dim=100,
+        hidden_activation=nn.ReLU(),
+        output_activation=None,
+    ):
+        super().__init__()
+
+        # Build up model layers
+        self.layers = nn.ModuleList()
+        for i in range(n_hidden_layers):
+            # Input layer or hidden layers
+            if i == 0:
+                self.layers.append(nn.Linear(input_dim, hidden_dim))
+            else:
+                self.layers.append(nn.Linear(hidden_dim, hidden_dim))
+
+            self.layers.append(nn.Dropout(p=dropout_rate))
+            self.layers.append(hidden_activation)
+
+        # Output layer
+        if n_hidden_layers == 0:
+            self.layers.append(nn.Linear(input_dim, output_dim))
+        else:
+            self.layers.append(nn.Linear(hidden_dim, output_dim))
+
+        if output_activation is not None:
+            self.layers.append(output_activation)
+
+    def forward(self, input_data, seed=None):
+        # Needed so that dropout works
+        self.train()
+        # Seeding makes dropout self-consistent
+        if seed is not None:
+            torch.random.manual_seed(seed)
+        for layer in self.layers:
+            input_data = layer(input_data)
+
+        self.eval()
+        return input_data
+
+
+class FlattenedEnsemblePosterior(EnsemblePosterior):
+    """
+    Wrapper for EnsemblePosterior to make shapes do similar things
+    as GPyTorchPosterior.
+    """
+
+    def rsample(self, sample_shape=torch.Size()):
+        samples = super().rsample(sample_shape)
+        # Flatten (q, m) into a single dimension like GP case
+        q, m = samples.shape[-2:]
+        return samples.view(*samples.shape[:-2], q * m)
+
+    @property
+    def mean(self):
+        # Average over ensemble dimension
+        return super().mean.mean(dim=1)
+
+    @property
+    def variance(self):
+        # Average over ensemble dimension
+        return super().variance.mean(dim=1)
+
+    @property
+    def mvn(self):
+        mean = self.mean
+        variance = self.variance
+
+        # Assumes diagonal gaussian for simplicity -- not necessarily true, but probably good enough
+        covar = torch.diag_embed(variance.squeeze(-1).squeeze(-1))
+        mvn = MultivariateNormal(mean.squeeze(-1).squeeze(-1), covariance_matrix=covar)
+
+        return mvn
+
+
+class MCDropoutModel(Model):
+    """
+    BoTorch model for MC dropout ensemble
+    """
+
+    model: MCDropoutMLP
+    num_samples: int
+    input_dim: int
+    output_dim: int
+    dropout_rate: float
+    n_hidden_layers: int
+    hidden_dim: int
+
+    def __init__(
+        self,
+        input_dim: int,
+        output_dim: int,
+        dropout_rate: float = 0.1,
+        num_samples: int = 30,
+        n_hidden_layers=3,
+        hidden_dim=100,
+        hidden_activation=nn.ReLU(),
+        output_activation=None,
+        observables=["y1"],
+        input_bounds=torch.tensor([[-np.pi], [np.pi]]),
+        n_epochs=500,
+        n_cond_epochs=40,
+    ):
+        super(MCDropoutModel, self).__init__()
+        # Construct model
+        self.model = MCDropoutMLP(
+            input_dim,
+            output_dim,
+            dropout_rate=dropout_rate,
+            n_hidden_layers=n_hidden_layers,
+            hidden_dim=hidden_dim,
+            hidden_activation=hidden_activation,
+            output_activation=output_activation,
+        )
+
+        self.num_samples = num_samples
+        self.n_epochs = n_epochs
+        self.n_cond_epochs = n_cond_epochs
+
+        self.outcome_transform = Standardize(output_dim)
+        self.input_transform = Normalize(input_dim, bounds=input_bounds)
+        self.output_indices = None
+
+    def fit(
+        self,
+        X: Tensor,
+        y: Tensor,
+        loss_fn=torch.nn.MSELoss(),
+        n_epochs=None,
+        optim_type=torch.optim.Adam,
+        lr=1e-3,
+        is_fantasy=False,
+    ) -> None:
+        if n_epochs is None:
+            n_epochs = self.n_epochs
+
+        self.model.train()
+        # Seems like this is normalized already in InfoBAX calc?
+        if not is_fantasy:
+            X_s = self.input_transform(X)
+            y_s = self.outcome_transform(y)[0]
+        else:
+            X_s = X
+            y_s = y
+
+        # Generic training loop -- may be worth exposing more/adding more hooks?
+        optimizer = optim_type(self.model.parameters(), lr=lr)
+
+        for epoch in range(n_epochs):
+            pred = self.model(X_s)
+            loss = loss_fn(pred, y_s)
+
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+        self.train_inputs = (X,)
+        self.train_targets = y
+
+    def condition_on_observations(self, X, y):
+        fantasies = []
+        for i_batch in range(X.shape[0]):
+            new_model = deepcopy(self)
+            train_X = torch.cat([new_model.train_inputs[0], X[i_batch]], dim=0)
+            train_y = torch.cat([new_model.train_targets, y[i_batch]], dim=0)
+
+            new_model.fit(
+                train_X, train_y, n_epochs=self.n_cond_epochs, is_fantasy=True
+            )
+
+            fantasies.append(new_model)
+
+        return ModelList(*fantasies)
+
+    def subset_output(self, output_indices):
+        # Create a deep copy of the model to avoid modifying the original
+        new_model = deepcopy(self)
+        # Update output indices attribute (see forward)
+        new_model.output_indices = output_indices
+
+        return new_model
+
+    def forward(self, X: Tensor) -> Tensor:
+        x = self.input_transform(X)
+
+        preds = []
+        for i in range(self.num_samples):
+            out = self.model(x, seed=i)
+            if out.ndim == 2:  # add q dimension
+                out = out.unsqueeze(-2)
+            elif out.ndim == 1:
+                out = out.unsqueeze(-1).unsqueeze(-1)
+
+            out = out.unsqueeze(1)
+            preds.append(out)
+
+        samples = torch.cat(preds, dim=1)
+
+        if self.output_indices is not None:
+            return samples[..., self.output_indices]
+        else:
+            return samples
+
+    def posterior(self, X: Tensor, **kwargs):
+        samples = self.forward(X)
+        if hasattr(self, "outcome_transform"):
+            samples = self.outcome_transform.untransform(samples)[0]
+
+        posterior = FlattenedEnsemblePosterior(samples)
+
+        return posterior
+
+    def set_train_data(self, X=None, y=None, strict=False):
+        if X is None or y is None:
+            return
+        self.train_inputs = (X,)
+        self.train_targets = y
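A rough, self-contained sketch of how the new MC-dropout model is meant to be exercised (the toy data, epoch count, and test-point shape below are invented; only the class and method names come from this file):

    import numpy as np
    import torch

    from xopt.generators.bayesian.models.ensemble import MCDropoutModel

    # toy 1D regression data on [-pi, pi]
    train_X = torch.linspace(-np.pi, np.pi, 40, dtype=torch.float64).unsqueeze(-1)
    train_Y = torch.sin(train_X)

    model = MCDropoutModel(input_dim=1, output_dim=1).to(torch.float64)
    model.fit(train_X, train_Y, n_epochs=200)

    # posterior at 7 test points, shaped b x q x d as the acquisition would pass them;
    # mean/variance are estimated from num_samples seeded-dropout forward passes
    X_test = torch.linspace(-1.0, 1.0, 7, dtype=torch.float64).reshape(7, 1, 1)
    post = model.posterior(X_test)
    print(post.mean.shape, post.variance.shape)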
diff --git a/xopt/generators/bayesian/utils.py b/xopt/generators/bayesian/utils.py
index f154755ee..014447cd7 100644
--- a/xopt/generators/bayesian/utils.py
+++ b/xopt/generators/bayesian/utils.py
@@ -1,6 +1,6 @@
 from contextlib import nullcontext
 from copy import deepcopy
-from typing import Any, List
+from typing import Any, List, Union
 
 import gpytorch
 import numpy as np
@@ -17,7 +17,7 @@
 
 def get_training_data(
-    input_names: List[str], outcome_name: str, data: pd.DataFrame
+    input_names: List[str], outcome_names: Union[str, List[str]], data: pd.DataFrame
 ) -> (torch.Tensor, torch.Tensor):
     """
     Creates training data from input data frame.
 
@@ -51,25 +51,33 @@ def get_training_data(
     """
+    if type(outcome_names) is not list:
+        outcome_names = [outcome_names]
+
     input_data = data[input_names]
-    outcome_data = data[outcome_name]
+    outcome_data = data[outcome_names]
 
     # cannot use any rows where any variable values are nans
     non_nans = ~input_data.isnull().T.any()
     input_data = input_data[non_nans]
     outcome_data = outcome_data[non_nans]
 
-    train_X = torch.tensor(input_data[~outcome_data.isnull()].to_numpy(dtype="double"))
+    train_X = torch.tensor(
+        input_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double")
+    )
     train_Y = torch.tensor(
-        outcome_data[~outcome_data.isnull()].to_numpy(dtype="double")
-    ).unsqueeze(-1)
+        outcome_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double")
+    )
+    if train_Y.ndim < 2:
+        train_Y = train_Y.unsqueeze(-1)
 
     train_Yvar = None
-    if f"{outcome_name}_var" in data:
-        variance_data = data[f"{outcome_name}_var"][non_nans]
+    var_names = [f"{name}_var" for name in outcome_names if f"{name}_var" in data]
+    if len(var_names) > 0:
+        variance_data = data[var_names][non_nans]
         train_Yvar = torch.tensor(
-            variance_data[~outcome_data.isnull()].to_numpy(dtype="double")
-        ).unsqueeze(-1)
+            variance_data[~outcome_data.isnull().T.any()].to_numpy(dtype="double")
+        )
 
     return train_X, train_Y, train_Yvar
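A quick illustration of the generalized get_training_data behavior after this change (column names and values below are hypothetical):

    import numpy as np
    import pandas as pd

    from xopt.generators.bayesian.utils import get_training_data

    data = pd.DataFrame(
        {
            "x1": [0.1, 0.2, 0.3, 0.4],
            "x2": [1.0, 2.0, 3.0, 4.0],
            "y1": [1.0, np.nan, 3.0, 4.0],  # row with a NaN outcome is dropped
            "y2": [0.5, 1.5, 2.5, 3.5],
            "y1_var": [0.01, 0.01, 0.01, 0.01],
        }
    )

    # a single outcome name still works (it is wrapped into a list internally)
    train_X, train_Y, train_Yvar = get_training_data(["x1", "x2"], "y1", data)
    print(train_X.shape, train_Y.shape)  # torch.Size([3, 2]) torch.Size([3, 1])

    # multiple outcomes: rows with a NaN in any requested outcome are dropped,
    # and only outcomes with a matching "<name>_var" column contribute to train_Yvar
    train_X, train_Y, train_Yvar = get_training_data(["x1", "x2"], ["y1", "y2"], data)
    print(train_Y.shape, train_Yvar.shape)  # torch.Size([3, 2]) torch.Size([3, 1])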