From d365a94652abe47c73a26b5b1462d2720352a086 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 11 Sep 2024 14:56:07 -0500
Subject: [PATCH 01/27] Added gradient capability

---
 espei/error_functions/activity_error.py | 66 ++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py
index a2d5e239..080187a7 100644
--- a/espei/error_functions/activity_error.py
+++ b/espei/error_functions/activity_error.py
@@ -19,6 +19,7 @@
 from pycalphad.core.utils import filter_phases, unpack_species
 from scipy.stats import norm
 from pycalphad import Workspace
+from pycalphad.property_framework import JanssonDerivative
 
 from espei.core_utils import ravel_conditions
 from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
@@ -29,7 +30,7 @@
 _log = logging.getLogger(__name__)
 
 
-def target_chempots_from_activity(component, target_activity, temperatures, wks_ref):
+def target_chempots_from_activity(component, parameters, target_activity, temperatures, wks_ref):
     """
     Return an array of experimental chemical potentials for the component
 
@@ -53,11 +54,18 @@ def target_chempots_from_activity(component, target_activity, temperatures, wks_
     # so mu_i = R*T*ln(acr_i) + mu_i^{ref}
     ref_chempot = wks_ref.get(v.MU(component))
     exp_chem_pots = v.R * temperatures * np.log(target_activity) + ref_chempot
-    return exp_chem_pots
+    
+    gradient_props = [JanssonDerivative(v.MU(component), key) for key in parameters]
+    gradients = wks_ref.get(*gradient_props)
+    if type(gradients) is list:
+        ref_grads = [float(element) for element in gradients]
+    else:
+        ref_grads = gradients
+    return exp_chem_pots, ref_grads
 
 
 # TODO: roll this function into ActivityResidual
-def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None, phase_models=None, callables=None, data_weight=1.0) -> Tuple[List[float], List[float]]:
+def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None, phase_models=None, callables=None, data_weight=1.0) -> Tuple[List[float], List[float], List[float]]:
     """
     Notes
     -----
@@ -75,12 +83,23 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
     if parameters is None:
         parameters = {}
 
+    params_keys = []
+
+    # XXX: This mutates the global pycalphad namespace
+    for key in parameters.keys():
+        if not hasattr(v, key):
+            setattr(v, key, v.IndependentPotential(key))
+        params_keys.append(getattr(v, key))
+        # XXX: Mutates argument to function
+        dbf.symbols.pop(key,None)
+        
     activity_datasets = datasets.search(
         (tinydb.where('output').test(lambda x: 'ACR' in x)) &
         (tinydb.where('components').test(lambda x: set(x).issubset(comps))))
 
     residuals = []
     weights = []
+    gradients = []
     for ds in activity_datasets:
         acr_component = ds['output'].split('_')[1]  # the component of interest
         # calculate the reference state equilibrium
@@ -90,7 +109,9 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
         data_comps = ds['components']
         data_phases = filter_phases(dbf, unpack_species(dbf, data_comps), candidate_phases=phases)
         ref_conditions = {_map_coord_to_variable(coord): val for coord, val in ref['conditions'].items()}
-        wks_ref = Workspace(database=dbf, components=data_comps, phases=ref['phases'], conditions=ref_conditions, parameters=parameters)
+        # removed parameter assignment from wks_ref
+        ref_conditions.update(parameters)
+        wks_ref = Workspace(database=dbf, components=data_comps, phases=ref['phases'], conditions=ref_conditions)
 
         # calculate current chemical potentials
         # get the conditions
@@ -102,6 +123,7 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
         # we will ravel each composition individually, since they all must have the same shape
         dataset_computed_chempots = []
         dataset_weights = []
+        dataset_gradients = []
         for comp_name, comp_x in conds_list:
             P, T, X = ravel_conditions(ds['values'], ds['conditions']['P'], ds['conditions']['T'], comp_x)
             conditions[v.P] = P
@@ -113,22 +135,34 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
         # assume now that the ravelled conditions all have the same size
         conditions_list = [{c: conditions[c][i] for c in conditions.keys()} for i in range(len(conditions[v.T]))]
         for conds in conditions_list:
-            wks_sample = Workspace(database=dbf, components=data_comps, phases=data_phases, conditions=conds, parameters=parameters)
+            conds.update(parameters)
+            wks_sample = Workspace(database=dbf, components=data_comps, phases=data_phases, conditions=conds)
             dataset_computed_chempots.append(wks_sample.get(v.MU(acr_component)))
             dataset_weights.append(std_dev / data_weight / ds.get("weight", 1.0))
+            gradient_props = [JanssonDerivative(v.MU(acr_component), key) for key in parameters]
+            grads = wks_sample.get(*gradient_props)
+            if type(grads) is list:
+                sample_grads = [float(element) for element in grads]
+            else:
+                sample_grads = grads
+            dataset_gradients.append(sample_grads)
 
         # calculate target chempots
         dataset_activities = np.array(ds['values']).flatten()
-        dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], wks_ref)
+        dataset_target_chempots, ref_grads = target_chempots_from_activity(acr_component, parameters, dataset_activities, conditions[v.T], wks_ref)
         dataset_residuals = (np.asarray(dataset_computed_chempots) - np.asarray(dataset_target_chempots, dtype=float)).tolist()
+        adjusted_gradient = []
+        for element in dataset_gradients:
+            adjusted_gradient.append((np.asarray(element) - np.asarray(ref_grads)).tolist())
         _log.debug('Data: %s, chemical potential difference: %s, reference: %s', dataset_activities, dataset_residuals, ds["reference"])
         residuals.extend(dataset_residuals)
         weights.extend(dataset_weights)
-    return residuals, weights
+        gradients.append(adjusted_gradient)
+    return residuals, weights, gradients
 
 
 # TODO: roll this function into ActivityResidual
-def calculate_activity_error(dbf, comps, phases, datasets, parameters=None, phase_models=None, callables=None, data_weight=1.0) -> float:
+def calculate_activity_error(dbf, comps, phases, datasets, parameters=None, phase_models=None, callables=None, data_weight=1.0) -> Tuple[float, List[float]]:
     """
     Return the sum of square error from activity data
 
@@ -160,14 +194,20 @@ def calculate_activity_error(dbf, comps, phases, datasets, parameters=None, phas
 
 
     """
-    residuals, weights = calculate_activity_residuals(dbf, comps, phases, datasets, parameters=parameters, phase_models=phase_models, callables=callables, data_weight=data_weight)
+    residuals, weights, gradients = calculate_activity_residuals(dbf, comps, phases, datasets, parameters=parameters, phase_models=phase_models, callables=callables, data_weight=data_weight)
     likelihood = np.sum(norm(0, scale=weights).logpdf(residuals))
+    gradients = np.concatenate(gradients)
+    derivative = -np.array(residuals)*np.array(gradients).T/np.array(weights)**2
+    if derivative.ndim == 1:
+        likelihood_grads = np.sum(-np.array(residuals)*np.array(gradients).T/np.array(weights)**2, axis=0)
+    else:
+        likelihood_grads = np.sum(-np.array(residuals)*np.array(gradients).T/np.array(weights)**2, axis=1)
     if np.isnan(likelihood):
         # TODO: revisit this case and evaluate whether it is resonable for NaN
         # to show up here. When this comment was written, the test
         # test_subsystem_activity_probability would trigger a NaN.
         return -np.inf
-    return likelihood
+    return likelihood, likelihood_grads
 
 
 # TODO: the __init__ method should pre-compute Model and PhaseRecord objects
@@ -218,10 +258,10 @@ def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[fl
         residuals, weights = calculate_activity_residuals(parameters=parameters, **self._activity_likelihood_kwargs)
         return residuals, weights
 
-    def get_likelihood(self, parameters: npt.NDArray) -> float:
+    def get_likelihood(self, parameters: npt.NDArray) -> Tuple[float, List[float]]:
         parameters = {param_name: param for param_name, param in zip(self._symbols_to_fit, parameters.tolist())}
-        likelihood = calculate_activity_error(parameters=parameters, **self._activity_likelihood_kwargs)
-        return likelihood
+        likelihood, gradients = calculate_activity_error(parameters=parameters, **self._activity_likelihood_kwargs)
+        return likelihood, gradients
 
 
 residual_function_registry.register(ActivityResidual)

From a3f120550e42663d16de0f8ed77210c0b863eea5 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 11 Sep 2024 14:56:59 -0500
Subject: [PATCH 02/27] Added gradient capability

---
 .../equilibrium_thermochemical_error.py       | 64 ++++++++++++++-----
 1 file changed, 48 insertions(+), 16 deletions(-)

diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py
index 01d31e71..449a5eb7 100644
--- a/espei/error_functions/equilibrium_thermochemical_error.py
+++ b/espei/error_functions/equilibrium_thermochemical_error.py
@@ -15,6 +15,7 @@
 from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition
 from pycalphad.codegen.phase_record_factory import PhaseRecordFactory
 from pycalphad import Workspace, as_property
+from pycalphad.property_framework import JanssonDerivative
 
 from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
 from espei.phase_models import PhaseModelSpecification
@@ -74,12 +75,23 @@ def build_eqpropdata(data: tinydb.database.Document,
         'CPM':  0.2,  # J/K-mol
     }
 
-    params_keys, _ = extract_parameters(parameters)
+    #params_keys, _ = extract_parameters(parameters)
+    
+    params_keys = []
+
+    # XXX: This mutates the global pycalphad namespace
+    for key in parameters.keys():
+        if not hasattr(v, key):
+            setattr(v, key, v.IndependentPotential(key))
+        params_keys.append(getattr(v, key))
+        # XXX: Mutates argument to function
+        dbf.symbols.pop(key,None)
 
     data_comps = list(set(data['components']).union({'VA'}))
     species = sorted(unpack_species(dbf, data_comps), key=str)
     data_phases = filter_phases(dbf, species, candidate_phases=data['phases'])
-    models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters)
+    #models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters)
+    models = instantiate_models(dbf, species, data_phases, model=model)
     output = data['output']
     property_output = output.split('_')[0]  # property without _FORM, _MIX, etc.
     samples = np.array(data['values']).flatten()
@@ -99,7 +111,9 @@ def build_eqpropdata(data: tinydb.database.Document,
     pot_conds = OrderedDict([(getattr(v, key), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if not key.startswith('X_')])
     comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')])
 
-    phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds}, models, parameters=parameters)
+    phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds, **parameters}, models)
+    #wks = Workspace(dbf, species, data_phases, {**pot_conds, **comp_conds, **parameters}, models=model)
+    #phase_record_factory = wks.phase_record_factory
 
     # Now we need to unravel the composition conditions
     # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the
@@ -173,7 +187,7 @@ def get_equilibrium_thermochemical_data(dbf: Database, comps: Sequence[str],
 def calc_prop_differences(eqpropdata: EqPropData,
                           parameters: np.ndarray,
                           approximate_equilibrium: Optional[bool] = False,
-                          ) -> Tuple[np.ndarray, np.ndarray]:
+                          ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     Calculate differences between the expected and calculated values for a property
 
@@ -196,42 +210,52 @@ def calc_prop_differences(eqpropdata: EqPropData,
         * weights for this dataset
 
     """
+        
     dbf = eqpropdata.dbf
     species = eqpropdata.species
     phases = eqpropdata.phases
     pot_conds = eqpropdata.potential_conds
     models = eqpropdata.models
     phase_record_factory = eqpropdata.phase_record_factory
-    update_phase_record_parameters(phase_record_factory, parameters)
+    #update_phase_record_parameters(phase_record_factory, parameters)
     params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters))
     output = as_property(eqpropdata.output)
     weights = np.array(eqpropdata.weight, dtype=np.float64)
     samples = np.array(eqpropdata.samples, dtype=np.float64)
-    wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_record_factory, parameters=params_dict)
+    wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_record_factory)
 
     calculated_data = []
+    gradient_data = []
     for comp_conds in eqpropdata.composition_conds:
-        cond_dict = OrderedDict(**pot_conds, **comp_conds)
+        cond_dict = OrderedDict(**pot_conds, **comp_conds, **params_dict)
         wks.conditions = cond_dict
-        wks.parameters = params_dict  # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc.
+        #wks.parameters = params_dict  # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc.
         wks.models = models
         wks.phase_record_factory = phase_record_factory
         vals = wks.get(output)
         calculated_data.extend(np.atleast_1d(vals).tolist())
+        gradient_props = [JanssonDerivative(output, key) for key in params_dict]
+        gradients = wks.get(*gradient_props)
+        if type(gradients) is list:
+            gradients_magnitude = [float(element) for element in gradients]
+        else:
+            gradients_magnitude = gradients
+        gradient_data.append(gradients_magnitude)
 
     calculated_data = np.array(calculated_data, dtype=np.float64)
+    gradient_data = np.array(gradient_data, dtype=np.float64)
 
     assert calculated_data.shape == samples.shape, f"Calculated data shape {calculated_data.shape} does not match samples shape {samples.shape}"
     assert calculated_data.shape == weights.shape, f"Calculated data shape {calculated_data.shape} does not match weights shape {weights.shape}"
     differences = calculated_data - samples
     _log.debug('Output: %s differences: %s, weights: %s, reference: %s', output, differences, weights, eqpropdata.reference)
-    return differences, weights
+    return differences, weights, gradient_data
 
 
 def calculate_equilibrium_thermochemical_probability(eq_thermochemical_data: Sequence[EqPropData],
                                                      parameters: np.ndarray,
                                                      approximate_equilibrium: Optional[bool] = False,
-                                                     ) -> float:
+                                                     ) -> Tuple[float, List[float]]:
     """
     Calculate the total equilibrium thermochemical probability for all EqPropData
 
@@ -256,19 +280,27 @@ def calculate_equilibrium_thermochemical_probability(eq_thermochemical_data: Seq
 
     differences = []
     weights = []
+    gradients = []
     for eqpropdata in eq_thermochemical_data:
-        diffs, wts = calc_prop_differences(eqpropdata, parameters, approximate_equilibrium)
+        diffs, wts, grads = calc_prop_differences(eqpropdata, parameters, approximate_equilibrium)
         if np.any(np.isinf(diffs) | np.isnan(diffs)):
             # NaN or infinity are assumed calculation failures. If we are
             # calculating log-probability, just bail out and return -infinity.
             return -np.inf
         differences.append(diffs)
         weights.append(wts)
+        gradients.append(grads)
 
     differences = np.concatenate(differences, axis=0)
     weights = np.concatenate(weights, axis=0)
+    gradients = np.concatenate(gradients, axis=0)
     probs = norm(loc=0.0, scale=weights).logpdf(differences)
-    return np.sum(probs)
+    grad_probs = -differences*gradients.T/weights**2
+    if grad_probs.ndim == 1:
+        grad_probs_sum = np.sum(grad_probs, axis=0)
+    else:
+        grad_probs_sum = np.sum(grad_probs, axis=1)
+    return np.sum(probs), grad_probs_sum
 
 
 class EquilibriumPropertyResidual(ResidualFunction):
@@ -309,9 +341,9 @@ def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[fl
             weights.extend(dataset_weights.tolist())
         return residuals, weights
 
-    def get_likelihood(self, parameters) -> float:
-        likelihood = calculate_equilibrium_thermochemical_probability(self.property_data, parameters)
-        return likelihood
+    def get_likelihood(self, parameters) -> Tuple[float, List[float]]:
+        likelihood, gradients = calculate_equilibrium_thermochemical_probability(self.property_data, parameters)
+        return likelihood, gradients
 
 
-residual_function_registry.register(EquilibriumPropertyResidual)
\ No newline at end of file
+residual_function_registry.register(EquilibriumPropertyResidual)

From fcbed85e54082e90e1a0163c23acc3d0699acbc6 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 11 Sep 2024 16:57:26 -0500
Subject: [PATCH 03/27] Added gradient capability

---
 espei/error_functions/zpf_error.py | 122 ++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 28 deletions(-)

diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py
index f6d961e2..90f58cf7 100644
--- a/espei/error_functions/zpf_error.py
+++ b/espei/error_functions/zpf_error.py
@@ -22,7 +22,7 @@
 import numpy as np
 from numpy.typing import ArrayLike
 from pycalphad import Database, Model, Workspace, variables as v
-from pycalphad.property_framework import IsolatedPhase
+from pycalphad.property_framework import IsolatedPhase, JanssonDerivative
 from pycalphad.core.utils import filter_phases, unpack_species
 from pycalphad.codegen.phase_record_factory import PhaseRecordFactory
 from scipy.stats import norm
@@ -141,9 +141,16 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat
     desired_data = datasets.search((tinydb.where('output') == 'ZPF') &
                                    (tinydb.where('components').test(lambda x: set(x).issubset(comps))) &
                                    (tinydb.where('phases').test(lambda x: len(set(phases).intersection(x)) > 0)))
-    wks = Workspace(dbf, comps, phases, parameters=parameters)
+    wks = Workspace(dbf, comps, phases)
     if model is None:
         model = {}
+        
+    params_keys = []
+    for key in parameters.keys():
+        if not hasattr(v, key):
+            setattr(v, key, v.IndependentPotential(key))
+        params_keys.append(getattr(v, key))
+        dbf.symbols.pop(key,None)
 
     zpf_data = []  # 1:1 correspondence with each dataset
     for data in desired_data:
@@ -203,7 +210,7 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat
     return zpf_data
 
 
-def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameters: np.ndarray, approximate_equilibrium: bool = False) -> np.ndarray:
+def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameter_dict, parameters: np.ndarray, approximate_equilibrium: bool = False) -> Tuple[np.ndarray, np.ndarray]:
     """
     Calculate the chemical potentials for the target hyperplane, one vertex at a time
 
@@ -217,25 +224,36 @@ def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameters: np
 
     """
     target_hyperplane_chempots = []
+    target_hyperplane_chempots_grads = []
     species = phase_region.species
     phases = phase_region.phases
     models = phase_region.models
-    param_keys = list(models.values())[0]._parameters_arg
-    parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters))
+    #param_keys = list(models.values())[0]._parameters_arg
+    #parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters))
     for vertex in phase_region.hyperplane_vertices:
-        update_phase_record_parameters(vertex.phase_record_factory, parameters)
+        #update_phase_record_parameters(vertex.phase_record_factory, parameters)
         cond_dict = {**vertex.comp_conds, **phase_region.potential_conds}
+        params_keys = list(parameter_dict.keys())
+        for index in range(len(parameters)):
+            cond_dict.update({params_keys[index]: parameters[index]})
         if vertex.has_missing_comp_cond:
             # This composition is unknown -- it doesn't contribute to hyperplane estimation
             pass
         else:
             # Extract chemical potential hyperplane from multi-phase calculation
             # Note that we consider all phases in the system, not just ones in this tie region
-            wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=vertex.phase_record_factory, conditions=cond_dict, parameters=parameters_dict)
+            wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=vertex.phase_record_factory, conditions=cond_dict)
             # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species
             active_pure_elements = [list(x.constituents.keys()) for x in species]
             active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"})
             MU_values = [wks.get(v.MU(comp)) for comp in active_pure_elements]
+            gradient_params = [JanssonDerivative(v.MU(spc), key) for spc in active_pure_elements for key in parameter_dict]
+            gradients = wks.get(*gradient_params)
+            if type(gradients) is list:
+                gradients_magnitude = [float(element) for element in gradients]
+            else:
+                gradients_magnitude = gradients
+            gradients_magnitude = np.reshape(gradients_magnitude,(len(MU_values),len(parameter_dict)))
             num_phases = np.sum(wks.eq.Phase.squeeze() != '')
             Y_values = wks.eq.Y.squeeze()
             no_internal_dof = np.all((np.isclose(Y_values, 1.)) | np.isnan(Y_values))
@@ -243,31 +261,58 @@ def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameters: np
                 target_hyperplane_chempots.append(np.full_like(MU_values, np.nan))
             else:
                 target_hyperplane_chempots.append(MU_values)
+                target_hyperplane_chempots_grads.append(gradients_magnitude)
     target_hyperplane_mean_chempots = np.nanmean(target_hyperplane_chempots, axis=0, dtype=np.float64)
-    return target_hyperplane_mean_chempots
+    target_hyperplane_chempots_grads = np.nanmean(target_hyperplane_chempots_grads, axis=0, dtype=np.float64) 
+    return target_hyperplane_mean_chempots, target_hyperplane_chempots_grads
 
 
-def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray,
+def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_hyperplane_chempots_grads: np.ndarray,
                                 phase_region: PhaseRegion, dbf: Database, parameter_dict, vertex: RegionVertex,
                                 parameters: np.ndarray, approximate_equilibrium: bool = False) -> Tuple[float,List[float]]:
     """Calculate the integrated driving force between the current hyperplane and target hyperplane.
     """
     species = phase_region.species
     models = phase_region.models
-    param_keys = list(models.values())[0]._parameters_arg
-    parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters))
+    #param_keys = list(models.values())[0]._parameters_arg
+    #parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters))
     current_phase = vertex.phase_name
     cond_dict = {**phase_region.potential_conds, **vertex.comp_conds}
+    params_keys = list(parameter_dict.keys())
+    for index in range(len(parameters)):
+        cond_dict.update({params_keys[index]: parameters[index]})
     str_statevar_dict = OrderedDict([(str(key),cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)])
     phase_record_factory = vertex.phase_record_factory
-    update_phase_record_parameters(phase_record_factory, parameters)
+    #update_phase_record_parameters(phase_record_factory, parameters)
     if vertex.has_missing_comp_cond:
         # We don't have the phase composition here, so we estimate the driving force.
         # Can happen if one of the composition conditions is unknown or if the phase is
         # stoichiometric and the user did not specify a valid phase composition.
         single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_record_factory, pdens=50)
         df = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM
-        driving_force = float(df.max())
+        #driving_force = float(df.max())
+        
+        if np.squeeze(single_eqdata.X).ndim == 1:
+            vertex_comp_estimate = np.squeeze(single_eqdata.X)
+        else:
+            vertex_comp_estimate = np.squeeze(single_eqdata.X)[np.nanargmax(df),:]
+        counter = 0
+        for comp in species:
+            if v.X(comp) in cond_dict.keys():
+                cond_dict.update({v.X(comp): vertex_comp_estimate[counter]})
+            counter = counter + 1
+                
+        wks = Workspace(database=dbf, components=species, phases=current_phase,  phase_record_factory=phase_record_factory, conditions=cond_dict)
+        constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
+        driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex_comp_estimate) - constrained_energy
+        gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
+        gradients = wks.get(*gradient_params)
+        if type(gradients) is list:
+            constrained_energy_gradient = [float(element) for element in gradients]
+        else:
+            constrained_energy_gradient = gradients
+        driving_force_gradient = np.squeeze(np.matmul(vertex_comp_estimate,target_hyperplane_chempots_grads) - constrained_energy_gradient)
+        
     elif vertex.is_disordered:
         # Construct disordered sublattice configuration from composition dict
         # Compute energy
@@ -295,17 +340,24 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray,
         driving_force = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM
         driving_force = float(np.squeeze(driving_force))
     else:
-        wks = Workspace(database=dbf, components=species, phases=current_phase, models=models, phase_record_factory=phase_record_factory, conditions=cond_dict, parameters=parameters_dict)
+        wks = Workspace(database=dbf, components=species, phases=current_phase, models=models, phase_record_factory=phase_record_factory, conditions=cond_dict)
         constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
         driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex.composition) - constrained_energy
-    return driving_force
+        gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
+        gradients = wks.get(*gradient_params)
+        if type(gradients) is list:
+            constrained_energy_gradient = [float(element) for element in gradients]
+        else:
+            constrained_energy_gradient = gradients
+        driving_force_gradient = np.squeeze(np.matmul(np.reshape(vertex.composition,(1,-1)),target_hyperplane_chempots_grads) - constrained_energy_gradient)
+    return driving_force, driving_force_gradient
 
 
 def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]],
                                  parameters: ArrayLike = None,
                                  approximate_equilibrium: bool = False,
                                  short_circuit: bool = False
-                                 ) -> Tuple[List[List[float]], List[List[float]]]:
+                                 ) -> Tuple[List[List[float]], List[List[float]], List[List[float]]]:
     """
     Calculate error due to phase equilibria data
 
@@ -339,41 +391,49 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]],
         parameters = np.array([])
     driving_forces = []
     weights = []
+    gradients = []
     for data in zpf_data:
         data_driving_forces = []
         data_weights = []
+        data_gradients = []
         weight = data['weight']
         dataset_ref = data['dataset_reference']
         # for the set of phases and corresponding tie-line verticies in equilibrium
         for phase_region in data['phase_regions']:
             # 1. Calculate the average multiphase hyperplane
             eq_str = phase_region.eq_str()
-            target_hyperplane = estimate_hyperplane(phase_region, data['dbf'], parameters, approximate_equilibrium=approximate_equilibrium)
+            #target_hyperplane = estimate_hyperplane(phase_region, data['dbf'], parameters, approximate_equilibrium=approximate_equilibrium)
+            target_hyperplane, hyperplane_grads = estimate_hyperplane(phase_region, data['dbf'], data['parameter_dict'], parameters, approximate_equilibrium=approximate_equilibrium)
             if np.any(np.isnan(target_hyperplane)):
                 _log.debug('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref)
                 data_driving_forces.extend([0]*len(phase_region.vertices))
                 data_weights.extend([weight]*len(phase_region.vertices))
+                data_gradients.extend([0]*len(phase_region.vertices))
                 continue
             # 2. Calculate the driving force to that hyperplane for each vertex
             for vertex in phase_region.vertices:
-                driving_force = driving_force_to_hyperplane(target_hyperplane, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters,
-                                                            approximate_equilibrium=approximate_equilibrium,
-                                                            )
+                #driving_force = driving_force_to_hyperplane(target_hyperplane, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters,
+                 #                                           approximate_equilibrium=approximate_equilibrium,
+                 #                                           )
+                driving_force, df_grad = driving_force_to_hyperplane(target_hyperplane, hyperplane_grads, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters, approximate_equilibrium=approximate_equilibrium,)
+                                                             
                 if np.isinf(driving_force) and short_circuit:
                     _log.debug('Equilibria: (%s), current phase: %s, hyperplane: %s, driving force: %s, reference: %s. Short circuiting.', eq_str, vertex.phase_name, target_hyperplane, driving_force, dataset_ref)
                     return [[np.inf]], [[np.inf]]
                 data_driving_forces.append(driving_force)
                 data_weights.append(weight)
+                data_gradients.append(df_grad)
                 _log.debug('Equilibria: (%s), current phase: %s, hyperplane: %s, driving force: %s, reference: %s', eq_str, vertex.phase_name, target_hyperplane, driving_force, dataset_ref)
         driving_forces.append(data_driving_forces)
         weights.append(data_weights)
-    return driving_forces, weights
+        gradients.append(data_gradients)
+    return driving_forces, weights, gradients
 
 
 def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]],
                         parameters: np.ndarray = None,
                         data_weight: int = 1.0,
-                        approximate_equilibrium: bool = False) -> float:
+                        approximate_equilibrium: bool = False) -> Tuple[float, List[float]]:
     """
     Calculate the likelihood due to phase equilibria data.
 
@@ -387,15 +447,21 @@ def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]],
     """
     if len(zpf_data) == 0:
         return 0.0
-    driving_forces, weights = calculate_zpf_driving_forces(zpf_data, parameters, approximate_equilibrium, short_circuit=True)
+    driving_forces, weights, gradients = calculate_zpf_driving_forces(zpf_data, parameters, approximate_equilibrium, short_circuit=True)
     # Driving forces and weights are 2D ragged arrays with the shape (len(zpf_data), len(zpf_data['values']))
     driving_forces = np.concatenate(driving_forces).T
     weights = np.concatenate(weights)
+    gradients = np.concatenate(gradients)
     if np.any(np.logical_or(np.isinf(driving_forces), np.isnan(driving_forces))):
         return -np.inf
     log_probabilites = norm.logpdf(driving_forces, loc=0, scale=1000/data_weight/weights)
+    grad_log_probs = -driving_forces*gradients.T/(1000/data_weight/weights)**2
+    if grad_log_probs.ndim == 1:
+        grad_log_probs_sum = np.sum(grad_log_probs, axis =0)
+    else:
+        grad_log_probs_sum = np.sum(grad_log_probs, axis =1)
     _log.debug('Data weight: %s, driving forces: %s, weights: %s, probabilities: %s', data_weight, driving_forces, weights, log_probabilites)
-    return np.sum(log_probabilites)
+    return np.sum(log_probabilites), grad_log_probs_sum
 
 
 class ZPFResidual(ResidualFunction):
@@ -432,9 +498,9 @@ def get_residuals(self, parameters: ArrayLike) -> Tuple[List[float], List[float]
         weights = np.concatenate(weights).tolist()
         return residuals, weights
 
-    def get_likelihood(self, parameters) -> float:
-        likelihood = calculate_zpf_error(self.zpf_data, parameters, data_weight=self.weight)
-        return likelihood
+    def get_likelihood(self, parameters) -> Tuple[float, List[float]]:
+        likelihood, gradients = calculate_zpf_error(self.zpf_data, parameters, data_weight=self.weight)
+        return likelihood, gradients
 
 
-residual_function_registry.register(ZPFResidual)
\ No newline at end of file
+residual_function_registry.register(ZPFResidual)

From d3f38dfa4a0a6a46db577361bc1dffb0aa1f9fb7 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Thu, 19 Sep 2024 12:21:07 -0500
Subject: [PATCH 04/27] Added gradient capability

---
 .../non_equilibrium_thermochemical_error.py   | 63 ++++++++++++++-----
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py
index 60dd806c..43974ba1 100644
--- a/espei/error_functions/non_equilibrium_thermochemical_error.py
+++ b/espei/error_functions/non_equilibrium_thermochemical_error.py
@@ -16,7 +16,7 @@
 from pycalphad import Database, Model, ReferenceState, variables as v
 from pycalphad.core.utils import unpack_species, get_pure_elements, filter_phases
 from pycalphad import Workspace
-from pycalphad.property_framework import IsolatedPhase
+from pycalphad.property_framework import IsolatedPhase, JanssonDerivative
 from pycalphad.property_framework.metaproperties import find_first_compset
 from pycalphad.core.solver import Solver, SolverResult
 
@@ -249,7 +249,7 @@ def get_sample_condition_dicts(calculate_dict: Dict[Any, Any], configuration_tup
     return sample_condition_dicts
 
 
-def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dict=None, symbols_to_fit=None):
+def get_thermochemical_data(dbf, comps, phases, datasets, parameters, model=None, weight_dict=None, symbols_to_fit=None):
     """
 
     Parameters
@@ -274,6 +274,17 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic
     list
         List of data dictionaries to iterate over
     """
+    
+    params_keys = []
+
+    # XXX: This mutates the global pycalphad namespace
+    for key in parameters.keys():
+        if not hasattr(v, key):
+            setattr(v, key, v.IndependentPotential(key))
+        params_keys.append(getattr(v, key))
+        # XXX: Mutates argument to function
+        dbf.symbols.pop(key,None)
+        
     # phase by phase, then property by property, then by model exclusions
     if weight_dict is None:
         weight_dict = {}
@@ -330,7 +341,8 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic
                 curr_data = filter_temperatures(curr_data)
                 calculate_dict = get_prop_samples(curr_data, constituents)
                 model_cls = model.get(phase_name, Model)
-                mod = model_cls(dbf, comps, phase_name, parameters=symbols_to_fit)
+                #mod = model_cls(dbf, comps, phase_name, parameters=symbols_to_fit)
+                mod = model_cls(dbf, comps, phase_name)
                 if prop.endswith('_FORM'):
                     output = ''.join(prop.split('_')[:-1])+"R"
                     mod.shift_reference_state(ref_states, dbf, contrib_mods={e: symengine.S.Zero for e in exclusion})
@@ -368,7 +380,7 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
     phase_name = calc_data['phase_name']
     models = calc_data['model']  # Dict[PhaseName: Model]
     output = calc_data['output']
-    phase_record_factory = calc_data['phase_record_factory']
+    #phase_record_factory = calc_data['phase_record_factory']
     sample_values = calc_data['calculate_dict']['values']
     str_statevar_dict = calc_data['str_statevar_dict']
 
@@ -382,8 +394,9 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
         counter = counter + 1
 
     differences = []
+    gradients = []
     for index in range(len(sample_values)):
-        cond_dict = {}
+        cond_dict = {**parameters}
         for sv_key, sv_val in str_statevar_dict.items():
             cond_dict.update({sv_key: sv_val[index]})
 
@@ -407,7 +420,8 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
         # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why.
         # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered.
         # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid.
-        wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_record_factory, parameters=parameters, solver=NoSolveSolver())
+        #wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_record_factory, solver=NoSolveSolver())
+        wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, solver=NoSolveSolver())
         # We then get a composition set and we use a special "NoSolveSolver" to
         # ensure that we don't change from the data-specified DOF.
         compset = find_first_compset(phase_name, wks)
@@ -417,9 +431,21 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
         iso_phase = IsolatedPhase(compset, wks=wks)
         iso_phase.solver = NoSolveSolver()
         results = wks.get(iso_phase(output))
+        gradient_props = [JanssonDerivative(iso_phase(output), key) for key in parameters]
+        grads = wks.get(*gradient_props) # this is giving NAN for some parameters
+        
+        #if type(grads) is list:
+        #    for i in range(len(grads)):
+         #       if np.isnan(grads[i]):
+         #           grads[i] = 0
+        #else:
+         #   if np.isnan(grads):
+         #       grads = 0
+                
         sample_differences = results - sample_values[index]
         differences.append(sample_differences)
-    return differences
+        gradients.append(grads)
+    return differences, gradients
 
 
 def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: List[FixedConfigurationCalculationData], dbf, parameters=None):
@@ -443,16 +469,24 @@ def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: Li
         parameters = {}
 
     prob_error = 0.0
+    overall_grad = np.zeros(len(parameters))
     for data in thermochemical_data:
         phase_name = data['phase_name']
         sample_values = data['calculate_dict']['values']
-        differences = compute_fixed_configuration_property_differences(dbf, data, parameters)
+        differences, gradients = compute_fixed_configuration_property_differences(dbf, data, parameters)
         differences = np.array(differences)
+        gradients = np.array(gradients)
         probabilities = norm.logpdf(differences, loc=0, scale=data['weights'])
         prob_sum = np.sum(probabilities)
         _log.debug("%s(%s) - probability sum: %0.2f, data: %s, differences: %s, probabilities: %s, references: %s", data['prop'], phase_name, prob_sum, sample_values, differences, probabilities, data['calculate_dict']['references'])
         prob_error += prob_sum
-    return prob_error
+        derivative = -differences*gradients.T/data['weights']**2
+        if derivative.ndim == 1:
+            grad_prob = np.sum(derivative, axis = 0)
+        else:
+            grad_prob = np.sum(derivative, axis=1)
+        overall_grad += grad_prob
+    return prob_error, overall_grad
 
 
 class FixedConfigurationPropertyResidual(ResidualFunction):
@@ -480,7 +514,8 @@ def __init__(
         phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys()))
         if symbols_to_fit is None:
             symbols_to_fit = database_symbols_to_fit(database)
-        self.thermochemical_data = get_thermochemical_data(database, comps, phases, datasets, model_dict, weight_dict=self.weight, symbols_to_fit=symbols_to_fit)
+        parameters = dict(zip(symbols_to_fit, [0]*len(symbols_to_fit)))
+        self.thermochemical_data = get_thermochemical_data(database, comps, phases, datasets, parameters, model_dict, weight_dict=self.weight, symbols_to_fit=symbols_to_fit)
         self._symbols_to_fit = symbols_to_fit
         self.dbf = database
 
@@ -498,10 +533,10 @@ def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[fl
             weights.extend(dataset_weights)
         return residuals, weights
 
-    def get_likelihood(self, parameters) -> float:
+    def get_likelihood(self, parameters) -> Tuple[float, List[float]]:
         parameters = {param_name: param for param_name, param in zip(self._symbols_to_fit, parameters.tolist())}
-        likelihood = calculate_non_equilibrium_thermochemical_probability(self.thermochemical_data, self.dbf, parameters)
-        return likelihood
+        likelihood, gradient = calculate_non_equilibrium_thermochemical_probability(self.thermochemical_data, self.dbf, parameters)
+        return likelihood, gradient
 
 
-residual_function_registry.register(FixedConfigurationPropertyResidual)
\ No newline at end of file
+residual_function_registry.register(FixedConfigurationPropertyResidual)

From d4f9b4020ab9d5df01e87c7f372c6ec77da8bebe Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Thu, 26 Sep 2024 11:59:04 -0500
Subject: [PATCH 05/27] Update activity_error.py

---
 espei/error_functions/activity_error.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py
index 080187a7..5cddecd9 100644
--- a/espei/error_functions/activity_error.py
+++ b/espei/error_functions/activity_error.py
@@ -199,14 +199,14 @@ def calculate_activity_error(dbf, comps, phases, datasets, parameters=None, phas
     gradients = np.concatenate(gradients)
     derivative = -np.array(residuals)*np.array(gradients).T/np.array(weights)**2
     if derivative.ndim == 1:
-        likelihood_grads = np.sum(-np.array(residuals)*np.array(gradients).T/np.array(weights)**2, axis=0)
+        likelihood_grads = np.sum(derivative, axis=0)
     else:
-        likelihood_grads = np.sum(-np.array(residuals)*np.array(gradients).T/np.array(weights)**2, axis=1)
+        likelihood_grads = np.sum(derivative, axis=1)
     if np.isnan(likelihood):
         # TODO: revisit this case and evaluate whether it is resonable for NaN
         # to show up here. When this comment was written, the test
         # test_subsystem_activity_probability would trigger a NaN.
-        return -np.inf
+        return -np.inf, np.zeros(len(parameters))
     return likelihood, likelihood_grads
 
 

From 4c593c14b2f82e51b79a8650dba67b1d841a310d Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Thu, 26 Sep 2024 12:00:16 -0500
Subject: [PATCH 06/27] Update equilibrium_thermochemical_error.py

---
 .../equilibrium_thermochemical_error.py           | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py
index 449a5eb7..0698ba71 100644
--- a/espei/error_functions/equilibrium_thermochemical_error.py
+++ b/espei/error_functions/equilibrium_thermochemical_error.py
@@ -231,16 +231,12 @@ def calc_prop_differences(eqpropdata: EqPropData,
         wks.conditions = cond_dict
         #wks.parameters = params_dict  # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc.
         wks.models = models
-        wks.phase_record_factory = phase_record_factory
+        #wks.phase_record_factory = phase_record_factory
         vals = wks.get(output)
         calculated_data.extend(np.atleast_1d(vals).tolist())
         gradient_props = [JanssonDerivative(output, key) for key in params_dict]
         gradients = wks.get(*gradient_props)
-        if type(gradients) is list:
-            gradients_magnitude = [float(element) for element in gradients]
-        else:
-            gradients_magnitude = gradients
-        gradient_data.append(gradients_magnitude)
+        gradient_data.append(gradients)
 
     calculated_data = np.array(calculated_data, dtype=np.float64)
     gradient_data = np.array(gradient_data, dtype=np.float64)
@@ -276,7 +272,7 @@ def calculate_equilibrium_thermochemical_probability(eq_thermochemical_data: Seq
 
     """
     if len(eq_thermochemical_data) == 0:
-        return 0.0
+        return 0.0, np.zeros(len(parameters))
 
     differences = []
     weights = []
@@ -286,7 +282,7 @@ def calculate_equilibrium_thermochemical_probability(eq_thermochemical_data: Seq
         if np.any(np.isinf(diffs) | np.isnan(diffs)):
             # NaN or infinity are assumed calculation failures. If we are
             # calculating log-probability, just bail out and return -infinity.
-            return -np.inf
+            return -np.inf, np.zeros(len(parameters))
         differences.append(diffs)
         weights.append(wts)
         gradients.append(grads)
@@ -347,3 +343,6 @@ def get_likelihood(self, parameters) -> Tuple[float, List[float]]:
 
 
 residual_function_registry.register(EquilibriumPropertyResidual)
+
+
+residual_function_registry.register(EquilibriumPropertyResidual)

From a504e904ae639325dc1bb446d08429851b48812f Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Thu, 26 Sep 2024 12:01:10 -0500
Subject: [PATCH 07/27] Update non_equilibrium_thermochemical_error.py

---
 .../non_equilibrium_thermochemical_error.py               | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py
index 43974ba1..b16ad35a 100644
--- a/espei/error_functions/non_equilibrium_thermochemical_error.py
+++ b/espei/error_functions/non_equilibrium_thermochemical_error.py
@@ -433,14 +433,6 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
         results = wks.get(iso_phase(output))
         gradient_props = [JanssonDerivative(iso_phase(output), key) for key in parameters]
         grads = wks.get(*gradient_props) # this is giving NAN for some parameters
-        
-        #if type(grads) is list:
-        #    for i in range(len(grads)):
-         #       if np.isnan(grads[i]):
-         #           grads[i] = 0
-        #else:
-         #   if np.isnan(grads):
-         #       grads = 0
                 
         sample_differences = results - sample_values[index]
         differences.append(sample_differences)

From 97f4eb1ad222184d79789a60a6ffa04c3a668f8d Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Thu, 26 Sep 2024 12:02:01 -0500
Subject: [PATCH 08/27] Update zpf_error.py

---
 espei/error_functions/zpf_error.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py
index 90f58cf7..1c6c2a94 100644
--- a/espei/error_functions/zpf_error.py
+++ b/espei/error_functions/zpf_error.py
@@ -246,7 +246,7 @@ def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameter_dict
             # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species
             active_pure_elements = [list(x.constituents.keys()) for x in species]
             active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"})
-            MU_values = [wks.get(v.MU(comp)) for comp in active_pure_elements]
+            MU_values = [wks.get(v.MU(comp)) for comp in active_pure_elements] 
             gradient_params = [JanssonDerivative(v.MU(spc), key) for spc in active_pure_elements for key in parameter_dict]
             gradients = wks.get(*gradient_params)
             if type(gradients) is list:
@@ -285,10 +285,12 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
     phase_record_factory = vertex.phase_record_factory
     #update_phase_record_parameters(phase_record_factory, parameters)
     if vertex.has_missing_comp_cond:
+        for index in range(len(parameters)):
+            str_statevar_dict.update({params_keys[index]: parameters[index]})
         # We don't have the phase composition here, so we estimate the driving force.
         # Can happen if one of the composition conditions is unknown or if the phase is
         # stoichiometric and the user did not specify a valid phase composition.
-        single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_record_factory, pdens=50)
+        single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_record_factory, pdens=50) ## SOMETHING WEIRD HAPPENS WHEN PDENS IS TOO HIGH!
         df = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM
         #driving_force = float(df.max())
         
@@ -296,12 +298,12 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
             vertex_comp_estimate = np.squeeze(single_eqdata.X)
         else:
             vertex_comp_estimate = np.squeeze(single_eqdata.X)[np.nanargmax(df),:]
+        #print(vertex_comp_estimate)
         counter = 0
         for comp in species:
             if v.X(comp) in cond_dict.keys():
                 cond_dict.update({v.X(comp): vertex_comp_estimate[counter]})
             counter = counter + 1
-                
         wks = Workspace(database=dbf, components=species, phases=current_phase,  phase_record_factory=phase_record_factory, conditions=cond_dict)
         constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
         driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex_comp_estimate) - constrained_energy
@@ -406,17 +408,17 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]],
             target_hyperplane, hyperplane_grads = estimate_hyperplane(phase_region, data['dbf'], data['parameter_dict'], parameters, approximate_equilibrium=approximate_equilibrium)
             if np.any(np.isnan(target_hyperplane)):
                 _log.debug('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref)
+                #print('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref)
                 data_driving_forces.extend([0]*len(phase_region.vertices))
                 data_weights.extend([weight]*len(phase_region.vertices))
-                data_gradients.extend([0]*len(phase_region.vertices))
+                data_gradients.extend([[0]*len(parameters)]*len(phase_region.vertices))
                 continue
             # 2. Calculate the driving force to that hyperplane for each vertex
             for vertex in phase_region.vertices:
                 #driving_force = driving_force_to_hyperplane(target_hyperplane, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters,
                  #                                           approximate_equilibrium=approximate_equilibrium,
                  #                                           )
-                driving_force, df_grad = driving_force_to_hyperplane(target_hyperplane, hyperplane_grads, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters, approximate_equilibrium=approximate_equilibrium,)
-                                                             
+                driving_force, df_grad = driving_force_to_hyperplane(target_hyperplane, hyperplane_grads, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters, approximate_equilibrium=approximate_equilibrium)
                 if np.isinf(driving_force) and short_circuit:
                     _log.debug('Equilibria: (%s), current phase: %s, hyperplane: %s, driving force: %s, reference: %s. Short circuiting.', eq_str, vertex.phase_name, target_hyperplane, driving_force, dataset_ref)
                     return [[np.inf]], [[np.inf]]
@@ -453,7 +455,7 @@ def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]],
     weights = np.concatenate(weights)
     gradients = np.concatenate(gradients)
     if np.any(np.logical_or(np.isinf(driving_forces), np.isnan(driving_forces))):
-        return -np.inf
+        return -np.inf, np.zeros(np.shape(gradients)[1]) #NEED TO ASK ABOUT THIS
     log_probabilites = norm.logpdf(driving_forces, loc=0, scale=1000/data_weight/weights)
     grad_log_probs = -driving_forces*gradients.T/(1000/data_weight/weights)**2
     if grad_log_probs.ndim == 1:

From e57a991e19d1b5c5eacda28a72f56180367acc21 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Thu, 24 Oct 2024 14:51:28 -0500
Subject: [PATCH 09/27] Update zpf_error.py

---
 espei/error_functions/zpf_error.py | 46 ++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py
index 1c6c2a94..54b9c60d 100644
--- a/espei/error_functions/zpf_error.py
+++ b/espei/error_functions/zpf_error.py
@@ -263,7 +263,9 @@ def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameter_dict
                 target_hyperplane_chempots.append(MU_values)
                 target_hyperplane_chempots_grads.append(gradients_magnitude)
     target_hyperplane_mean_chempots = np.nanmean(target_hyperplane_chempots, axis=0, dtype=np.float64)
+    #print(target_hyperplane_mean_chempots)
     target_hyperplane_chempots_grads = np.nanmean(target_hyperplane_chempots_grads, axis=0, dtype=np.float64) 
+    #print(target_hyperplane_chempots_grads)
     return target_hyperplane_mean_chempots, target_hyperplane_chempots_grads
 
 
@@ -306,13 +308,19 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
             counter = counter + 1
         wks = Workspace(database=dbf, components=species, phases=current_phase,  phase_record_factory=phase_record_factory, conditions=cond_dict)
         constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
+        #print(constrained_energy)
         driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex_comp_estimate) - constrained_energy
+        #print(driving_force)
+        constrained_energy_gradient = []
+        for key in parameter_dict:
+            constrained_energy_gradient.append(wks.get(IsolatedPhase(current_phase,wks=wks)('GM.'+key)))
+            
         gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
         gradients = wks.get(*gradient_params)
-        if type(gradients) is list:
-            constrained_energy_gradient = [float(element) for element in gradients]
-        else:
-            constrained_energy_gradient = gradients
+        constrained_energy_gradient1 = gradients
+        
+        #print(constrained_energy_gradient)
+        #print(constrained_energy_gradient1)
         driving_force_gradient = np.squeeze(np.matmul(vertex_comp_estimate,target_hyperplane_chempots_grads) - constrained_energy_gradient)
         
     elif vertex.is_disordered:
@@ -344,14 +352,21 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
     else:
         wks = Workspace(database=dbf, components=species, phases=current_phase, models=models, phase_record_factory=phase_record_factory, conditions=cond_dict)
         constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
+        #print(constrained_energy)
         driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex.composition) - constrained_energy
-        gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
-        gradients = wks.get(*gradient_params)
-        if type(gradients) is list:
-            constrained_energy_gradient = [float(element) for element in gradients]
-        else:
-            constrained_energy_gradient = gradients
+        #print(driving_force)
+        constrained_energy_gradient = []
+        for key in parameter_dict:
+            constrained_energy_gradient.append(wks.get(IsolatedPhase(current_phase,wks=wks)('GM.'+key)))
+        #gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
+        #gradients = wks.get(*gradient_params)
+        #if type(gradients) is list:
+        #    constrained_energy_gradient = [float(element) for element in gradients]
+        #else:
+        #    constrained_energy_gradient = gradients
+        #print(constrained_energy_gradient)
         driving_force_gradient = np.squeeze(np.matmul(np.reshape(vertex.composition,(1,-1)),target_hyperplane_chempots_grads) - constrained_energy_gradient)
+        #print(driving_force_gradient)
     return driving_force, driving_force_gradient
 
 
@@ -411,7 +426,10 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]],
                 #print('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref)
                 data_driving_forces.extend([0]*len(phase_region.vertices))
                 data_weights.extend([weight]*len(phase_region.vertices))
-                data_gradients.extend([[0]*len(parameters)]*len(phase_region.vertices))
+                if len(parameters) == 1:
+                    data_gradients.extend(np.zeros(len(phase_region.vertices)))
+                else:
+                    data_gradients.extend([[0]*len(parameters)]*len(phase_region.vertices))
                 continue
             # 2. Calculate the driving force to that hyperplane for each vertex
             for vertex in phase_region.vertices:
@@ -455,7 +473,11 @@ def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]],
     weights = np.concatenate(weights)
     gradients = np.concatenate(gradients)
     if np.any(np.logical_or(np.isinf(driving_forces), np.isnan(driving_forces))):
-        return -np.inf, np.zeros(np.shape(gradients)[1]) #NEED TO ASK ABOUT THIS
+        if len(parameters) == 1:
+            return -np.inf, -np.inf
+        else:
+            return -np.inf, np.ones(len(parameters))*(-np.inf)
+        
     log_probabilites = norm.logpdf(driving_forces, loc=0, scale=1000/data_weight/weights)
     grad_log_probs = -driving_forces*gradients.T/(1000/data_weight/weights)**2
     if grad_log_probs.ndim == 1:

From 3ac40d861d124b58adbe6222d918b2c9c9811cce Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Mon, 4 Nov 2024 16:31:52 -0600
Subject: [PATCH 10/27] Update equilibrium_thermochemical_error.py

---
 .../equilibrium_thermochemical_error.py       | 698 +++++++++++-------
 1 file changed, 444 insertions(+), 254 deletions(-)

diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py
index 0698ba71..233033e9 100644
--- a/espei/error_functions/equilibrium_thermochemical_error.py
+++ b/espei/error_functions/equilibrium_thermochemical_error.py
@@ -1,305 +1,491 @@
 """
-Calculate error due to equilibrium thermochemical properties.
+Calculate error due to thermochemical quantities: heat capacity, entropy, enthalpy.
 """
 
 import logging
 from collections import OrderedDict
-from typing import Dict, List, NamedTuple, Optional, Sequence, Tuple, Type, Union
+from typing import Any, Dict, List, NewType, Optional, Tuple, Union
 
+import symengine
+from scipy.stats import norm
 import numpy as np
 import numpy.typing as npt
-import tinydb
+from symengine import Symbol
 from tinydb import where
-from scipy.stats import norm
-from pycalphad import Database, Model, ReferenceState, variables as v
-from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition
 from pycalphad.codegen.phase_record_factory import PhaseRecordFactory
-from pycalphad import Workspace, as_property
-from pycalphad.property_framework import JanssonDerivative
+from pycalphad import Database, Model, ReferenceState, variables as v
+from pycalphad.core.utils import unpack_species, get_pure_elements, filter_phases
+from pycalphad import Workspace
+from pycalphad.property_framework import IsolatedPhase, JanssonDerivative
+from pycalphad.property_framework.metaproperties import find_first_compset
+from pycalphad.core.solver import Solver, SolverResult
+
 
-from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
+from espei.datasets import Dataset
+from espei.core_utils import ravel_conditions, get_prop_data, filter_temperatures
+from espei.parameter_selection.redlich_kister import calc_interaction_product
 from espei.phase_models import PhaseModelSpecification
-from espei.shadow_functions import update_phase_record_parameters
+from espei.sublattice_tools import canonicalize, recursive_tuplify, tuplify
 from espei.typing import SymbolName
-from espei.utils import PickleableTinyDB, database_symbols_to_fit
+from espei.utils import database_symbols_to_fit, PickleableTinyDB
+from .residual_base import ResidualFunction, residual_function_registry
 
 _log = logging.getLogger(__name__)
 
-
-EqPropData = NamedTuple('EqPropData', (('dbf', Database),
-                                       ('species', Sequence[v.Species]),
-                                       ('phases', Sequence[str]),
-                                       ('potential_conds', Dict[v.StateVariable, float]),
-                                       ('composition_conds', Sequence[Dict[v.X, float]]),
-                                       ('models', Dict[str, Model]),
-                                       ('params_keys', Dict[str, float]),
-                                       ('phase_record_factory', PhaseRecordFactory),
-                                       ('output', str),
-                                       ('samples', np.ndarray),
-                                       ('weight', np.ndarray),
-                                       ('reference', str),
-                                       ))
-
-
-def build_eqpropdata(data: tinydb.database.Document,
-                     dbf: Database,
-                     model: Optional[Dict[str, Type[Model]]] = None,
-                     parameters: Optional[Dict[str, float]] = None,
-                     data_weight_dict: Optional[Dict[str, float]] = None
-                     ) -> EqPropData:
+class NoSolveSolver(Solver):
+    def solve(self, composition_sets, conditions):
+        """
+        Initialize a solver, but don't actually do anything - i.e. return the SolverResult as if the calculation converged.
+
+        """
+        spec = self.get_system_spec(composition_sets, conditions)
+        self._fix_state_variables_in_compsets(composition_sets, conditions)
+        state = spec.get_new_state(composition_sets)
+        # DO NOT: converged = spec.run_loop(state, 1000)
+
+        if self.remove_metastable:
+            phase_idx = 0
+            compsets_to_remove = []
+            for compset in composition_sets:
+                # Mark unstable phases for removal
+                if compset.NP <= 0.0 and not compset.fixed:
+                    compsets_to_remove.append(int(phase_idx))
+                phase_idx += 1
+            # Watch removal order here, as the indices of composition_sets are changing!
+            for idx in reversed(compsets_to_remove):
+                del composition_sets[idx]
+
+        phase_amt = [compset.NP for compset in composition_sets]
+
+        x = composition_sets[0].dof
+        state_variables = composition_sets[0].phase_record.state_variables
+        num_statevars = len(state_variables)
+        for compset in composition_sets[1:]:
+            x = np.r_[x, compset.dof[num_statevars:]]
+        x = np.r_[x, phase_amt]
+        chemical_potentials = np.array(state.chemical_potentials)
+
+        if self.verbose:
+            print('Chemical Potentials', chemical_potentials)
+            print(np.asarray(x))
+        return SolverResult(converged=True, x=x, chemical_potentials=chemical_potentials)
+
+
+# TODO: make into an object similar to how ZPF data works?
+FixedConfigurationCalculationData = NewType("FixedConfigurationCalculationData", Dict[str, Any])
+
+def filter_sublattice_configurations(desired_data: List[Dataset], subl_model) -> List[Dataset]:  # TODO: symmetry support
+    """Modify the desired_data to remove any configurations that cannot be represented by the sublattice model."""
+    subl_model_sets = [set(subl) for subl in subl_model]
+    for data in desired_data:
+        matching_configs = []  # binary mask of whether a configuration is represented by the sublattice model
+        for config in data['solver']['sublattice_configurations']:
+            config = recursive_tuplify(canonicalize(config, None))
+            if (
+                len(config) == len(subl_model) and
+                all(subl.issuperset(tuplify(config_subl)) for subl, config_subl in zip(subl_model_sets, config))
+            ):
+                matching_configs.append(True)
+            else:
+                matching_configs.append(False)
+        matching_configs = np.asarray(matching_configs, dtype=np.bool_)
+
+        # Rewrite output values with filtered data
+        data['values'] = np.array(data['values'], dtype=np.float64)[..., matching_configs]
+        data['solver']['sublattice_configurations'] = np.array(data['solver']['sublattice_configurations'], dtype=np.object_)[matching_configs].tolist()
+        if 'sublattice_occupancies' in data['solver']:
+            data['solver']['sublattice_occupancies'] = np.array(data['solver']['sublattice_occupancies'], dtype=np.object_)[matching_configs].tolist()
+    return desired_data
+
+
+def calculate_points_array(phase_constituents, configuration, occupancies=None):
     """
-    Build EqPropData for the calculations corresponding to a single dataset.
+    Calculate the points array to use in pycalphad calculate calls.
+
+    Converts the configuration data (and occupancies for mixing data) into the
+    points array by looking up the indices in the active phase constituents.
 
     Parameters
     ----------
-    data : tinydb.database.Document
-        Document corresponding to a single ESPEI dataset.
-    dbf : Database
-        Database that should be used to construct the `Model` and `PhaseRecord` objects.
-    model : Optional[Dict[str, Type[Model]]]
-        Dictionary phase names to pycalphad Model classes.
-    parameters : Optional[Dict[str, float]]
-        Mapping of parameter symbols to values.
-    data_weight_dict : Optional[Dict[str, float]]
-        Mapping of a data type (e.g. `HM` or `SM`) to a weight.
+    phase_constituents : list
+        List of active constituents in a phase
+    configuration : list
+        List of the sublattice configuration
+    occupancies : list
+        List of sublattice occupancies. Required for mixing sublattices, otherwise takes no effect.
 
     Returns
     -------
-    EqPropData
-    """
-    parameters = parameters if parameters is not None else {}
-    data_weight_dict = data_weight_dict if data_weight_dict is not None else {}
-    property_std_deviation = {
-        'HM': 500.0,  # J/mol
-        'SM':   0.2,  # J/K-mol
-        'CPM':  0.2,  # J/K-mol
-    }
+    numpy.ndarray
 
-    #params_keys, _ = extract_parameters(parameters)
-    
-    params_keys = []
+    Notes
+    -----
+    Errors will be raised if components in the configuration are not in the
+    corresponding phase constituents sublattice.
+    """
+    # pad the occupancies for zipping if none were passed (the case for non-mixing)
+    if occupancies is None:
+        occupancies = [0] * len(configuration)
+
+    # construct the points array from zeros
+    points = np.zeros(sum(len(subl) for subl in phase_constituents))
+    current_subl_idx = 0  # index that marks the beginning of the sublattice
+    for phase_subl, config_subl, subl_occupancies in zip(phase_constituents, configuration, occupancies):
+        phase_subl = list(phase_subl)
+        if isinstance(config_subl, (tuple, list)):
+            # we have mixing on the sublattice
+            for comp, comp_occupancy in zip(config_subl, subl_occupancies):
+                points[current_subl_idx + phase_subl.index(comp)] = comp_occupancy
+        else:
+            points[current_subl_idx + phase_subl.index(config_subl)] = 1
+        current_subl_idx += len(phase_subl)
+    return points
 
-    # XXX: This mutates the global pycalphad namespace
-    for key in parameters.keys():
-        if not hasattr(v, key):
-            setattr(v, key, v.IndependentPotential(key))
-        params_keys.append(getattr(v, key))
-        # XXX: Mutates argument to function
-        dbf.symbols.pop(key,None)
 
-    data_comps = list(set(data['components']).union({'VA'}))
-    species = sorted(unpack_species(dbf, data_comps), key=str)
-    data_phases = filter_phases(dbf, species, candidate_phases=data['phases'])
-    #models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters)
-    models = instantiate_models(dbf, species, data_phases, model=model)
-    output = data['output']
-    property_output = output.split('_')[0]  # property without _FORM, _MIX, etc.
-    samples = np.array(data['values']).flatten()
-    reference = data.get('reference', '')
-
-    # Models are now modified in response to the data from this data
-    # TODO: build a reference state MetaProperty with the reference state information, maybe just-in-time, below
-    if 'reference_states' in data:
-        property_output = output[:-1] if output.endswith('R') else output  # unreferenced model property so we can tell shift_reference_state what to build.
-        reference_states = []
-        for el, vals in data['reference_states'].items():
-            reference_states.append(ReferenceState(v.Species(el), vals['phase'], fixed_statevars=vals.get('fixed_state_variables')))
-        for mod in models.values():
-            mod.shift_reference_state(reference_states, dbf, output=(property_output,))
-
-    data['conditions'].setdefault('N', 1.0)  # Add default for N. Nothing else is supported in pycalphad anyway.
-    pot_conds = OrderedDict([(getattr(v, key), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if not key.startswith('X_')])
-    comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')])
-
-    phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds, **parameters}, models)
-    #wks = Workspace(dbf, species, data_phases, {**pot_conds, **comp_conds, **parameters}, models=model)
-    #phase_record_factory = wks.phase_record_factory
-
-    # Now we need to unravel the composition conditions
-    # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the
-    # composition conditions are only broadcast against the potentials, not
-    # each other. Each individual composition needs to be computed
-    # independently, since broadcasting over composition cannot be turned off
-    # in pycalphad.
-    rav_comp_conds = [OrderedDict(zip(comp_conds.keys(), pt_comps)) for pt_comps in zip(*comp_conds.values())]
-
-    # Build weights, should be the same size as the values
-    total_num_calculations = len(rav_comp_conds)*np.prod([len(vals) for vals in pot_conds.values()])
-    dataset_weights = np.array(data.get('weight', 1.0)) * np.ones(total_num_calculations)
-    weights = (property_std_deviation.get(property_output, 1.0)/data_weight_dict.get(property_output, 1.0)/dataset_weights).flatten()
-
-    return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_record_factory, output, samples, weights, reference)
-
-
-def get_equilibrium_thermochemical_data(dbf: Database, comps: Sequence[str],
-                                        phases: Sequence[str],
-                                        datasets: PickleableTinyDB,
-                                        model: Optional[Dict[str, Model]] = None,
-                                        parameters: Optional[Dict[str, float]] = None,
-                                        data_weight_dict: Optional[Dict[str, float]] = None,
-                                        ) -> Sequence[EqPropData]:
+def get_prop_samples(desired_data, constituents):
     """
-    Get all the EqPropData for each matching equilibrium thermochemical dataset in the datasets
+    Return data values and the conditions to calculate them using pycalphad.calculate
 
     Parameters
     ----------
-    dbf : Database
-        Database with parameters to fit
-    comps : Sequence[str]
-        List of pure element components used to find matching datasets.
-    phases : Sequence[str]
-        List of phases used to search for matching datasets.
-    datasets : PickleableTinyDB
-        Datasets that contain single phase data
-    model : Optional[Dict[str, Type[Model]]]
-        Dictionary phase names to pycalphad Model classes.
-    parameters : Optional[Dict[str, float]]
-        Mapping of parameter symbols to values.
-    data_weight_dict : Optional[Dict[str, float]]
-        Mapping of a data type (e.g. `HM` or `SM`) to a weight.
-
-    Notes
-    -----
-    Found datasets will be subsets of the components and phases. Equilibrium
-    thermochemical data is assumed to be any data that does not have the
-    `solver` key, and does not have an output of `ZPF` or `ACR` (which
-    correspond to different data types than can be calculated here.)
+    desired_data : List[Dict[str, Any]]
+        List of dataset dictionaries that contain the values to sample
+    constituents : List[List[str]]
+        Names of constituents in each sublattice.
 
     Returns
     -------
-    Sequence[EqPropData]
+    Dict[str, Union[float, ArrayLike, List[float]]]
+        Dictionary of condition kwargs for pycalphad's calculate and the expected values
+
     """
+    # TODO: assumes T, P, N as conditions
+    # calculate needs points, state variable lists, and values to compare to
+    num_dof = sum(map(len, constituents))
+    calculate_dict = {
+        'N': np.array([]),
+        'P': np.array([]),
+        'T': np.array([]),
+        'points': np.atleast_2d([[]]).reshape(-1, num_dof),
+        'values': np.array([]),
+        'weights': [],
+        'references': [],
+    }
 
-    desired_data = datasets.search(
-        # data that isn't ZPF or non-equilibrium thermochemical
-        (where('output') != 'ZPF') & (~where('solver').exists()) &
-        (where('output').test(lambda x: 'ACR' not in x)) &  # activity data not supported yet
-        (where('components').test(lambda x: set(x).issubset(comps))) &
-        (where('phases').test(lambda x: set(x).issubset(set(phases))))
-    )
+    for datum in desired_data:
+        # extract the data we care about
+        datum_T = datum['conditions']['T']
+        datum_P = datum['conditions']['P']
+        # TODO: fix this when N different from 1 allowed in pycalphad
+        datum_N = np.full_like(datum['values'], 1.0)
+        configurations = datum['solver']['sublattice_configurations']
+        occupancies = datum['solver'].get('sublattice_occupancies')
+        values = np.array(datum['values'])
+        if values.size == 0:
+            # Skip any data that don't have any values left (e.g. after filtering)
+            continue
+        # Broadcast the weights to the shape of the values. This ensures that
+        # the sizes of the weights and values are the same, which is important
+        # because they are flattened later (so the shape information is lost).
+        weights = np.broadcast_to(np.asarray(datum.get('weight', 1.0)), values.shape)
+
+        # broadcast and flatten the conditions arrays
+        P, T, N = ravel_conditions(values, datum_P, datum_T, datum_N)
+        if occupancies is None:
+            occupancies = [None] * len(configurations)
+
+        # calculate the points arrays, should be 2d array of points arrays
+        points = np.array([calculate_points_array(constituents, config, occup) for config, occup in zip(configurations, occupancies)])
+        assert values.shape == weights.shape, f"Values data shape {values.shape} does not match weights shape {weights.shape}"
+
+        # add everything to the calculate_dict
+        calculate_dict['P'] = np.concatenate([calculate_dict['P'], P])
+        calculate_dict['T'] = np.concatenate([calculate_dict['T'], T])
+        calculate_dict['N'] = np.concatenate([calculate_dict['N'], N])
+        calculate_dict['points'] = np.concatenate([calculate_dict['points'], np.tile(points, (values.shape[0]*values.shape[1], 1))], axis=0)
+        calculate_dict['values'] = np.concatenate([calculate_dict['values'], values.flatten()])
+        calculate_dict['weights'].extend(weights.flatten())
+        calculate_dict['references'].extend([datum.get('reference', "") for _ in range(values.flatten().size)])
+    return calculate_dict
+
+
+def get_sample_condition_dicts(calculate_dict: Dict[Any, Any], configuration_tuple: Tuple[Union[str, Tuple[str]]], phase_name: str) -> List[Dict[Symbol, float]]:
+    sublattice_dof = list(map(len, configuration_tuple))
+    sample_condition_dicts = []
+    for sample_idx in range(calculate_dict["values"].size):
+        cond_dict = {}
+        points = calculate_dict["points"][sample_idx, :]
+
+        # T and P
+        cond_dict[v.T] = calculate_dict["T"][sample_idx]
+        cond_dict[v.P] = calculate_dict["P"][sample_idx]
+
+        # YS site fraction product
+        site_fraction_product = np.prod(points)
+        cond_dict[Symbol("YS")] = site_fraction_product
+
+        # Reconstruct site fractions in sublattice form from points
+        # Required so we can identify which sublattices have interactions
+        points_idxs = [0] + np.cumsum(sublattice_dof).tolist()
+        site_fractions = []
+        for subl_idx in range(len(points_idxs)-1):
+            subl_site_fractions = points[points_idxs[subl_idx]:points_idxs[subl_idx+1]]
+            for species_name, site_frac in zip(configuration_tuple[subl_idx], subl_site_fractions):
+                cond_dict[v.Y(phase_name, subl_idx, species_name)] = site_frac
+            site_fractions.append(subl_site_fractions.tolist())
+
+        # Z (binary) or V_I, V_J, V_K (ternary) interaction products
+        interaction_product = calc_interaction_product(site_fractions)
+        if hasattr(interaction_product, "__len__"):
+            # Ternary interaction
+            assert len(interaction_product) == 3
+            cond_dict[Symbol("V_I")] = interaction_product[0]
+            cond_dict[Symbol("V_J")] = interaction_product[1]
+            cond_dict[Symbol("V_K")] = interaction_product[2]
+        else:
+            cond_dict[Symbol("Z")] = interaction_product
 
-    eq_thermochemical_data = []  # 1:1 correspondence with each dataset
-    for data in desired_data:
-        eq_thermochemical_data.append(build_eqpropdata(data, dbf, model=model, parameters=parameters, data_weight_dict=data_weight_dict))
-    return eq_thermochemical_data
+        sample_condition_dicts.append(cond_dict)
+    return sample_condition_dicts
 
 
-def calc_prop_differences(eqpropdata: EqPropData,
-                          parameters: np.ndarray,
-                          approximate_equilibrium: Optional[bool] = False,
-                          ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+def get_thermochemical_data(dbf, comps, phases, datasets, parameters, model=None, weight_dict=None, symbols_to_fit=None):
     """
-    Calculate differences between the expected and calculated values for a property
 
     Parameters
     ----------
-    eqpropdata : EqPropData
-        Data corresponding to equilibrium calculations for a single datasets.
-    parameters : np.ndarray
-        Array of parameters to fit. Must be sorted in the same symbol sorted
-        order used to create the PhaseRecords.
-    approximate_equilibrium : Optional[bool]
-        Whether or not to use an approximate version of equilibrium that does
-        not refine the solution and uses ``starting_point`` instead.
+    dbf : pycalphad.Database
+        Database to consider
+    comps : list
+        List of active component names
+    phases : list
+        List of phases to consider
+    datasets : espei.utils.PickleableTinyDB
+        Datasets that contain single phase data
+    model : Optional[Dict[str, Type[Model]]]
+        Dictionary phase names to pycalphad Model classes.
+    weight_dict : dict
+        Dictionary of weights for each data type, e.g. {'HM': 200, 'SM': 2}
+    symbols_to_fit : list
+        Parameters to fit. Used to build the models and PhaseRecords.
 
     Returns
     -------
-    Tuple[np.ndarray, np.ndarray]
-        Pair of
-        * differences between the calculated property and expected property
-        * weights for this dataset
-
+    list
+        List of data dictionaries to iterate over
     """
+    
+    params_keys = []
+
+    # XXX: This mutates the global pycalphad namespace
+    for key in parameters.keys():
+        if not hasattr(v, key):
+            setattr(v, key, v.IndependentPotential(key))
+        params_keys.append(getattr(v, key))
+        # XXX: Mutates argument to function
+        dbf.symbols.pop(key,None)
         
-    dbf = eqpropdata.dbf
-    species = eqpropdata.species
-    phases = eqpropdata.phases
-    pot_conds = eqpropdata.potential_conds
-    models = eqpropdata.models
-    phase_record_factory = eqpropdata.phase_record_factory
-    #update_phase_record_parameters(phase_record_factory, parameters)
-    params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters))
-    output = as_property(eqpropdata.output)
-    weights = np.array(eqpropdata.weight, dtype=np.float64)
-    samples = np.array(eqpropdata.samples, dtype=np.float64)
-    wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_record_factory)
-
-    calculated_data = []
-    gradient_data = []
-    for comp_conds in eqpropdata.composition_conds:
-        cond_dict = OrderedDict(**pot_conds, **comp_conds, **params_dict)
-        wks.conditions = cond_dict
-        #wks.parameters = params_dict  # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc.
-        wks.models = models
-        #wks.phase_record_factory = phase_record_factory
-        vals = wks.get(output)
-        calculated_data.extend(np.atleast_1d(vals).tolist())
-        gradient_props = [JanssonDerivative(output, key) for key in params_dict]
-        gradients = wks.get(*gradient_props)
-        gradient_data.append(gradients)
-
-    calculated_data = np.array(calculated_data, dtype=np.float64)
-    gradient_data = np.array(gradient_data, dtype=np.float64)
-
-    assert calculated_data.shape == samples.shape, f"Calculated data shape {calculated_data.shape} does not match samples shape {samples.shape}"
-    assert calculated_data.shape == weights.shape, f"Calculated data shape {calculated_data.shape} does not match weights shape {weights.shape}"
-    differences = calculated_data - samples
-    _log.debug('Output: %s differences: %s, weights: %s, reference: %s', output, differences, weights, eqpropdata.reference)
-    return differences, weights, gradient_data
-
-
-def calculate_equilibrium_thermochemical_probability(eq_thermochemical_data: Sequence[EqPropData],
-                                                     parameters: np.ndarray,
-                                                     approximate_equilibrium: Optional[bool] = False,
-                                                     ) -> Tuple[float, List[float]]:
+    # phase by phase, then property by property, then by model exclusions
+    if weight_dict is None:
+        weight_dict = {}
+
+    if model is None:
+        model = {}
+
+    if symbols_to_fit is not None:
+        symbols_to_fit = sorted(symbols_to_fit)
+    else:
+        symbols_to_fit = database_symbols_to_fit(dbf)
+
+    species_comps = set(unpack_species(dbf, comps))
+
+    # estimated from NIST TRC uncertainties
+    property_std_deviation = {
+        'HM': 500.0/weight_dict.get('HM', 1.0),  # J/mol
+        'SM':   0.2/weight_dict.get('SM', 1.0),  # J/K-mol
+        'CPM':  0.2/weight_dict.get('CPM', 1.0),  # J/K-mol
+    }
+    properties = ['HM_FORM', 'SM_FORM', 'CPM_FORM', 'HM_MIX', 'SM_MIX', 'CPM_MIX']
+
+    ref_states = []
+    for el in get_pure_elements(dbf, comps):
+        ref_state = ReferenceState(el, dbf.refstates[el]['phase'])
+        ref_states.append(ref_state)
+    all_data_dicts = []
+    for phase_name in phases:
+        if phase_name not in dbf.phases:
+            continue
+        # phase constituents are Species objects, so we need to be doing intersections with those
+        phase_constituents = dbf.phases[phase_name].constituents
+        # phase constituents must be filtered to only active:
+        constituents = [[sp.name for sp in sorted(subl_constituents.intersection(species_comps))] for subl_constituents in phase_constituents]
+        for prop in properties:
+            desired_data = get_prop_data(comps, phase_name, prop, datasets, additional_query=(where('solver').exists()))
+            if len(desired_data) == 0:
+                continue
+            unique_exclusions = set([tuple(sorted(set(d.get('excluded_model_contributions', [])))) for d in desired_data])
+            for exclusion in unique_exclusions:
+                data_dict = {
+                    'phase_name': phase_name,
+                    'prop': prop,
+                    # needs the following keys to be added:
+                    # species, calculate_dict, phase_record_factory, model, output, weights
+                }
+                # get all the data with these model exclusions
+                if exclusion == tuple([]):
+                    exc_search = (~where('excluded_model_contributions').exists()) & (where('solver').exists())
+                else:
+                    exc_search = (where('excluded_model_contributions').test(lambda x: tuple(sorted(x)) == exclusion)) & (where('solver').exists())
+                curr_data = get_prop_data(comps, phase_name, prop, datasets, additional_query=exc_search)
+                curr_data = filter_sublattice_configurations(curr_data, constituents)
+                curr_data = filter_temperatures(curr_data)
+                calculate_dict = get_prop_samples(curr_data, constituents)
+                model_cls = model.get(phase_name, Model)
+                #mod = model_cls(dbf, comps, phase_name, parameters=symbols_to_fit)
+                mod = model_cls(dbf, comps, phase_name)
+                if prop.endswith('_FORM'):
+                    output = ''.join(prop.split('_')[:-1])+"R"
+                    mod.shift_reference_state(ref_states, dbf, contrib_mods={e: symengine.S.Zero for e in exclusion})
+                else:
+                    output = prop
+                for contrib in exclusion:
+                    mod.models[contrib] = symengine.S.Zero
+                    try:
+                        # TODO: we can remove this try/except block when pycalphad 0.8.5
+                        # is released with these internal API changes
+                        mod.endmember_reference_model.models[contrib] = symengine.S.Zero
+                    except AttributeError:
+                        mod.reference_model.models[contrib] = symengine.S.Zero
+                model_dict = {phase_name: mod}
+                species = sorted(unpack_species(dbf, comps), key=str)
+                data_dict['species'] = species
+                statevar_dict = {getattr(v, c, None): vals for c, vals in calculate_dict.items() if isinstance(getattr(v, c, None), v.StateVariable)}
+                statevar_dict = OrderedDict(sorted(statevar_dict.items(), key=lambda x: str(x[0])))
+                phase_record_factory = PhaseRecordFactory(dbf, species, statevar_dict, model_dict,
+                                                   parameters={s: 0 for s in symbols_to_fit})
+                str_statevar_dict = OrderedDict((str(k), vals) for k, vals in statevar_dict.items())
+                data_dict['str_statevar_dict'] = str_statevar_dict
+                data_dict['phase_record_factory'] = phase_record_factory
+                data_dict['calculate_dict'] = calculate_dict
+                data_dict['model'] = model_dict
+                data_dict['output'] = output
+                data_dict['weights'] = np.array(property_std_deviation[prop.split('_')[0]])/np.array(calculate_dict.pop('weights'))
+                data_dict['constituents'] = constituents
+                all_data_dicts.append(data_dict)
+    return all_data_dicts
+
+
+def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfigurationCalculationData, parameters):
+    species = calc_data['species']
+    phase_name = calc_data['phase_name']
+    models = calc_data['model']  # Dict[PhaseName: Model]
+    output = calc_data['output']
+    #phase_record_factory = calc_data['phase_record_factory']
+    sample_values = calc_data['calculate_dict']['values']
+    str_statevar_dict = calc_data['str_statevar_dict']
+
+    constituent_list = []
+    sublattice_list = []
+    counter = 0
+    for sublattice in calc_data['constituents']:
+        for const in sublattice:
+            sublattice_list.append(counter)
+            constituent_list.append(const)
+        counter = counter + 1
+
+    differences = []
+    gradients = []
+    for index in range(len(sample_values)):
+        cond_dict = {**parameters}
+        for sv_key, sv_val in str_statevar_dict.items():
+            cond_dict.update({sv_key: sv_val[index]})
+
+        # Build internal DOF as if they were used in conditions
+        dof = {}
+        for site_frac in range(len(constituent_list)):
+            comp = constituent_list[site_frac]
+            occupancy = calc_data['calculate_dict']['points'][index,site_frac]
+            sublattice = sublattice_list[site_frac]
+            dof.update({v.Y(phase_name,sublattice,comp): occupancy})
+
+        # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species
+        # Build composition conditions, probably not necessary given that we don't actually solve anything, but still useful in terms of derivatives probably.
+        active_pure_elements = [list(x.constituents.keys()) for x in species]
+        active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"})
+        ind_comps = len(active_pure_elements) - 1
+        for comp in active_pure_elements:
+            if v.Species(comp) != v.Species('VA') and ind_comps > 0:
+                cond_dict[v.X(comp)] = float(models[phase_name].moles(comp).xreplace(dof))
+                ind_comps = ind_comps - 1
+        # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why.
+        # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered.
+        # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid.
+        #wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_record_factory, solver=NoSolveSolver())
+        wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, solver=NoSolveSolver())
+        # We then get a composition set and we use a special "NoSolveSolver" to
+        # ensure that we don't change from the data-specified DOF.
+        compset = find_first_compset(phase_name, wks)
+        new_sitefracs = np.array([sf for _, sf in sorted(dof.items(), key=lambda y: (y[0].phase_name, y[0].sublattice_index, y[0].species.name))])
+        new_statevars = np.array(compset.dof[:len(compset.phase_record.state_variables)])  # no updates expected
+        compset.update(new_sitefracs, 1.0, new_statevars)
+        iso_phase = IsolatedPhase(compset, wks=wks)
+        iso_phase.solver = NoSolveSolver()
+        results = wks.get(iso_phase(output))
+        #gradient_props = [JanssonDerivative(iso_phase(output), key) for key in parameters]
+        #grads = wks.get(*gradient_props) # this is giving NAN for some parameters
+        
+        grads = []
+        for key in parameters:
+            grads.append(wks.get(iso_phase(output+'.'+key)))
+                
+        sample_differences = results - sample_values[index]
+        differences.append(sample_differences)
+        gradients.append(grads)
+    return differences, gradients
+
+
+def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: List[FixedConfigurationCalculationData], dbf, parameters=None):
     """
-    Calculate the total equilibrium thermochemical probability for all EqPropData
+    Calculate the weighted single phase error in the Database
 
     Parameters
     ----------
-    eq_thermochemical_data : Sequence[EqPropData]
-        List of equilibrium thermochemical data corresponding to the datasets.
+    thermochemical_data : list
+        List of thermochemical data dicts
     parameters : np.ndarray
-        Values of parameters for this iteration to be updated in PhaseRecords.
-    approximate_equilibrium : Optional[bool], optional
-
-    eq_thermochemical_data : Sequence[EqPropData]
+        Array of parameters to calculate the error with.
 
     Returns
     -------
     float
-        Sum of log-probability for all thermochemical data.
+        A single float of the residual sum of square errors
 
     """
-    if len(eq_thermochemical_data) == 0:
-        return 0.0, np.zeros(len(parameters))
-
-    differences = []
-    weights = []
-    gradients = []
-    for eqpropdata in eq_thermochemical_data:
-        diffs, wts, grads = calc_prop_differences(eqpropdata, parameters, approximate_equilibrium)
-        if np.any(np.isinf(diffs) | np.isnan(diffs)):
-            # NaN or infinity are assumed calculation failures. If we are
-            # calculating log-probability, just bail out and return -infinity.
-            return -np.inf, np.zeros(len(parameters))
-        differences.append(diffs)
-        weights.append(wts)
-        gradients.append(grads)
-
-    differences = np.concatenate(differences, axis=0)
-    weights = np.concatenate(weights, axis=0)
-    gradients = np.concatenate(gradients, axis=0)
-    probs = norm(loc=0.0, scale=weights).logpdf(differences)
-    grad_probs = -differences*gradients.T/weights**2
-    if grad_probs.ndim == 1:
-        grad_probs_sum = np.sum(grad_probs, axis=0)
-    else:
-        grad_probs_sum = np.sum(grad_probs, axis=1)
-    return np.sum(probs), grad_probs_sum
+    if parameters is None:
+        parameters = {}
+
+    prob_error = 0.0
+    overall_grad = np.zeros(len(parameters))
+    for data in thermochemical_data:
+        phase_name = data['phase_name']
+        sample_values = data['calculate_dict']['values']
+        differences, gradients = compute_fixed_configuration_property_differences(dbf, data, parameters)
+        differences = np.array(differences)
+        gradients = np.array(gradients)
+        probabilities = norm.logpdf(differences, loc=0, scale=data['weights'])
+        prob_sum = np.sum(probabilities)
+        _log.debug("%s(%s) - probability sum: %0.2f, data: %s, differences: %s, probabilities: %s, references: %s", data['prop'], phase_name, prob_sum, sample_values, differences, probabilities, data['calculate_dict']['references'])
+        prob_error += prob_sum
+        derivative = -differences*gradients.T/data['weights']**2
+        if derivative.ndim == 1:
+            grad_prob = np.sum(derivative, axis = 0)
+        else:
+            grad_prob = np.sum(derivative, axis=1)
+        overall_grad += grad_prob
+    return prob_error, overall_grad
 
 
-class EquilibriumPropertyResidual(ResidualFunction):
+class FixedConfigurationPropertyResidual(ResidualFunction):
     def __init__(
         self,
         database: Database,
@@ -324,25 +510,29 @@ def __init__(
         phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys()))
         if symbols_to_fit is None:
             symbols_to_fit = database_symbols_to_fit(database)
-        # okay if parameters are initialized to zero, we only need the symbol names
         parameters = dict(zip(symbols_to_fit, [0]*len(symbols_to_fit)))
-        self.property_data = get_equilibrium_thermochemical_data(database, comps, phases, datasets, model_dict, parameters, data_weight_dict=self.weight)
+        self.thermochemical_data = get_thermochemical_data(database, comps, phases, datasets, parameters, model_dict, weight_dict=self.weight, symbols_to_fit=symbols_to_fit)
+        self._symbols_to_fit = symbols_to_fit
+        self.dbf = database
 
     def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[float]]:
         residuals = []
         weights = []
-        for data in self.property_data:
-            dataset_residuals, dataset_weights = calc_prop_differences(data, parameters)
-            residuals.extend(dataset_residuals.tolist())
-            weights.extend(dataset_weights.tolist())
+        for data in self.thermochemical_data:
+            dataset_residuals = compute_fixed_configuration_property_differences(self.dbf, data, dict(zip(self._symbols_to_fit, parameters)))
+            residuals.extend(dataset_residuals)
+            dataset_weights = np.asarray(data["weights"], dtype=float).flatten().tolist()
+            if len(dataset_weights) != len(dataset_residuals):
+                # we need to broadcast the residuals. For now, assume the weights are a scalar, since that's all that's supported
+                assert len(dataset_weights) == 1
+                dataset_weights = [float(dataset_weights[0]) for _ in range(len(dataset_residuals))]
+            weights.extend(dataset_weights)
         return residuals, weights
 
     def get_likelihood(self, parameters) -> Tuple[float, List[float]]:
-        likelihood, gradients = calculate_equilibrium_thermochemical_probability(self.property_data, parameters)
-        return likelihood, gradients
-
-
-residual_function_registry.register(EquilibriumPropertyResidual)
+        parameters = {param_name: param for param_name, param in zip(self._symbols_to_fit, parameters.tolist())}
+        likelihood, gradient = calculate_non_equilibrium_thermochemical_probability(self.thermochemical_data, self.dbf, parameters)
+        return likelihood, gradient
 
 
-residual_function_registry.register(EquilibriumPropertyResidual)
+residual_function_registry.register(FixedConfigurationPropertyResidual)

From 5299e4a2b9bba74b007cfb69a631eba42a667f3d Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Mon, 4 Nov 2024 16:32:59 -0600
Subject: [PATCH 11/27] Update zpf_error.py

---
 espei/error_functions/zpf_error.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py
index 54b9c60d..c929b72c 100644
--- a/espei/error_functions/zpf_error.py
+++ b/espei/error_functions/zpf_error.py
@@ -310,17 +310,23 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
         constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
         #print(constrained_energy)
         driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex_comp_estimate) - constrained_energy
-        #print(driving_force)
+        #constrained_energy_gradient = []
+        #for key in parameter_dict:
+        #    constrained_energy_gradient.append(wks.get(IsolatedPhase(current_phase,wks=wks)('GM.'+key)))
+            
+        #gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
+        #constrained_energy_gradient = wks.get(*gradient_params)
+        
+        #ip = IsolatedPhase(current_phase, wks=wks)
+        #gradient_params = [JanssonDerivative(ip('GM'), key) for key in parameter_dict]
+        #constrained_energy_gradient = wks.get(*gradient_params)
+        
+        ip = IsolatedPhase(current_phase, wks=wks)
         constrained_energy_gradient = []
         for key in parameter_dict:
-            constrained_energy_gradient.append(wks.get(IsolatedPhase(current_phase,wks=wks)('GM.'+key)))
-            
-        gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
-        gradients = wks.get(*gradient_params)
-        constrained_energy_gradient1 = gradients
+            constrained_energy_gradient.append(wks.get(ip('GM.'+key)))
         
-        #print(constrained_energy_gradient)
-        #print(constrained_energy_gradient1)
+    
         driving_force_gradient = np.squeeze(np.matmul(vertex_comp_estimate,target_hyperplane_chempots_grads) - constrained_energy_gradient)
         
     elif vertex.is_disordered:

From 2ca3fc582c3edf333f3cd541dcde7de303465bad Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 6 Nov 2024 18:05:42 -0600
Subject: [PATCH 12/27] Update non_equilibrium_thermochemical_error.py

---
 .../non_equilibrium_thermochemical_error.py           | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py
index b16ad35a..eb554474 100644
--- a/espei/error_functions/non_equilibrium_thermochemical_error.py
+++ b/espei/error_functions/non_equilibrium_thermochemical_error.py
@@ -1,4 +1,3 @@
-"""
 Calculate error due to thermochemical quantities: heat capacity, entropy, enthalpy.
 """
 
@@ -431,8 +430,12 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
         iso_phase = IsolatedPhase(compset, wks=wks)
         iso_phase.solver = NoSolveSolver()
         results = wks.get(iso_phase(output))
-        gradient_props = [JanssonDerivative(iso_phase(output), key) for key in parameters]
-        grads = wks.get(*gradient_props) # this is giving NAN for some parameters
+        #gradient_props = [JanssonDerivative(iso_phase(output), key) for key in parameters]
+        #grads = wks.get(*gradient_props) # this is giving NAN for some parameters
+        
+        grads = []
+        for key in parameters:
+            grads.append(wks.get(iso_phase(output+'.'+key)))
                 
         sample_differences = results - sample_values[index]
         differences.append(sample_differences)
@@ -515,7 +518,7 @@ def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[fl
         residuals = []
         weights = []
         for data in self.thermochemical_data:
-            dataset_residuals = compute_fixed_configuration_property_differences(self.dbf, data, dict(zip(self._symbols_to_fit, parameters)))
+            dataset_residuals, grads = compute_fixed_configuration_property_differences(self.dbf, data, dict(zip(self._symbols_to_fit, parameters)))
             residuals.extend(dataset_residuals)
             dataset_weights = np.asarray(data["weights"], dtype=float).flatten().tolist()
             if len(dataset_weights) != len(dataset_residuals):

From 9b52326a17a24bd2ff4c2532fb0b412ec76a8a47 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 6 Nov 2024 18:06:45 -0600
Subject: [PATCH 13/27] Update equilibrium_thermochemical_error.py

---
 .../equilibrium_thermochemical_error.py       | 695 +++++++-----------
 1 file changed, 251 insertions(+), 444 deletions(-)

diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py
index 233033e9..a01c000d 100644
--- a/espei/error_functions/equilibrium_thermochemical_error.py
+++ b/espei/error_functions/equilibrium_thermochemical_error.py
@@ -1,491 +1,305 @@
 """
-Calculate error due to thermochemical quantities: heat capacity, entropy, enthalpy.
+Calculate error due to equilibrium thermochemical properties.
 """
 
 import logging
 from collections import OrderedDict
-from typing import Any, Dict, List, NewType, Optional, Tuple, Union
+from typing import Dict, List, NamedTuple, Optional, Sequence, Tuple, Type, Union
 
-import symengine
-from scipy.stats import norm
 import numpy as np
 import numpy.typing as npt
-from symengine import Symbol
+import tinydb
 from tinydb import where
-from pycalphad.codegen.phase_record_factory import PhaseRecordFactory
+from scipy.stats import norm
 from pycalphad import Database, Model, ReferenceState, variables as v
-from pycalphad.core.utils import unpack_species, get_pure_elements, filter_phases
-from pycalphad import Workspace
-from pycalphad.property_framework import IsolatedPhase, JanssonDerivative
-from pycalphad.property_framework.metaproperties import find_first_compset
-from pycalphad.core.solver import Solver, SolverResult
-
+from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition
+from pycalphad.codegen.phase_record_factory import PhaseRecordFactory
+from pycalphad import Workspace, as_property
+from pycalphad.property_framework import JanssonDerivative
 
-from espei.datasets import Dataset
-from espei.core_utils import ravel_conditions, get_prop_data, filter_temperatures
-from espei.parameter_selection.redlich_kister import calc_interaction_product
+from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
 from espei.phase_models import PhaseModelSpecification
-from espei.sublattice_tools import canonicalize, recursive_tuplify, tuplify
+from espei.shadow_functions import update_phase_record_parameters
 from espei.typing import SymbolName
-from espei.utils import database_symbols_to_fit, PickleableTinyDB
-from .residual_base import ResidualFunction, residual_function_registry
+from espei.utils import PickleableTinyDB, database_symbols_to_fit
 
 _log = logging.getLogger(__name__)
 
-class NoSolveSolver(Solver):
-    def solve(self, composition_sets, conditions):
-        """
-        Initialize a solver, but don't actually do anything - i.e. return the SolverResult as if the calculation converged.
-
-        """
-        spec = self.get_system_spec(composition_sets, conditions)
-        self._fix_state_variables_in_compsets(composition_sets, conditions)
-        state = spec.get_new_state(composition_sets)
-        # DO NOT: converged = spec.run_loop(state, 1000)
-
-        if self.remove_metastable:
-            phase_idx = 0
-            compsets_to_remove = []
-            for compset in composition_sets:
-                # Mark unstable phases for removal
-                if compset.NP <= 0.0 and not compset.fixed:
-                    compsets_to_remove.append(int(phase_idx))
-                phase_idx += 1
-            # Watch removal order here, as the indices of composition_sets are changing!
-            for idx in reversed(compsets_to_remove):
-                del composition_sets[idx]
-
-        phase_amt = [compset.NP for compset in composition_sets]
-
-        x = composition_sets[0].dof
-        state_variables = composition_sets[0].phase_record.state_variables
-        num_statevars = len(state_variables)
-        for compset in composition_sets[1:]:
-            x = np.r_[x, compset.dof[num_statevars:]]
-        x = np.r_[x, phase_amt]
-        chemical_potentials = np.array(state.chemical_potentials)
-
-        if self.verbose:
-            print('Chemical Potentials', chemical_potentials)
-            print(np.asarray(x))
-        return SolverResult(converged=True, x=x, chemical_potentials=chemical_potentials)
-
-
-# TODO: make into an object similar to how ZPF data works?
-FixedConfigurationCalculationData = NewType("FixedConfigurationCalculationData", Dict[str, Any])
-
-def filter_sublattice_configurations(desired_data: List[Dataset], subl_model) -> List[Dataset]:  # TODO: symmetry support
-    """Modify the desired_data to remove any configurations that cannot be represented by the sublattice model."""
-    subl_model_sets = [set(subl) for subl in subl_model]
-    for data in desired_data:
-        matching_configs = []  # binary mask of whether a configuration is represented by the sublattice model
-        for config in data['solver']['sublattice_configurations']:
-            config = recursive_tuplify(canonicalize(config, None))
-            if (
-                len(config) == len(subl_model) and
-                all(subl.issuperset(tuplify(config_subl)) for subl, config_subl in zip(subl_model_sets, config))
-            ):
-                matching_configs.append(True)
-            else:
-                matching_configs.append(False)
-        matching_configs = np.asarray(matching_configs, dtype=np.bool_)
-
-        # Rewrite output values with filtered data
-        data['values'] = np.array(data['values'], dtype=np.float64)[..., matching_configs]
-        data['solver']['sublattice_configurations'] = np.array(data['solver']['sublattice_configurations'], dtype=np.object_)[matching_configs].tolist()
-        if 'sublattice_occupancies' in data['solver']:
-            data['solver']['sublattice_occupancies'] = np.array(data['solver']['sublattice_occupancies'], dtype=np.object_)[matching_configs].tolist()
-    return desired_data
-
-
-def calculate_points_array(phase_constituents, configuration, occupancies=None):
-    """
-    Calculate the points array to use in pycalphad calculate calls.
-
-    Converts the configuration data (and occupancies for mixing data) into the
-    points array by looking up the indices in the active phase constituents.
 
-    Parameters
-    ----------
-    phase_constituents : list
-        List of active constituents in a phase
-    configuration : list
-        List of the sublattice configuration
-    occupancies : list
-        List of sublattice occupancies. Required for mixing sublattices, otherwise takes no effect.
-
-    Returns
-    -------
-    numpy.ndarray
-
-    Notes
-    -----
-    Errors will be raised if components in the configuration are not in the
-    corresponding phase constituents sublattice.
+EqPropData = NamedTuple('EqPropData', (('dbf', Database),
+                                       ('species', Sequence[v.Species]),
+                                       ('phases', Sequence[str]),
+                                       ('potential_conds', Dict[v.StateVariable, float]),
+                                       ('composition_conds', Sequence[Dict[v.X, float]]),
+                                       ('models', Dict[str, Model]),
+                                       ('params_keys', Dict[str, float]),
+                                       ('phase_record_factory', PhaseRecordFactory),
+                                       ('output', str),
+                                       ('samples', np.ndarray),
+                                       ('weight', np.ndarray),
+                                       ('reference', str),
+                                       ))
+
+
+def build_eqpropdata(data: tinydb.database.Document,
+                     dbf: Database,
+                     model: Optional[Dict[str, Type[Model]]] = None,
+                     parameters: Optional[Dict[str, float]] = None,
+                     data_weight_dict: Optional[Dict[str, float]] = None
+                     ) -> EqPropData:
     """
-    # pad the occupancies for zipping if none were passed (the case for non-mixing)
-    if occupancies is None:
-        occupancies = [0] * len(configuration)
-
-    # construct the points array from zeros
-    points = np.zeros(sum(len(subl) for subl in phase_constituents))
-    current_subl_idx = 0  # index that marks the beginning of the sublattice
-    for phase_subl, config_subl, subl_occupancies in zip(phase_constituents, configuration, occupancies):
-        phase_subl = list(phase_subl)
-        if isinstance(config_subl, (tuple, list)):
-            # we have mixing on the sublattice
-            for comp, comp_occupancy in zip(config_subl, subl_occupancies):
-                points[current_subl_idx + phase_subl.index(comp)] = comp_occupancy
-        else:
-            points[current_subl_idx + phase_subl.index(config_subl)] = 1
-        current_subl_idx += len(phase_subl)
-    return points
-
-
-def get_prop_samples(desired_data, constituents):
-    """
-    Return data values and the conditions to calculate them using pycalphad.calculate
+    Build EqPropData for the calculations corresponding to a single dataset.
 
     Parameters
     ----------
-    desired_data : List[Dict[str, Any]]
-        List of dataset dictionaries that contain the values to sample
-    constituents : List[List[str]]
-        Names of constituents in each sublattice.
+    data : tinydb.database.Document
+        Document corresponding to a single ESPEI dataset.
+    dbf : Database
+        Database that should be used to construct the `Model` and `PhaseRecord` objects.
+    model : Optional[Dict[str, Type[Model]]]
+        Dictionary phase names to pycalphad Model classes.
+    parameters : Optional[Dict[str, float]]
+        Mapping of parameter symbols to values.
+    data_weight_dict : Optional[Dict[str, float]]
+        Mapping of a data type (e.g. `HM` or `SM`) to a weight.
 
     Returns
     -------
-    Dict[str, Union[float, ArrayLike, List[float]]]
-        Dictionary of condition kwargs for pycalphad's calculate and the expected values
-
+    EqPropData
     """
-    # TODO: assumes T, P, N as conditions
-    # calculate needs points, state variable lists, and values to compare to
-    num_dof = sum(map(len, constituents))
-    calculate_dict = {
-        'N': np.array([]),
-        'P': np.array([]),
-        'T': np.array([]),
-        'points': np.atleast_2d([[]]).reshape(-1, num_dof),
-        'values': np.array([]),
-        'weights': [],
-        'references': [],
+    parameters = parameters if parameters is not None else {}
+    data_weight_dict = data_weight_dict if data_weight_dict is not None else {}
+    property_std_deviation = {
+        'HM': 500.0,  # J/mol
+        'SM':   0.2,  # J/K-mol
+        'CPM':  0.2,  # J/K-mol
     }
 
-    for datum in desired_data:
-        # extract the data we care about
-        datum_T = datum['conditions']['T']
-        datum_P = datum['conditions']['P']
-        # TODO: fix this when N different from 1 allowed in pycalphad
-        datum_N = np.full_like(datum['values'], 1.0)
-        configurations = datum['solver']['sublattice_configurations']
-        occupancies = datum['solver'].get('sublattice_occupancies')
-        values = np.array(datum['values'])
-        if values.size == 0:
-            # Skip any data that don't have any values left (e.g. after filtering)
-            continue
-        # Broadcast the weights to the shape of the values. This ensures that
-        # the sizes of the weights and values are the same, which is important
-        # because they are flattened later (so the shape information is lost).
-        weights = np.broadcast_to(np.asarray(datum.get('weight', 1.0)), values.shape)
-
-        # broadcast and flatten the conditions arrays
-        P, T, N = ravel_conditions(values, datum_P, datum_T, datum_N)
-        if occupancies is None:
-            occupancies = [None] * len(configurations)
-
-        # calculate the points arrays, should be 2d array of points arrays
-        points = np.array([calculate_points_array(constituents, config, occup) for config, occup in zip(configurations, occupancies)])
-        assert values.shape == weights.shape, f"Values data shape {values.shape} does not match weights shape {weights.shape}"
-
-        # add everything to the calculate_dict
-        calculate_dict['P'] = np.concatenate([calculate_dict['P'], P])
-        calculate_dict['T'] = np.concatenate([calculate_dict['T'], T])
-        calculate_dict['N'] = np.concatenate([calculate_dict['N'], N])
-        calculate_dict['points'] = np.concatenate([calculate_dict['points'], np.tile(points, (values.shape[0]*values.shape[1], 1))], axis=0)
-        calculate_dict['values'] = np.concatenate([calculate_dict['values'], values.flatten()])
-        calculate_dict['weights'].extend(weights.flatten())
-        calculate_dict['references'].extend([datum.get('reference', "") for _ in range(values.flatten().size)])
-    return calculate_dict
-
-
-def get_sample_condition_dicts(calculate_dict: Dict[Any, Any], configuration_tuple: Tuple[Union[str, Tuple[str]]], phase_name: str) -> List[Dict[Symbol, float]]:
-    sublattice_dof = list(map(len, configuration_tuple))
-    sample_condition_dicts = []
-    for sample_idx in range(calculate_dict["values"].size):
-        cond_dict = {}
-        points = calculate_dict["points"][sample_idx, :]
-
-        # T and P
-        cond_dict[v.T] = calculate_dict["T"][sample_idx]
-        cond_dict[v.P] = calculate_dict["P"][sample_idx]
-
-        # YS site fraction product
-        site_fraction_product = np.prod(points)
-        cond_dict[Symbol("YS")] = site_fraction_product
-
-        # Reconstruct site fractions in sublattice form from points
-        # Required so we can identify which sublattices have interactions
-        points_idxs = [0] + np.cumsum(sublattice_dof).tolist()
-        site_fractions = []
-        for subl_idx in range(len(points_idxs)-1):
-            subl_site_fractions = points[points_idxs[subl_idx]:points_idxs[subl_idx+1]]
-            for species_name, site_frac in zip(configuration_tuple[subl_idx], subl_site_fractions):
-                cond_dict[v.Y(phase_name, subl_idx, species_name)] = site_frac
-            site_fractions.append(subl_site_fractions.tolist())
-
-        # Z (binary) or V_I, V_J, V_K (ternary) interaction products
-        interaction_product = calc_interaction_product(site_fractions)
-        if hasattr(interaction_product, "__len__"):
-            # Ternary interaction
-            assert len(interaction_product) == 3
-            cond_dict[Symbol("V_I")] = interaction_product[0]
-            cond_dict[Symbol("V_J")] = interaction_product[1]
-            cond_dict[Symbol("V_K")] = interaction_product[2]
-        else:
-            cond_dict[Symbol("Z")] = interaction_product
-
-        sample_condition_dicts.append(cond_dict)
-    return sample_condition_dicts
+    #params_keys, _ = extract_parameters(parameters)
+    
+    params_keys = []
 
+    # XXX: This mutates the global pycalphad namespace
+    for key in parameters.keys():
+        if not hasattr(v, key):
+            setattr(v, key, v.IndependentPotential(key))
+        params_keys.append(getattr(v, key))
+        # XXX: Mutates argument to function
+        dbf.symbols.pop(key,None)
 
-def get_thermochemical_data(dbf, comps, phases, datasets, parameters, model=None, weight_dict=None, symbols_to_fit=None):
+    data_comps = list(set(data['components']).union({'VA'}))
+    species = sorted(unpack_species(dbf, data_comps), key=str)
+    data_phases = filter_phases(dbf, species, candidate_phases=data['phases'])
+    #models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters)
+    models = instantiate_models(dbf, species, data_phases, model=model)
+    output = data['output']
+    property_output = output.split('_')[0]  # property without _FORM, _MIX, etc.
+    samples = np.array(data['values']).flatten()
+    reference = data.get('reference', '')
+
+    # Models are now modified in response to the data from this data
+    # TODO: build a reference state MetaProperty with the reference state information, maybe just-in-time, below
+    if 'reference_states' in data:
+        property_output = output[:-1] if output.endswith('R') else output  # unreferenced model property so we can tell shift_reference_state what to build.
+        reference_states = []
+        for el, vals in data['reference_states'].items():
+            reference_states.append(ReferenceState(v.Species(el), vals['phase'], fixed_statevars=vals.get('fixed_state_variables')))
+        for mod in models.values():
+            mod.shift_reference_state(reference_states, dbf, output=(property_output,))
+
+    data['conditions'].setdefault('N', 1.0)  # Add default for N. Nothing else is supported in pycalphad anyway.
+    pot_conds = OrderedDict([(getattr(v, key), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if not key.startswith('X_')])
+    comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')])
+
+    phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds, **parameters}, models)
+    #wks = Workspace(dbf, species, data_phases, {**pot_conds, **comp_conds, **parameters}, models=model)
+    #phase_record_factory = wks.phase_record_factory
+
+    # Now we need to unravel the composition conditions
+    # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the
+    # composition conditions are only broadcast against the potentials, not
+    # each other. Each individual composition needs to be computed
+    # independently, since broadcasting over composition cannot be turned off
+    # in pycalphad.
+    rav_comp_conds = [OrderedDict(zip(comp_conds.keys(), pt_comps)) for pt_comps in zip(*comp_conds.values())]
+
+    # Build weights, should be the same size as the values
+    total_num_calculations = len(rav_comp_conds)*np.prod([len(vals) for vals in pot_conds.values()])
+    dataset_weights = np.array(data.get('weight', 1.0)) * np.ones(total_num_calculations)
+    weights = (property_std_deviation.get(property_output, 1.0)/data_weight_dict.get(property_output, 1.0)/dataset_weights).flatten()
+
+    return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_record_factory, output, samples, weights, reference)
+
+
+def get_equilibrium_thermochemical_data(dbf: Database, comps: Sequence[str],
+                                        phases: Sequence[str],
+                                        datasets: PickleableTinyDB,
+                                        model: Optional[Dict[str, Model]] = None,
+                                        parameters: Optional[Dict[str, float]] = None,
+                                        data_weight_dict: Optional[Dict[str, float]] = None,
+                                        ) -> Sequence[EqPropData]:
     """
+    Get all the EqPropData for each matching equilibrium thermochemical dataset in the datasets
 
     Parameters
     ----------
-    dbf : pycalphad.Database
-        Database to consider
-    comps : list
-        List of active component names
-    phases : list
-        List of phases to consider
-    datasets : espei.utils.PickleableTinyDB
+    dbf : Database
+        Database with parameters to fit
+    comps : Sequence[str]
+        List of pure element components used to find matching datasets.
+    phases : Sequence[str]
+        List of phases used to search for matching datasets.
+    datasets : PickleableTinyDB
         Datasets that contain single phase data
     model : Optional[Dict[str, Type[Model]]]
         Dictionary phase names to pycalphad Model classes.
-    weight_dict : dict
-        Dictionary of weights for each data type, e.g. {'HM': 200, 'SM': 2}
-    symbols_to_fit : list
-        Parameters to fit. Used to build the models and PhaseRecords.
+    parameters : Optional[Dict[str, float]]
+        Mapping of parameter symbols to values.
+    data_weight_dict : Optional[Dict[str, float]]
+        Mapping of a data type (e.g. `HM` or `SM`) to a weight.
+
+    Notes
+    -----
+    Found datasets will be subsets of the components and phases. Equilibrium
+    thermochemical data is assumed to be any data that does not have the
+    `solver` key, and does not have an output of `ZPF` or `ACR` (which
+    correspond to different data types than can be calculated here.)
 
     Returns
     -------
-    list
-        List of data dictionaries to iterate over
+    Sequence[EqPropData]
     """
-    
-    params_keys = []
-
-    # XXX: This mutates the global pycalphad namespace
-    for key in parameters.keys():
-        if not hasattr(v, key):
-            setattr(v, key, v.IndependentPotential(key))
-        params_keys.append(getattr(v, key))
-        # XXX: Mutates argument to function
-        dbf.symbols.pop(key,None)
-        
-    # phase by phase, then property by property, then by model exclusions
-    if weight_dict is None:
-        weight_dict = {}
 
-    if model is None:
-        model = {}
+    desired_data = datasets.search(
+        # data that isn't ZPF or non-equilibrium thermochemical
+        (where('output') != 'ZPF') & (~where('solver').exists()) &
+        (where('output').test(lambda x: 'ACR' not in x)) &  # activity data not supported yet
+        (where('components').test(lambda x: set(x).issubset(comps))) &
+        (where('phases').test(lambda x: set(x).issubset(set(phases))))
+    )
 
-    if symbols_to_fit is not None:
-        symbols_to_fit = sorted(symbols_to_fit)
-    else:
-        symbols_to_fit = database_symbols_to_fit(dbf)
+    eq_thermochemical_data = []  # 1:1 correspondence with each dataset
+    for data in desired_data:
+        eq_thermochemical_data.append(build_eqpropdata(data, dbf, model=model, parameters=parameters, data_weight_dict=data_weight_dict))
+    return eq_thermochemical_data
 
-    species_comps = set(unpack_species(dbf, comps))
 
-    # estimated from NIST TRC uncertainties
-    property_std_deviation = {
-        'HM': 500.0/weight_dict.get('HM', 1.0),  # J/mol
-        'SM':   0.2/weight_dict.get('SM', 1.0),  # J/K-mol
-        'CPM':  0.2/weight_dict.get('CPM', 1.0),  # J/K-mol
-    }
-    properties = ['HM_FORM', 'SM_FORM', 'CPM_FORM', 'HM_MIX', 'SM_MIX', 'CPM_MIX']
-
-    ref_states = []
-    for el in get_pure_elements(dbf, comps):
-        ref_state = ReferenceState(el, dbf.refstates[el]['phase'])
-        ref_states.append(ref_state)
-    all_data_dicts = []
-    for phase_name in phases:
-        if phase_name not in dbf.phases:
-            continue
-        # phase constituents are Species objects, so we need to be doing intersections with those
-        phase_constituents = dbf.phases[phase_name].constituents
-        # phase constituents must be filtered to only active:
-        constituents = [[sp.name for sp in sorted(subl_constituents.intersection(species_comps))] for subl_constituents in phase_constituents]
-        for prop in properties:
-            desired_data = get_prop_data(comps, phase_name, prop, datasets, additional_query=(where('solver').exists()))
-            if len(desired_data) == 0:
-                continue
-            unique_exclusions = set([tuple(sorted(set(d.get('excluded_model_contributions', [])))) for d in desired_data])
-            for exclusion in unique_exclusions:
-                data_dict = {
-                    'phase_name': phase_name,
-                    'prop': prop,
-                    # needs the following keys to be added:
-                    # species, calculate_dict, phase_record_factory, model, output, weights
-                }
-                # get all the data with these model exclusions
-                if exclusion == tuple([]):
-                    exc_search = (~where('excluded_model_contributions').exists()) & (where('solver').exists())
-                else:
-                    exc_search = (where('excluded_model_contributions').test(lambda x: tuple(sorted(x)) == exclusion)) & (where('solver').exists())
-                curr_data = get_prop_data(comps, phase_name, prop, datasets, additional_query=exc_search)
-                curr_data = filter_sublattice_configurations(curr_data, constituents)
-                curr_data = filter_temperatures(curr_data)
-                calculate_dict = get_prop_samples(curr_data, constituents)
-                model_cls = model.get(phase_name, Model)
-                #mod = model_cls(dbf, comps, phase_name, parameters=symbols_to_fit)
-                mod = model_cls(dbf, comps, phase_name)
-                if prop.endswith('_FORM'):
-                    output = ''.join(prop.split('_')[:-1])+"R"
-                    mod.shift_reference_state(ref_states, dbf, contrib_mods={e: symengine.S.Zero for e in exclusion})
-                else:
-                    output = prop
-                for contrib in exclusion:
-                    mod.models[contrib] = symengine.S.Zero
-                    try:
-                        # TODO: we can remove this try/except block when pycalphad 0.8.5
-                        # is released with these internal API changes
-                        mod.endmember_reference_model.models[contrib] = symengine.S.Zero
-                    except AttributeError:
-                        mod.reference_model.models[contrib] = symengine.S.Zero
-                model_dict = {phase_name: mod}
-                species = sorted(unpack_species(dbf, comps), key=str)
-                data_dict['species'] = species
-                statevar_dict = {getattr(v, c, None): vals for c, vals in calculate_dict.items() if isinstance(getattr(v, c, None), v.StateVariable)}
-                statevar_dict = OrderedDict(sorted(statevar_dict.items(), key=lambda x: str(x[0])))
-                phase_record_factory = PhaseRecordFactory(dbf, species, statevar_dict, model_dict,
-                                                   parameters={s: 0 for s in symbols_to_fit})
-                str_statevar_dict = OrderedDict((str(k), vals) for k, vals in statevar_dict.items())
-                data_dict['str_statevar_dict'] = str_statevar_dict
-                data_dict['phase_record_factory'] = phase_record_factory
-                data_dict['calculate_dict'] = calculate_dict
-                data_dict['model'] = model_dict
-                data_dict['output'] = output
-                data_dict['weights'] = np.array(property_std_deviation[prop.split('_')[0]])/np.array(calculate_dict.pop('weights'))
-                data_dict['constituents'] = constituents
-                all_data_dicts.append(data_dict)
-    return all_data_dicts
-
-
-def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfigurationCalculationData, parameters):
-    species = calc_data['species']
-    phase_name = calc_data['phase_name']
-    models = calc_data['model']  # Dict[PhaseName: Model]
-    output = calc_data['output']
-    #phase_record_factory = calc_data['phase_record_factory']
-    sample_values = calc_data['calculate_dict']['values']
-    str_statevar_dict = calc_data['str_statevar_dict']
-
-    constituent_list = []
-    sublattice_list = []
-    counter = 0
-    for sublattice in calc_data['constituents']:
-        for const in sublattice:
-            sublattice_list.append(counter)
-            constituent_list.append(const)
-        counter = counter + 1
+def calc_prop_differences(eqpropdata: EqPropData,
+                          parameters: np.ndarray,
+                          approximate_equilibrium: Optional[bool] = False,
+                          ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Calculate differences between the expected and calculated values for a property
 
-    differences = []
-    gradients = []
-    for index in range(len(sample_values)):
-        cond_dict = {**parameters}
-        for sv_key, sv_val in str_statevar_dict.items():
-            cond_dict.update({sv_key: sv_val[index]})
-
-        # Build internal DOF as if they were used in conditions
-        dof = {}
-        for site_frac in range(len(constituent_list)):
-            comp = constituent_list[site_frac]
-            occupancy = calc_data['calculate_dict']['points'][index,site_frac]
-            sublattice = sublattice_list[site_frac]
-            dof.update({v.Y(phase_name,sublattice,comp): occupancy})
-
-        # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species
-        # Build composition conditions, probably not necessary given that we don't actually solve anything, but still useful in terms of derivatives probably.
-        active_pure_elements = [list(x.constituents.keys()) for x in species]
-        active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"})
-        ind_comps = len(active_pure_elements) - 1
-        for comp in active_pure_elements:
-            if v.Species(comp) != v.Species('VA') and ind_comps > 0:
-                cond_dict[v.X(comp)] = float(models[phase_name].moles(comp).xreplace(dof))
-                ind_comps = ind_comps - 1
-        # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why.
-        # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered.
-        # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid.
-        #wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_record_factory, solver=NoSolveSolver())
-        wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, solver=NoSolveSolver())
-        # We then get a composition set and we use a special "NoSolveSolver" to
-        # ensure that we don't change from the data-specified DOF.
-        compset = find_first_compset(phase_name, wks)
-        new_sitefracs = np.array([sf for _, sf in sorted(dof.items(), key=lambda y: (y[0].phase_name, y[0].sublattice_index, y[0].species.name))])
-        new_statevars = np.array(compset.dof[:len(compset.phase_record.state_variables)])  # no updates expected
-        compset.update(new_sitefracs, 1.0, new_statevars)
-        iso_phase = IsolatedPhase(compset, wks=wks)
-        iso_phase.solver = NoSolveSolver()
-        results = wks.get(iso_phase(output))
-        #gradient_props = [JanssonDerivative(iso_phase(output), key) for key in parameters]
-        #grads = wks.get(*gradient_props) # this is giving NAN for some parameters
-        
-        grads = []
-        for key in parameters:
-            grads.append(wks.get(iso_phase(output+'.'+key)))
-                
-        sample_differences = results - sample_values[index]
-        differences.append(sample_differences)
-        gradients.append(grads)
-    return differences, gradients
+    Parameters
+    ----------
+    eqpropdata : EqPropData
+        Data corresponding to equilibrium calculations for a single datasets.
+    parameters : np.ndarray
+        Array of parameters to fit. Must be sorted in the same symbol sorted
+        order used to create the PhaseRecords.
+    approximate_equilibrium : Optional[bool]
+        Whether or not to use an approximate version of equilibrium that does
+        not refine the solution and uses ``starting_point`` instead.
 
+    Returns
+    -------
+    Tuple[np.ndarray, np.ndarray]
+        Pair of
+        * differences between the calculated property and expected property
+        * weights for this dataset
 
-def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: List[FixedConfigurationCalculationData], dbf, parameters=None):
     """
-    Calculate the weighted single phase error in the Database
+        
+    dbf = eqpropdata.dbf
+    species = eqpropdata.species
+    phases = eqpropdata.phases
+    pot_conds = eqpropdata.potential_conds
+    models = eqpropdata.models
+    phase_record_factory = eqpropdata.phase_record_factory
+    #update_phase_record_parameters(phase_record_factory, parameters)
+    params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters))
+    output = as_property(eqpropdata.output)
+    weights = np.array(eqpropdata.weight, dtype=np.float64)
+    samples = np.array(eqpropdata.samples, dtype=np.float64)
+    wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_record_factory)
+
+    calculated_data = []
+    gradient_data = []
+    for comp_conds in eqpropdata.composition_conds:
+        cond_dict = OrderedDict(**pot_conds, **comp_conds, **params_dict)
+        wks.conditions = cond_dict
+        #wks.parameters = params_dict  # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc.
+        wks.models = models
+        #wks.phase_record_factory = phase_record_factory
+        vals = wks.get(output)
+        calculated_data.extend(np.atleast_1d(vals).tolist())
+        gradient_props = [JanssonDerivative(output, key) for key in params_dict]
+        gradients = wks.get(*gradient_props)
+        gradient_data.append(gradients)
+
+    calculated_data = np.array(calculated_data, dtype=np.float64)
+    gradient_data = np.array(gradient_data, dtype=np.float64)
+
+    assert calculated_data.shape == samples.shape, f"Calculated data shape {calculated_data.shape} does not match samples shape {samples.shape}"
+    assert calculated_data.shape == weights.shape, f"Calculated data shape {calculated_data.shape} does not match weights shape {weights.shape}"
+    differences = calculated_data - samples
+    _log.debug('Output: %s differences: %s, weights: %s, reference: %s', output, differences, weights, eqpropdata.reference)
+    return differences, weights, gradient_data
+
+
+def calculate_equilibrium_thermochemical_probability(eq_thermochemical_data: Sequence[EqPropData],
+                                                     parameters: np.ndarray,
+                                                     approximate_equilibrium: Optional[bool] = False,
+                                                     ) -> Tuple[float, List[float]]:
+    """
+    Calculate the total equilibrium thermochemical probability for all EqPropData
 
     Parameters
     ----------
-    thermochemical_data : list
-        List of thermochemical data dicts
+    eq_thermochemical_data : Sequence[EqPropData]
+        List of equilibrium thermochemical data corresponding to the datasets.
     parameters : np.ndarray
-        Array of parameters to calculate the error with.
+        Values of parameters for this iteration to be updated in PhaseRecords.
+    approximate_equilibrium : Optional[bool], optional
+
+    eq_thermochemical_data : Sequence[EqPropData]
 
     Returns
     -------
     float
-        A single float of the residual sum of square errors
+        Sum of log-probability for all thermochemical data.
 
     """
-    if parameters is None:
-        parameters = {}
-
-    prob_error = 0.0
-    overall_grad = np.zeros(len(parameters))
-    for data in thermochemical_data:
-        phase_name = data['phase_name']
-        sample_values = data['calculate_dict']['values']
-        differences, gradients = compute_fixed_configuration_property_differences(dbf, data, parameters)
-        differences = np.array(differences)
-        gradients = np.array(gradients)
-        probabilities = norm.logpdf(differences, loc=0, scale=data['weights'])
-        prob_sum = np.sum(probabilities)
-        _log.debug("%s(%s) - probability sum: %0.2f, data: %s, differences: %s, probabilities: %s, references: %s", data['prop'], phase_name, prob_sum, sample_values, differences, probabilities, data['calculate_dict']['references'])
-        prob_error += prob_sum
-        derivative = -differences*gradients.T/data['weights']**2
-        if derivative.ndim == 1:
-            grad_prob = np.sum(derivative, axis = 0)
-        else:
-            grad_prob = np.sum(derivative, axis=1)
-        overall_grad += grad_prob
-    return prob_error, overall_grad
+    if len(eq_thermochemical_data) == 0:
+        return 0.0, np.zeros(len(parameters))
+
+    differences = []
+    weights = []
+    gradients = []
+    for eqpropdata in eq_thermochemical_data:
+        diffs, wts, grads = calc_prop_differences(eqpropdata, parameters, approximate_equilibrium)
+        if np.any(np.isinf(diffs) | np.isnan(diffs)):
+            # NaN or infinity are assumed calculation failures. If we are
+            # calculating log-probability, just bail out and return -infinity.
+            return -np.inf, np.zeros(len(parameters))
+        differences.append(diffs)
+        weights.append(wts)
+        gradients.append(grads)
+
+    differences = np.concatenate(differences, axis=0)
+    weights = np.concatenate(weights, axis=0)
+    gradients = np.concatenate(gradients, axis=0)
+    probs = norm(loc=0.0, scale=weights).logpdf(differences)
+    grad_probs = -differences*gradients.T/weights**2
+    if grad_probs.ndim == 1:
+        grad_probs_sum = np.sum(grad_probs, axis=0)
+    else:
+        grad_probs_sum = np.sum(grad_probs, axis=1)
+    return np.sum(probs), grad_probs_sum
 
 
-class FixedConfigurationPropertyResidual(ResidualFunction):
+class EquilibriumPropertyResidual(ResidualFunction):
     def __init__(
         self,
         database: Database,
@@ -510,29 +324,22 @@ def __init__(
         phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys()))
         if symbols_to_fit is None:
             symbols_to_fit = database_symbols_to_fit(database)
+        # okay if parameters are initialized to zero, we only need the symbol names
         parameters = dict(zip(symbols_to_fit, [0]*len(symbols_to_fit)))
-        self.thermochemical_data = get_thermochemical_data(database, comps, phases, datasets, parameters, model_dict, weight_dict=self.weight, symbols_to_fit=symbols_to_fit)
-        self._symbols_to_fit = symbols_to_fit
-        self.dbf = database
+        self.property_data = get_equilibrium_thermochemical_data(database, comps, phases, datasets, model_dict, parameters, data_weight_dict=self.weight)
 
     def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[float]]:
         residuals = []
         weights = []
-        for data in self.thermochemical_data:
-            dataset_residuals = compute_fixed_configuration_property_differences(self.dbf, data, dict(zip(self._symbols_to_fit, parameters)))
-            residuals.extend(dataset_residuals)
-            dataset_weights = np.asarray(data["weights"], dtype=float).flatten().tolist()
-            if len(dataset_weights) != len(dataset_residuals):
-                # we need to broadcast the residuals. For now, assume the weights are a scalar, since that's all that's supported
-                assert len(dataset_weights) == 1
-                dataset_weights = [float(dataset_weights[0]) for _ in range(len(dataset_residuals))]
-            weights.extend(dataset_weights)
+        for data in self.property_data:
+            dataset_residuals, dataset_weights = calc_prop_differences(data, parameters)
+            residuals.extend(dataset_residuals.tolist())
+            weights.extend(dataset_weights.tolist())
         return residuals, weights
 
     def get_likelihood(self, parameters) -> Tuple[float, List[float]]:
-        parameters = {param_name: param for param_name, param in zip(self._symbols_to_fit, parameters.tolist())}
-        likelihood, gradient = calculate_non_equilibrium_thermochemical_probability(self.thermochemical_data, self.dbf, parameters)
-        return likelihood, gradient
+        likelihood, gradients = calculate_equilibrium_thermochemical_probability(self.property_data, parameters)
+        return likelihood, gradients
 
 
-residual_function_registry.register(FixedConfigurationPropertyResidual)
+residual_function_registry.register(EquilibriumPropertyResidual)

From 2c19c2fcb4d5db929bb19b1299f23ca264d68c90 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Thu, 7 Nov 2024 11:52:18 -0600
Subject: [PATCH 14/27] Update non_equilibrium_thermochemical_error.py

---
 espei/error_functions/non_equilibrium_thermochemical_error.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py
index eb554474..1db63f1a 100644
--- a/espei/error_functions/non_equilibrium_thermochemical_error.py
+++ b/espei/error_functions/non_equilibrium_thermochemical_error.py
@@ -1,3 +1,4 @@
+"""
 Calculate error due to thermochemical quantities: heat capacity, entropy, enthalpy.
 """
 

From e2f79d5c855c04d15f88f8a4a5da23a5ecfbcaa9 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Fri, 6 Dec 2024 14:07:20 -0600
Subject: [PATCH 15/27] Update activity_error.py

---
 espei/error_functions/activity_error.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py
index 5cddecd9..e61a2a27 100644
--- a/espei/error_functions/activity_error.py
+++ b/espei/error_functions/activity_error.py
@@ -196,12 +196,15 @@ def calculate_activity_error(dbf, comps, phases, datasets, parameters=None, phas
     """
     residuals, weights, gradients = calculate_activity_residuals(dbf, comps, phases, datasets, parameters=parameters, phase_models=phase_models, callables=callables, data_weight=data_weight)
     likelihood = np.sum(norm(0, scale=weights).logpdf(residuals))
-    gradients = np.concatenate(gradients)
-    derivative = -np.array(residuals)*np.array(gradients).T/np.array(weights)**2
-    if derivative.ndim == 1:
-        likelihood_grads = np.sum(derivative, axis=0)
-    else:
-        likelihood_grads = np.sum(derivative, axis=1)
+    if len(gradients) == 0:
+        likelihood_grads = []
+    else: 
+        gradients = np.concatenate(gradients)
+        derivative = -np.array(residuals)*np.array(gradients).T/np.array(weights)**2
+        if derivative.ndim == 1:
+            likelihood_grads = np.sum(derivative, axis=0)
+        else:
+            likelihood_grads = np.sum(derivative, axis=1)
     if np.isnan(likelihood):
         # TODO: revisit this case and evaluate whether it is resonable for NaN
         # to show up here. When this comment was written, the test
@@ -255,7 +258,7 @@ def __init__(
 
     def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[float]]:
         parameters = {param_name: param for param_name, param in zip(self._symbols_to_fit, parameters.tolist())}
-        residuals, weights = calculate_activity_residuals(parameters=parameters, **self._activity_likelihood_kwargs)
+        residuals, weights, grads = calculate_activity_residuals(parameters=parameters, **self._activity_likelihood_kwargs)
         return residuals, weights
 
     def get_likelihood(self, parameters: npt.NDArray) -> Tuple[float, List[float]]:

From 908bbbf88b7ff604c81906f377c18737e4cfa531 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Fri, 6 Dec 2024 14:08:37 -0600
Subject: [PATCH 16/27] Update non_equilibrium_thermochemical_error.py

---
 .../non_equilibrium_thermochemical_error.py        | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py
index 1db63f1a..77528448 100644
--- a/espei/error_functions/non_equilibrium_thermochemical_error.py
+++ b/espei/error_functions/non_equilibrium_thermochemical_error.py
@@ -16,7 +16,7 @@
 from pycalphad import Database, Model, ReferenceState, variables as v
 from pycalphad.core.utils import unpack_species, get_pure_elements, filter_phases
 from pycalphad import Workspace
-from pycalphad.property_framework import IsolatedPhase, JanssonDerivative
+from pycalphad.property_framework import IsolatedPhase, JanssonDerivative, ModelComputedProperty
 from pycalphad.property_framework.metaproperties import find_first_compset
 from pycalphad.core.solver import Solver, SolverResult
 
@@ -422,6 +422,7 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
         # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid.
         #wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_record_factory, solver=NoSolveSolver())
         wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, solver=NoSolveSolver())
+        #### Do we need the degrees of freedom as a condition here? Or does this break what is happening next?
         # We then get a composition set and we use a special "NoSolveSolver" to
         # ensure that we don't change from the data-specified DOF.
         compset = find_first_compset(phase_name, wks)
@@ -431,12 +432,13 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
         iso_phase = IsolatedPhase(compset, wks=wks)
         iso_phase.solver = NoSolveSolver()
         results = wks.get(iso_phase(output))
-        #gradient_props = [JanssonDerivative(iso_phase(output), key) for key in parameters]
-        #grads = wks.get(*gradient_props) # this is giving NAN for some parameters
         
-        grads = []
-        for key in parameters:
-            grads.append(wks.get(iso_phase(output+'.'+key)))
+        prop = ModelComputedProperty(output)
+        # chemical potentials are an input to _compute_property_gradient, but they aren't used, so here they are just
+        # passed as zeros
+        grads = prop._compute_property_gradient([compset], wks.conditions,np.zeros(len(active_pure_elements)))
+        grads = grads[0][len(str_statevar_dict):len(str_statevar_dict)+len(parameters)]
+        
                 
         sample_differences = results - sample_values[index]
         differences.append(sample_differences)

From 56c161133d713ddf656998acf53b274a5d547c66 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Fri, 6 Dec 2024 14:09:57 -0600
Subject: [PATCH 17/27] Update zpf_error.py

---
 espei/error_functions/zpf_error.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py
index c929b72c..152e6b1a 100644
--- a/espei/error_functions/zpf_error.py
+++ b/espei/error_functions/zpf_error.py
@@ -300,12 +300,15 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
             vertex_comp_estimate = np.squeeze(single_eqdata.X)
         else:
             vertex_comp_estimate = np.squeeze(single_eqdata.X)[np.nanargmax(df),:]
-        #print(vertex_comp_estimate)
         counter = 0
         for comp in species:
-            if v.X(comp) in cond_dict.keys():
-                cond_dict.update({v.X(comp): vertex_comp_estimate[counter]})
-            counter = counter + 1
+            if v.Species(comp) != v.Species('VA'):
+                if v.X(comp) in cond_dict.keys():
+                    cond_dict.update({v.X(comp): vertex_comp_estimate[counter]})
+                counter = counter + 1
+                
+        # local_conds = dict(zip(single_eqdata.components, single_eqdata.X))
+            
         wks = Workspace(database=dbf, components=species, phases=current_phase,  phase_record_factory=phase_record_factory, conditions=cond_dict)
         constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
         #print(constrained_energy)
@@ -472,7 +475,7 @@ def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]],
 
     """
     if len(zpf_data) == 0:
-        return 0.0
+        return 0.0, []
     driving_forces, weights, gradients = calculate_zpf_driving_forces(zpf_data, parameters, approximate_equilibrium, short_circuit=True)
     # Driving forces and weights are 2D ragged arrays with the shape (len(zpf_data), len(zpf_data['values']))
     driving_forces = np.concatenate(driving_forces).T

From 591d968f84339be496b9fccafbcfea265338665a Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Fri, 6 Dec 2024 14:10:55 -0600
Subject: [PATCH 18/27] Update opt_mcmc.py

---
 espei/optimizers/opt_mcmc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/espei/optimizers/opt_mcmc.py b/espei/optimizers/opt_mcmc.py
index 5d0cc865..9b0d2229 100644
--- a/espei/optimizers/opt_mcmc.py
+++ b/espei/optimizers/opt_mcmc.py
@@ -290,7 +290,7 @@ def predict(params, **ctx):
         likelihoods = {}
         for residual_obj in ctx.get("residual_objs", []):
             residual_starttime = time.time()
-            likelihood = residual_obj.get_likelihood(params)
+            likelihood, grads = residual_obj.get_likelihood(params)
             residual_time = time.time() - residual_starttime
             likelihoods[type(residual_obj).__name__] = (likelihood, residual_time)
             lnlike += likelihood

From 9fffa7ceffb1374a3f958fe8089005f9bde54bec Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Fri, 6 Dec 2024 14:12:09 -0600
Subject: [PATCH 19/27] Update test_error_functions.py

---
 tests/test_error_functions.py | 104 +++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 52 deletions(-)

diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py
index 7af9e5c4..71abbefa 100644
--- a/tests/test_error_functions.py
+++ b/tests/test_error_functions.py
@@ -33,7 +33,7 @@ def test_activity_error(datasets_db):
     datasets_db.insert(CU_MG_EXP_ACTIVITY)
 
     dbf = Database(CU_MG_TDB)
-    error = calculate_activity_error(dbf, ['CU','MG','VA'], list(dbf.phases.keys()), datasets_db, {}, {}, {})
+    error, grads = calculate_activity_error(dbf, ['CU','MG','VA'], list(dbf.phases.keys()), datasets_db, parameters=None, phase_models=None, callables=None, data_weight=1.0)
     assert np.isclose(error, -257.41020886970756, rtol=1e-6)
 
 
@@ -47,7 +47,7 @@ def test_activity_residual_function(datasets_db):
     residuals, weights = residual_func.get_residuals(np.asarray([]))
     assert len(residuals) == len(weights)
     assert np.allclose(residuals, [6522.652187085958, -1890.1414208991046, -4793.211215856485, -3018.311675280318, -1062.6724585088668, -2224.814500229084, -2256.9820026771777, -1735.8692674535414, -805.219891012428, 0.0])
-    likelihood = residual_func.get_likelihood(np.asarray([]))
+    likelihood, grads = residual_func.get_likelihood(np.asarray([]))
     assert np.isclose(likelihood, -257.41020886970756, rtol=1e-6)
 
 
@@ -61,14 +61,14 @@ def test_subsystem_activity_probability(datasets_db):
     phases = list(dbf_tern.phases.keys())
 
     # Truth
-    bin_prob = calculate_activity_error(dbf_bin, ['CR','NI','VA'], phases, datasets_db, {}, {}, {})
+    bin_prob, bin_prob_grads = calculate_activity_error(dbf_bin, ['CR','NI','VA'], phases, datasets_db, {}, {}, {})
 
     # Getting binary subsystem data explictly (from binary input)
-    prob = calculate_activity_error(dbf_tern, ['CR','NI','VA'], phases, datasets_db, {}, {}, {})
+    prob, prob_grads = calculate_activity_error(dbf_tern, ['CR','NI','VA'], phases, datasets_db, {}, {}, {})
     assert np.isclose(prob, bin_prob)
 
     # Getting binary subsystem from ternary input
-    prob = calculate_activity_error(dbf_tern, ['CR', 'FE', 'NI', 'VA'], phases, datasets_db, {}, {}, {})
+    prob, prob_grads = calculate_activity_error(dbf_tern, ['CR', 'FE', 'NI', 'VA'], phases, datasets_db, {}, {}, {})
     assert np.isclose(prob, bin_prob)
 
 
@@ -78,11 +78,11 @@ def test_get_thermochemical_data_filters_invalid_sublattice_configurations(datas
     dbf = Database(CU_MG_TDB)
     comps = ["CU", "MG", "VA"]
     phases = ["CUMG2"]
-    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[])
+    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, parameters={}, symbols_to_fit=[])
     print('thermochemical data:', thermochemical_data)
     assert thermochemical_data[0]["calculate_dict"]["values"].shape == (2,)
 
-    error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
+    error, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
     assert np.isclose(error, -14.28729)
 
 
@@ -96,7 +96,7 @@ def test_fixed_configuration_residual_function(datasets_db):
     residuals, weights = residual_func.get_residuals(np.asarray([]))
     assert len(residuals) == len(weights)
     assert np.allclose(residuals, [-10.0, -100.0])
-    likelihood = residual_func.get_likelihood(np.asarray([]))
+    likelihood, grads = residual_func.get_likelihood(np.asarray([]))
     assert np.isclose(likelihood, -14.28729, rtol=1e-6)
 
 
@@ -119,11 +119,11 @@ def test_get_thermochemical_data_filters_configurations_when_all_configurations_
     dbf = Database(CU_MG_TDB)
     comps = ["CU", "MG", "VA"]
     phases = ["CUMG2"]
-    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db)
+    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, parameters={})
     print('thermochemical data:', thermochemical_data)
     assert thermochemical_data[0]["calculate_dict"]["values"].shape == (0,)
 
-    error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
+    error, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
     assert np.isclose(error, 0)
 
 
@@ -134,9 +134,8 @@ def test_non_equilibrium_thermochemical_error_with_multiple_X_points(datasets_db
     dbf = Database(CU_MG_TDB)
     phases = list(dbf.phases.keys())
     comps = ['CU', 'MG', 'VA']
-    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[])
-    error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
-
+    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, parameters={}, symbols_to_fit=[])
+    error, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
     assert np.isclose(error, -4061.119001241541, rtol=1e-6)
 
 
@@ -147,8 +146,8 @@ def test_non_equilibrium_thermochemical_error_with_multiple_T_points(datasets_db
     dbf = Database(CU_MG_TDB)
     phases = list(dbf.phases.keys())
     comps = ['CU', 'MG', 'VA']
-    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[])
-    error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
+    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, parameters={}, symbols_to_fit=[])
+    error, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
     assert np.isclose(error,-14.287293263253728, rtol=1e-6)
 
 
@@ -159,11 +158,12 @@ def test_non_equilibrium_thermochemical_error_with_multiple_T_X_points(datasets_
     dbf = Database(CU_MG_TDB)
     phases = list(dbf.phases.keys())
     comps = ['CU', 'MG', 'VA']
-    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db)
     symbols_to_fit = database_symbols_to_fit(dbf)
-    initial_guess = np.array([unpack_piecewise(dbf.symbols[s]) for s in symbols_to_fit])
-    error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf, parameters=dict(zip(symbols_to_fit, initial_guess)))
-
+    params = dict(zip(symbols_to_fit, [0]*len(symbols_to_fit)))
+    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, parameters=params)
+    #initial_guess = np.array([unpack_piecewise(dbf.symbols[s]) for s in symbols_to_fit])
+    #error, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf, parameters=dict(zip(symbols_to_fit, initial_guess)))
+    error, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf, parameters=params)
     assert np.isclose(float(error), -3282497.2380024833, rtol=1e-6)
 
 def test_non_equilibrium_thermochemical_error_for_mixing_entropy_error_is_excess_only(datasets_db):
@@ -213,8 +213,8 @@ def test_non_equilibrium_thermochemical_error_for_mixing_entropy_error_is_excess
     # the dataset is excess only
     zero_error_prob = scipy.stats.norm(loc=0, scale=0.2).logpdf(0.0)  # SM weight = 0.2
     # Explicitly pass parameters={} to not try fitting anything
-    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[])
-    error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
+    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, parameters = {}, symbols_to_fit=[])
+    error, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
     assert np.isclose(error, zero_error_prob, atol=1e-6)
 
 
@@ -264,8 +264,8 @@ def test_non_equilibrium_thermochemical_error_for_of_enthalpy_mixing(datasets_db
     # the dataset is excess only
     zero_error_prob = scipy.stats.norm(loc=0, scale=500.0).logpdf(0.0)  # HM weight = 500
     # Explicitly pass parameters={} to not try fitting anything
-    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[])
-    error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
+    thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, parameters = {},symbols_to_fit=[])
+    error, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
     assert np.isclose(error, zero_error_prob, atol=1e-6)
 
 
@@ -279,17 +279,17 @@ def test_subsystem_non_equilibrium_thermochemcial_probability(datasets_db):
     phases = list(dbf_tern.phases.keys())
 
     # Truth
-    thermochemical_data = get_thermochemical_data(dbf_bin, ['CR', 'NI', 'VA'], phases, datasets_db)
-    bin_prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_bin)
+    thermochemical_data = get_thermochemical_data(dbf_bin, ['CR', 'NI', 'VA'], phases, datasets_db,parameters={})
+    bin_prob, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_bin)
 
     # Getting binary subsystem data explictly (from binary input)
-    thermochemical_data = get_thermochemical_data(dbf_tern, ['CR', 'NI', 'VA'], phases, datasets_db)
-    prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_tern)
+    thermochemical_data = get_thermochemical_data(dbf_tern, ['CR', 'NI', 'VA'], phases, datasets_db, parameters = {})
+    prob, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_tern)
     assert np.isclose(prob, bin_prob)
 
     # Getting binary subsystem from ternary input
-    thermochemical_data = get_thermochemical_data(dbf_tern, ['CR', 'FE', 'NI', 'VA'], phases, datasets_db)
-    prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_tern)
+    thermochemical_data = get_thermochemical_data(dbf_tern, ['CR', 'FE', 'NI', 'VA'], phases, datasets_db, parameters = {})
+    prob, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_tern)
     assert np.isclose(prob, bin_prob)
 
 
@@ -305,7 +305,7 @@ def test_zpf_error_zero(datasets_db):
     zero_error_prob = 2 * scipy.stats.norm(loc=0, scale=1000.0).logpdf(0.0)
 
     zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {})
-    error = calculate_zpf_error(zpf_data, np.array([]))
+    error, grads = calculate_zpf_error(zpf_data, np.array([]))
     assert np.isclose(error, zero_error_prob, rtol=1e-6)
 
 
@@ -319,7 +319,7 @@ def test_zpf_residual_function(datasets_db):
     residuals, weights = residual_func.get_residuals(np.asarray([]))
     assert len(residuals) == len(weights)
     assert np.allclose(residuals, [0.0, 0.0], atol=1e-3)  # looser tolerance due to numerical instabilities
-    likelihood = residual_func.get_likelihood(np.asarray([]))
+    likelihood, grads = residual_func.get_likelihood(np.asarray([]))
     # ZPF weight = 1 kJ and there are two points in the tieline
     zero_error_prob = np.sum(scipy.stats.norm(loc=0, scale=1000.0).logpdf([0.0, 0.0]))
     assert np.isclose(likelihood, zero_error_prob, rtol=1e-6)
@@ -336,16 +336,16 @@ def test_subsystem_zpf_probability(datasets_db):
 
     # Truth
     zpf_data = get_zpf_data(dbf_bin, ['CR', 'NI', 'VA'], phases, datasets_db, {})
-    bin_prob = calculate_zpf_error(zpf_data, np.array([]))
+    bin_prob, grads = calculate_zpf_error(zpf_data, np.array([]))
 
     # Getting binary subsystem data explictly (from binary input)
     zpf_data = get_zpf_data(dbf_tern, ['CR', 'NI', 'VA'], phases, datasets_db, {})
-    prob = calculate_zpf_error(zpf_data, np.array([]))
+    prob, grads = calculate_zpf_error(zpf_data, np.array([]))
     assert np.isclose(prob, bin_prob)
 
     # Getting binary subsystem from ternary input
     zpf_data = get_zpf_data(dbf_tern, ['CR', 'FE', 'NI', 'VA'], phases, datasets_db, {})
-    prob = calculate_zpf_error(zpf_data, np.array([]))
+    prob, grads = calculate_zpf_error(zpf_data, np.array([]))
     assert np.isclose(prob, bin_prob)
 
 
@@ -367,7 +367,7 @@ def test_zpf_error_species(datasets_db):
     zero_error_probability = 2 * scipy.stats.norm(loc=0, scale=1000.0).logpdf(0.0)
 
     zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {})
-    likelihood = calculate_zpf_error(zpf_data)
+    likelihood, grads = calculate_zpf_error(zpf_data)
     assert np.isclose(likelihood, zero_error_probability)
 
 
@@ -384,7 +384,7 @@ def test_zpf_error_equilibrium_failure(datasets_db):
     zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {})
 
     with mock.patch('espei.error_functions.zpf_error.estimate_hyperplane', return_value=np.array([np.nan, np.nan])):
-        likelihood = calculate_zpf_error(zpf_data)
+        likelihood, grads = calculate_zpf_error(zpf_data)
         assert np.isclose(likelihood, zero_error_probability, rtol=1e-6)
 
 
@@ -400,7 +400,7 @@ def test_zpf_error_works_for_stoichiometric_cmpd_tielines(datasets_db):
     zero_error_probability = 2 * scipy.stats.norm(loc=0, scale=1000.0).logpdf(0.0)
 
     zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {})
-    likelihood = calculate_zpf_error(zpf_data)
+    likelihood, grads = calculate_zpf_error(zpf_data)
     assert np.isclose(likelihood, zero_error_probability, rtol=1e-6)
 
 
@@ -412,8 +412,8 @@ def test_non_equilibrium_thermochemcial_species(datasets_db):
     dbf = Database(LI_SN_TDB)
     phases = ['LIQUID']
 
-    thermochemical_data = get_thermochemical_data(dbf, ['LI', 'SN'], phases, datasets_db)
-    prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
+    thermochemical_data = get_thermochemical_data(dbf, ['LI', 'SN'], phases, datasets_db, parameters={})
+    prob, grads = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf)
     # Near zero error and non-zero error
     assert np.isclose(prob, (-7.13354663 + -22.43585011))
 
@@ -429,7 +429,7 @@ def test_equilibrium_thermochemcial_error_species(datasets_db):
     eqdata = get_equilibrium_thermochemical_data(dbf, ['LI', 'SN'], phases, datasets_db)
     # Thermo-Calc
     truth_values = np.array([0.0, -28133.588, -40049.995, 0.0])
-    residuals, weights = calc_prop_differences(eqdata[0], np.array([]))
+    residuals, weights, grads = calc_prop_differences(eqdata[0], np.array([]))
     assert np.all(np.isclose(residuals, truth_values, atol=3e-5))
 
 
@@ -443,7 +443,7 @@ def test_equilibrium_thermochemical_error_unsupported_property(datasets_db):
     phases = list(dbf.phases.keys())
 
     eqdata = get_equilibrium_thermochemical_data(dbf, ['CR', 'NI'], phases, datasets_db)
-    residuals, weights = calc_prop_differences(eqdata[0], np.array([]))
+    residuals, weights, grads = calc_prop_differences(eqdata[0], np.array([]))
     assert np.all(np.isclose(residuals, EXPECTED_VALUES, atol=1e-3))
 
 
@@ -457,7 +457,7 @@ def test_equilibrium_property_residual_function(datasets_db):
     assert len(residuals) == len(weights)
     assert np.allclose(residuals, [374.6625, 0.0, 0.0])
     # Regression test "truth" values - got values by running
-    likelihood = residual_func.get_likelihood(np.asarray([]))
+    likelihood, grads = residual_func.get_likelihood(np.asarray([]))
     assert np.isclose(likelihood, -70188.75126872442, rtol=1e-6)
 
 
@@ -470,17 +470,17 @@ def test_equilibrium_thermochemical_error_computes_correct_probability(datasets_
     # Test that errors update in response to changing parameters
     # no parameters
     eqdata = get_equilibrium_thermochemical_data(dbf, ['CU', 'MG'], phases, datasets_db)
-    errors, weights = calc_prop_differences(eqdata[0], np.array([]))
+    errors, weights, grads = calc_prop_differences(eqdata[0], np.array([]))
     expected_vals = [-31626.6*0.5*0.5]
     assert np.all(np.isclose(errors, expected_vals))
-
+    
     # VV0017 (LIQUID, L0)
     eqdata = get_equilibrium_thermochemical_data(dbf, ['CU', 'MG'], phases, datasets_db, parameters={'VV0017': -31626.6})
     # unchanged, should be the same as before
-    errors, weights = calc_prop_differences(eqdata[0], np.array([-31626.6]))
+    errors, weights, grads = calc_prop_differences(eqdata[0], np.array([-31626.6]))
     assert np.all(np.isclose(errors, [-31626.6*0.5*0.5]))
     # change to -40000
-    errors, weights = calc_prop_differences(eqdata[0], np.array([-40000], np.float64))
+    errors, weights, grads = calc_prop_differences(eqdata[0], np.array([-40000], np.float64))
     assert np.all(np.isclose(errors, [-40000*0.5*0.5]))
 
 
@@ -495,17 +495,17 @@ def test_driving_force_miscibility_gap(datasets_db):
 
     # Ideal solution case
     params = np.array([0.0])
-    prob = calculate_zpf_error(zpf_data, parameters=params)
+    prob, grads = calculate_zpf_error(zpf_data, parameters=params)
     assert np.isclose(prob, zero_error_prob)
 
     # Negative interaction case
     params = np.array([-10000.0])
-    prob = calculate_zpf_error(zpf_data, parameters=params)
+    prob, grads = calculate_zpf_error(zpf_data, parameters=params)
     assert np.isclose(prob, zero_error_prob)
 
     # Miscibility gap case
     params = np.array([10000.0])
-    prob = calculate_zpf_error(zpf_data, parameters=params)
+    prob, grads = calculate_zpf_error(zpf_data, parameters=params)
     # Remember these are log probabilities, so more negative means smaller probability and larger error
     assert prob < zero_error_prob
 
@@ -536,7 +536,7 @@ def test_setting_up_context_with_custom_models(datasets_db):
     with pytest.raises(ModelTestException):
         ctx = setup_context(dbf, datasets_db, phase_models=phase_models)
 
-
+#this one requires work on the activity error function
 def test_zpf_context_is_pickleable(datasets_db):
     """Test that the context for ZPF data is pickleable"""
     datasets_db.insert(CU_MG_DATASET_ZPF_ZERO_ERROR)
@@ -619,7 +619,7 @@ def test_zpf_error_for_prescribed_hyperplane_composition(datasets_db):
     datasets_db.insert(A_B_DATASET_ALPHA)
     dbf = Database(A_B_REGULAR_SOLUTION_TDB)  # Ideal solution case by default
     zpf_data = get_zpf_data(dbf, ["A", "B"], ["ALPHA"], datasets_db, {})
-    driving_forces, weights = calculate_zpf_driving_forces(zpf_data)
+    driving_forces, weights, grads = calculate_zpf_driving_forces(zpf_data)
     flat_driving_forces = np.asarray(driving_forces).flatten()
     assert len(flat_driving_forces) == 1
     assert np.isclose(flat_driving_forces[0], 0.0)
@@ -630,7 +630,7 @@ def test_zpf_error_hyperplane_with_null_phases(datasets_db):
     datasets_db.insert(CU_MG_DATASET_ZPF_HYPERPLANE_TWOPHASE)
     dbf = Database(CU_MG_TDB)  # Ideal solution case by default
     zpf_data = get_zpf_data(dbf, ["CU", "MG"], list(dbf.phases.keys()), datasets_db, {})
-    driving_forces, weights = calculate_zpf_driving_forces(zpf_data)
+    driving_forces, weights, grads = calculate_zpf_driving_forces(zpf_data)
     flat_driving_forces = np.asarray(driving_forces).flatten()
     assert len(flat_driving_forces) == 2  # One for each vertex, HCP_A3 and CUMG2
     assert np.allclose(flat_driving_forces, [-18.05883506, -780.50836135])

From 55c5f7d71feda1eea65b81e08dff86129ba163fd Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Fri, 6 Dec 2024 14:22:29 -0600
Subject: [PATCH 20/27] Update zpf_error.py

---
 espei/error_functions/zpf_error.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py
index 152e6b1a..1cdc094c 100644
--- a/espei/error_functions/zpf_error.py
+++ b/espei/error_functions/zpf_error.py
@@ -525,7 +525,7 @@ def __init__(
         self.zpf_data = get_zpf_data(database, comps, phases, datasets, parameters, model_dict)
 
     def get_residuals(self, parameters: ArrayLike) -> Tuple[List[float], List[float]]:
-        driving_forces, weights = calculate_zpf_driving_forces(self.zpf_data, parameters, short_circuit=True)
+        driving_forces, weights, grads = calculate_zpf_driving_forces(self.zpf_data, parameters, short_circuit=True)
         # Driving forces and weights are 2D ragged arrays with the shape (len(zpf_data), len(zpf_data['values']))
         residuals = np.concatenate(driving_forces).tolist()
         weights = np.concatenate(weights).tolist()

From ca89722e7604cdabcd1d983eb79ac85b312d9150 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 23 Apr 2025 20:50:17 -0500
Subject: [PATCH 21/27] Update activity_error.py

---
 espei/error_functions/activity_error.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py
index e61a2a27..690f3b0c 100644
--- a/espei/error_functions/activity_error.py
+++ b/espei/error_functions/activity_error.py
@@ -50,8 +50,7 @@ def target_chempots_from_activity(component, parameters, target_activity, temper
     numpy.ndarray
         Array of experimental chemical potentials
     """
-    # acr_i = exp((mu_i - mu_i^{ref})/(RT))
-    # so mu_i = R*T*ln(acr_i) + mu_i^{ref}
+    
     ref_chempot = wks_ref.get(v.MU(component))
     exp_chem_pots = v.R * temperatures * np.log(target_activity) + ref_chempot
     
@@ -85,12 +84,12 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
 
     params_keys = []
 
-    # XXX: This mutates the global pycalphad namespace
+    # This mutates the global pycalphad namespace
     for key in parameters.keys():
         if not hasattr(v, key):
             setattr(v, key, v.IndependentPotential(key))
         params_keys.append(getattr(v, key))
-        # XXX: Mutates argument to function
+        # Mutates argument to function
         dbf.symbols.pop(key,None)
         
     activity_datasets = datasets.search(
@@ -268,3 +267,4 @@ def get_likelihood(self, parameters: npt.NDArray) -> Tuple[float, List[float]]:
 
 
 residual_function_registry.register(ActivityResidual)
+

From dd4f9f0e6651fcec3a4c691a9892104ba9f6ffa0 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 23 Apr 2025 20:51:45 -0500
Subject: [PATCH 22/27] Update equilibrium_thermochemical_error.py

---
 .../equilibrium_thermochemical_error.py       | 24 ++++---------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py
index a01c000d..da9872f5 100644
--- a/espei/error_functions/equilibrium_thermochemical_error.py
+++ b/espei/error_functions/equilibrium_thermochemical_error.py
@@ -12,14 +12,13 @@
 from tinydb import where
 from scipy.stats import norm
 from pycalphad import Database, Model, ReferenceState, variables as v
-from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition
+from pycalphad.core.utils import instantiate_models, filter_phases, unpack_species, unpack_condition
 from pycalphad.codegen.phase_record_factory import PhaseRecordFactory
 from pycalphad import Workspace, as_property
 from pycalphad.property_framework import JanssonDerivative
 
 from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
 from espei.phase_models import PhaseModelSpecification
-from espei.shadow_functions import update_phase_record_parameters
 from espei.typing import SymbolName
 from espei.utils import PickleableTinyDB, database_symbols_to_fit
 
@@ -74,23 +73,20 @@ def build_eqpropdata(data: tinydb.database.Document,
         'SM':   0.2,  # J/K-mol
         'CPM':  0.2,  # J/K-mol
     }
-
-    #params_keys, _ = extract_parameters(parameters)
     
     params_keys = []
 
-    # XXX: This mutates the global pycalphad namespace
+    # This mutates the global pycalphad namespace
     for key in parameters.keys():
         if not hasattr(v, key):
             setattr(v, key, v.IndependentPotential(key))
         params_keys.append(getattr(v, key))
-        # XXX: Mutates argument to function
+        # Mutates argument to function
         dbf.symbols.pop(key,None)
 
     data_comps = list(set(data['components']).union({'VA'}))
     species = sorted(unpack_species(dbf, data_comps), key=str)
     data_phases = filter_phases(dbf, species, candidate_phases=data['phases'])
-    #models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters)
     models = instantiate_models(dbf, species, data_phases, model=model)
     output = data['output']
     property_output = output.split('_')[0]  # property without _FORM, _MIX, etc.
@@ -112,15 +108,6 @@ def build_eqpropdata(data: tinydb.database.Document,
     comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')])
 
     phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds, **parameters}, models)
-    #wks = Workspace(dbf, species, data_phases, {**pot_conds, **comp_conds, **parameters}, models=model)
-    #phase_record_factory = wks.phase_record_factory
-
-    # Now we need to unravel the composition conditions
-    # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the
-    # composition conditions are only broadcast against the potentials, not
-    # each other. Each individual composition needs to be computed
-    # independently, since broadcasting over composition cannot be turned off
-    # in pycalphad.
     rav_comp_conds = [OrderedDict(zip(comp_conds.keys(), pt_comps)) for pt_comps in zip(*comp_conds.values())]
 
     # Build weights, should be the same size as the values
@@ -217,7 +204,6 @@ def calc_prop_differences(eqpropdata: EqPropData,
     pot_conds = eqpropdata.potential_conds
     models = eqpropdata.models
     phase_record_factory = eqpropdata.phase_record_factory
-    #update_phase_record_parameters(phase_record_factory, parameters)
     params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters))
     output = as_property(eqpropdata.output)
     weights = np.array(eqpropdata.weight, dtype=np.float64)
@@ -229,9 +215,7 @@ def calc_prop_differences(eqpropdata: EqPropData,
     for comp_conds in eqpropdata.composition_conds:
         cond_dict = OrderedDict(**pot_conds, **comp_conds, **params_dict)
         wks.conditions = cond_dict
-        #wks.parameters = params_dict  # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc.
         wks.models = models
-        #wks.phase_record_factory = phase_record_factory
         vals = wks.get(output)
         calculated_data.extend(np.atleast_1d(vals).tolist())
         gradient_props = [JanssonDerivative(output, key) for key in params_dict]
@@ -332,7 +316,7 @@ def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[fl
         residuals = []
         weights = []
         for data in self.property_data:
-            dataset_residuals, dataset_weights = calc_prop_differences(data, parameters)
+            dataset_residuals, dataset_weights, dataset_grads = calc_prop_differences(data, parameters)
             residuals.extend(dataset_residuals.tolist())
             weights.extend(dataset_weights.tolist())
         return residuals, weights

From c995edf927f763dad4538fc4d12c82695320b767 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 23 Apr 2025 20:54:18 -0500
Subject: [PATCH 23/27] Update non_equilibrium_thermochemical_error.py

---
 .../non_equilibrium_thermochemical_error.py            | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py
index 77528448..02d8f21d 100644
--- a/espei/error_functions/non_equilibrium_thermochemical_error.py
+++ b/espei/error_functions/non_equilibrium_thermochemical_error.py
@@ -16,7 +16,7 @@
 from pycalphad import Database, Model, ReferenceState, variables as v
 from pycalphad.core.utils import unpack_species, get_pure_elements, filter_phases
 from pycalphad import Workspace
-from pycalphad.property_framework import IsolatedPhase, JanssonDerivative, ModelComputedProperty
+from pycalphad.property_framework import IsolatedPhase, ModelComputedProperty
 from pycalphad.property_framework.metaproperties import find_first_compset
 from pycalphad.core.solver import Solver, SolverResult
 
@@ -277,12 +277,12 @@ def get_thermochemical_data(dbf, comps, phases, datasets, parameters, model=None
     
     params_keys = []
 
-    # XXX: This mutates the global pycalphad namespace
+    # This mutates the global pycalphad namespace
     for key in parameters.keys():
         if not hasattr(v, key):
             setattr(v, key, v.IndependentPotential(key))
         params_keys.append(getattr(v, key))
-        # XXX: Mutates argument to function
+        # Mutates argument to function
         dbf.symbols.pop(key,None)
         
     # phase by phase, then property by property, then by model exclusions
@@ -341,7 +341,6 @@ def get_thermochemical_data(dbf, comps, phases, datasets, parameters, model=None
                 curr_data = filter_temperatures(curr_data)
                 calculate_dict = get_prop_samples(curr_data, constituents)
                 model_cls = model.get(phase_name, Model)
-                #mod = model_cls(dbf, comps, phase_name, parameters=symbols_to_fit)
                 mod = model_cls(dbf, comps, phase_name)
                 if prop.endswith('_FORM'):
                     output = ''.join(prop.split('_')[:-1])+"R"
@@ -380,7 +379,6 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
     phase_name = calc_data['phase_name']
     models = calc_data['model']  # Dict[PhaseName: Model]
     output = calc_data['output']
-    #phase_record_factory = calc_data['phase_record_factory']
     sample_values = calc_data['calculate_dict']['values']
     str_statevar_dict = calc_data['str_statevar_dict']
 
@@ -420,9 +418,7 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig
         # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why.
         # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered.
         # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid.
-        #wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_record_factory, solver=NoSolveSolver())
         wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, solver=NoSolveSolver())
-        #### Do we need the degrees of freedom as a condition here? Or does this break what is happening next?
         # We then get a composition set and we use a special "NoSolveSolver" to
         # ensure that we don't change from the data-specified DOF.
         compset = find_first_compset(phase_name, wks)

From 0ebfeaa53d3f7e172ff1afb3de4b6b7e471c7e08 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Wed, 23 Apr 2025 20:56:03 -0500
Subject: [PATCH 24/27] Update zpf_error.py

---
 espei/error_functions/zpf_error.py | 53 ++++++++----------------------
 1 file changed, 13 insertions(+), 40 deletions(-)

diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py
index 1cdc094c..cc0dd530 100644
--- a/espei/error_functions/zpf_error.py
+++ b/espei/error_functions/zpf_error.py
@@ -29,7 +29,7 @@
 import tinydb
 
 from espei.phase_models import PhaseModelSpecification
-from espei.shadow_functions import calculate_, update_phase_record_parameters
+from espei.shadow_functions import calculate_
 from espei.typing import SymbolName
 from espei.utils import PickleableTinyDB, database_symbols_to_fit
 from .residual_base import ResidualFunction, residual_function_registry
@@ -228,10 +228,7 @@ def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameter_dict
     species = phase_region.species
     phases = phase_region.phases
     models = phase_region.models
-    #param_keys = list(models.values())[0]._parameters_arg
-    #parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters))
     for vertex in phase_region.hyperplane_vertices:
-        #update_phase_record_parameters(vertex.phase_record_factory, parameters)
         cond_dict = {**vertex.comp_conds, **phase_region.potential_conds}
         params_keys = list(parameter_dict.keys())
         for index in range(len(parameters)):
@@ -263,9 +260,7 @@ def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameter_dict
                 target_hyperplane_chempots.append(MU_values)
                 target_hyperplane_chempots_grads.append(gradients_magnitude)
     target_hyperplane_mean_chempots = np.nanmean(target_hyperplane_chempots, axis=0, dtype=np.float64)
-    #print(target_hyperplane_mean_chempots)
     target_hyperplane_chempots_grads = np.nanmean(target_hyperplane_chempots_grads, axis=0, dtype=np.float64) 
-    #print(target_hyperplane_chempots_grads)
     return target_hyperplane_mean_chempots, target_hyperplane_chempots_grads
 
 
@@ -276,8 +271,6 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
     """
     species = phase_region.species
     models = phase_region.models
-    #param_keys = list(models.values())[0]._parameters_arg
-    #parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters))
     current_phase = vertex.phase_name
     cond_dict = {**phase_region.potential_conds, **vertex.comp_conds}
     params_keys = list(parameter_dict.keys())
@@ -285,7 +278,6 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
         cond_dict.update({params_keys[index]: parameters[index]})
     str_statevar_dict = OrderedDict([(str(key),cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)])
     phase_record_factory = vertex.phase_record_factory
-    #update_phase_record_parameters(phase_record_factory, parameters)
     if vertex.has_missing_comp_cond:
         for index in range(len(parameters)):
             str_statevar_dict.update({params_keys[index]: parameters[index]})
@@ -294,16 +286,24 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
         # stoichiometric and the user did not specify a valid phase composition.
         single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_record_factory, pdens=50) ## SOMETHING WEIRD HAPPENS WHEN PDENS IS TOO HIGH!
         df = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM
-        #driving_force = float(df.max())
         
         if np.squeeze(single_eqdata.X).ndim == 1:
             vertex_comp_estimate = np.squeeze(single_eqdata.X)
+        elif (np.isnan(df).all()):
+            driving_force = np.nan
+            driving_force_gradient = np.full(len(parameters), np.nan)
+            return driving_force, driving_force_gradient
         else:
             vertex_comp_estimate = np.squeeze(single_eqdata.X)[np.nanargmax(df),:]
+            
         counter = 0
         for comp in species:
             if v.Species(comp) != v.Species('VA'):
                 if v.X(comp) in cond_dict.keys():
+                    if vertex_comp_estimate[counter] < 5e-6:
+                        vertex_comp_estimate[counter] = 5e-6
+                    elif vertex_comp_estimate[counter] > 1 - 5e-6:
+                        vertex_comp_estimate[counter] = 1 - 5e-6
                     cond_dict.update({v.X(comp): vertex_comp_estimate[counter]})
                 counter = counter + 1
                 
@@ -311,25 +311,12 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
             
         wks = Workspace(database=dbf, components=species, phases=current_phase,  phase_record_factory=phase_record_factory, conditions=cond_dict)
         constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
-        #print(constrained_energy)
         driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex_comp_estimate) - constrained_energy
-        #constrained_energy_gradient = []
-        #for key in parameter_dict:
-        #    constrained_energy_gradient.append(wks.get(IsolatedPhase(current_phase,wks=wks)('GM.'+key)))
-            
-        #gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
-        #constrained_energy_gradient = wks.get(*gradient_params)
-        
-        #ip = IsolatedPhase(current_phase, wks=wks)
-        #gradient_params = [JanssonDerivative(ip('GM'), key) for key in parameter_dict]
-        #constrained_energy_gradient = wks.get(*gradient_params)
-        
         ip = IsolatedPhase(current_phase, wks=wks)
         constrained_energy_gradient = []
         for key in parameter_dict:
             constrained_energy_gradient.append(wks.get(ip('GM.'+key)))
         
-    
         driving_force_gradient = np.squeeze(np.matmul(vertex_comp_estimate,target_hyperplane_chempots_grads) - constrained_energy_gradient)
         
     elif vertex.is_disordered:
@@ -361,21 +348,11 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, target_h
     else:
         wks = Workspace(database=dbf, components=species, phases=current_phase, models=models, phase_record_factory=phase_record_factory, conditions=cond_dict)
         constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM'))
-        #print(constrained_energy)
         driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex.composition) - constrained_energy
-        #print(driving_force)
         constrained_energy_gradient = []
         for key in parameter_dict:
             constrained_energy_gradient.append(wks.get(IsolatedPhase(current_phase,wks=wks)('GM.'+key)))
-        #gradient_params = [JanssonDerivative(IsolatedPhase(current_phase, wks=wks)('GM'), key) for key in parameter_dict]
-        #gradients = wks.get(*gradient_params)
-        #if type(gradients) is list:
-        #    constrained_energy_gradient = [float(element) for element in gradients]
-        #else:
-        #    constrained_energy_gradient = gradients
-        #print(constrained_energy_gradient)
         driving_force_gradient = np.squeeze(np.matmul(np.reshape(vertex.composition,(1,-1)),target_hyperplane_chempots_grads) - constrained_energy_gradient)
-        #print(driving_force_gradient)
     return driving_force, driving_force_gradient
 
 
@@ -428,11 +405,9 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]],
         for phase_region in data['phase_regions']:
             # 1. Calculate the average multiphase hyperplane
             eq_str = phase_region.eq_str()
-            #target_hyperplane = estimate_hyperplane(phase_region, data['dbf'], parameters, approximate_equilibrium=approximate_equilibrium)
             target_hyperplane, hyperplane_grads = estimate_hyperplane(phase_region, data['dbf'], data['parameter_dict'], parameters, approximate_equilibrium=approximate_equilibrium)
             if np.any(np.isnan(target_hyperplane)):
                 _log.debug('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref)
-                #print('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref)
                 data_driving_forces.extend([0]*len(phase_region.vertices))
                 data_weights.extend([weight]*len(phase_region.vertices))
                 if len(parameters) == 1:
@@ -442,9 +417,6 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]],
                 continue
             # 2. Calculate the driving force to that hyperplane for each vertex
             for vertex in phase_region.vertices:
-                #driving_force = driving_force_to_hyperplane(target_hyperplane, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters,
-                 #                                           approximate_equilibrium=approximate_equilibrium,
-                 #                                           )
                 driving_force, df_grad = driving_force_to_hyperplane(target_hyperplane, hyperplane_grads, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters, approximate_equilibrium=approximate_equilibrium)
                 if np.isinf(driving_force) and short_circuit:
                     _log.debug('Equilibria: (%s), current phase: %s, hyperplane: %s, driving force: %s, reference: %s. Short circuiting.', eq_str, vertex.phase_name, target_hyperplane, driving_force, dataset_ref)
@@ -483,12 +455,13 @@ def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]],
     gradients = np.concatenate(gradients)
     if np.any(np.logical_or(np.isinf(driving_forces), np.isnan(driving_forces))):
         if len(parameters) == 1:
-            return -np.inf, -np.inf
+            return -np.inf, -np.inf, 1
         else:
-            return -np.inf, np.ones(len(parameters))*(-np.inf)
+            return -np.inf, np.ones(len(parameters))*(-np.inf), np.ones(len(parameters))
         
     log_probabilites = norm.logpdf(driving_forces, loc=0, scale=1000/data_weight/weights)
     grad_log_probs = -driving_forces*gradients.T/(1000/data_weight/weights)**2
+
     if grad_log_probs.ndim == 1:
         grad_log_probs_sum = np.sum(grad_log_probs, axis =0)
     else:

From 4888897eec880b6b9a67484dfb680d7de0a00750 Mon Sep 17 00:00:00 2001
From: Courtney Kunselman <53878004+cjkunselman18@users.noreply.github.com>
Date: Thu, 24 Apr 2025 19:18:36 -0500
Subject: [PATCH 25/27] Update zpf_error.py

---
 espei/error_functions/zpf_error.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py
index cc0dd530..121b4a8a 100644
--- a/espei/error_functions/zpf_error.py
+++ b/espei/error_functions/zpf_error.py
@@ -455,9 +455,9 @@ def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]],
     gradients = np.concatenate(gradients)
     if np.any(np.logical_or(np.isinf(driving_forces), np.isnan(driving_forces))):
         if len(parameters) == 1:
-            return -np.inf, -np.inf, 1
+            return -np.inf, -np.inf
         else:
-            return -np.inf, np.ones(len(parameters))*(-np.inf), np.ones(len(parameters))
+            return -np.inf, np.ones(len(parameters))*(-np.inf)
         
     log_probabilites = norm.logpdf(driving_forces, loc=0, scale=1000/data_weight/weights)
     grad_log_probs = -driving_forces*gradients.T/(1000/data_weight/weights)**2

From a69125b56d1f41d4562aac50c9e5aaa348bbb36e Mon Sep 17 00:00:00 2001
From: Brandon Bocklund <bocklund1@llnl.gov>
Date: Wed, 30 Jul 2025 19:45:48 -0700
Subject: [PATCH 26/27] Activity error bugfix when there is no activity data
 (gradients should be zero vector of parameters, not an empty list)

---
 espei/error_functions/activity_error.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py
index 690f3b0c..02916030 100644
--- a/espei/error_functions/activity_error.py
+++ b/espei/error_functions/activity_error.py
@@ -50,10 +50,10 @@ def target_chempots_from_activity(component, parameters, target_activity, temper
     numpy.ndarray
         Array of experimental chemical potentials
     """
-    
+
     ref_chempot = wks_ref.get(v.MU(component))
     exp_chem_pots = v.R * temperatures * np.log(target_activity) + ref_chempot
-    
+
     gradient_props = [JanssonDerivative(v.MU(component), key) for key in parameters]
     gradients = wks_ref.get(*gradient_props)
     if type(gradients) is list:
@@ -91,7 +91,7 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
         params_keys.append(getattr(v, key))
         # Mutates argument to function
         dbf.symbols.pop(key,None)
-        
+
     activity_datasets = datasets.search(
         (tinydb.where('output').test(lambda x: 'ACR' in x)) &
         (tinydb.where('components').test(lambda x: set(x).issubset(comps))))
@@ -196,8 +196,8 @@ def calculate_activity_error(dbf, comps, phases, datasets, parameters=None, phas
     residuals, weights, gradients = calculate_activity_residuals(dbf, comps, phases, datasets, parameters=parameters, phase_models=phase_models, callables=callables, data_weight=data_weight)
     likelihood = np.sum(norm(0, scale=weights).logpdf(residuals))
     if len(gradients) == 0:
-        likelihood_grads = []
-    else: 
+        likelihood_grads = np.zeros(len(parameters))
+    else:
         gradients = np.concatenate(gradients)
         derivative = -np.array(residuals)*np.array(gradients).T/np.array(weights)**2
         if derivative.ndim == 1:
@@ -267,4 +267,3 @@ def get_likelihood(self, parameters: npt.NDArray) -> Tuple[float, List[float]]:
 
 
 residual_function_registry.register(ActivityResidual)
-

From fb035ac67fb1cdc7d9866f9c103c330d8fa993fc Mon Sep 17 00:00:00 2001
From: Brandon Bocklund <bocklund1@llnl.gov>
Date: Sat, 16 Aug 2025 20:21:02 -0700
Subject: [PATCH 27/27] Update scipy optimizer

---
 espei/optimizers/opt_base.py  |  2 +-
 espei/optimizers/opt_scipy.py | 23 +++++++++++++++--------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/espei/optimizers/opt_base.py b/espei/optimizers/opt_base.py
index d0d80d9c..7134372e 100644
--- a/espei/optimizers/opt_base.py
+++ b/espei/optimizers/opt_base.py
@@ -4,7 +4,7 @@
 from .utils import OptimizerError
 
 class OptimizerBase(object):
-    """Enable fitting and replaying fitting steps"""
+    """Base class for ESPEI optimizers"""
     def __init__(self, dbf):
         self.orig_dbf = copy.deepcopy(dbf)
         self.dbf = copy.deepcopy(dbf)
diff --git a/espei/optimizers/opt_scipy.py b/espei/optimizers/opt_scipy.py
index 93a5c014..e51e732d 100644
--- a/espei/optimizers/opt_scipy.py
+++ b/espei/optimizers/opt_scipy.py
@@ -10,18 +10,19 @@
 
 
 class SciPyOptimizer(OptimizerBase):
-    def _fit(self, symbols, ds, method='Powell', maxiter=50, verbose=False, **kwargs):
+    """ESPEI optimizer using SciPy's optimize.minimize function"""
+    def _fit(self, symbols, ds, method='CG', maxiter=50, verbose=False, data_weights=None, **kwargs):
         # Use Scipy's minimize with the Powell method to optimize Lnprob
-        ctx = setup_context(self.dbf, ds, symbols)
-
-        symbols_to_fit = ctx['symbols_to_fit']
-        initial_guess = np.array([unpack_piecewise(self.dbf.symbols[s]) for s in symbols_to_fit])
+        symbols_to_fit = symbols
+        initial_guess = np.array([unpack_piecewise(self.dbf.symbols[s]) for s in symbols])
+        ctx = setup_context(self.dbf, ds, symbols, data_weights=data_weights) # data_weights=mcmc_data_weights, phase_models=self.phase_models, make_callables=cbs)
         if verbose:
             print('Fitting', symbols_to_fit)
             print('Initial guess', initial_guess)
         s = time.time()
-        out = minimize(self.predict, initial_guess, args=(ctx,), method=method,
+        out = minimize(self.predict, initial_guess, args=(ctx,), method=method, jac=True,
                        options={'maxiter': maxiter}, **kwargs)
+        self.minimize_result = out
         xs = np.atleast_1d(out.x).tolist()
         if verbose:
             print('Found', xs, 'in', int(time.time() - s), 's')
@@ -32,12 +33,18 @@ def _fit(self, symbols, ds, method='Powell', maxiter=50, verbose=False, **kwargs
     def predict(params, ctx):
         starttime = time.time()
         lnlike = 0.0
+        grads = []
         likelihoods = {}
         for residual_obj in ctx.get("residual_objs", []):
-            likelihood = residual_obj.get_likelihood(params)
+            likelihood, likelihood_grad = residual_obj.get_likelihood(params)
             likelihoods[type(residual_obj).__name__] = likelihood
             lnlike += likelihood
+            grads.append(likelihood_grad)
+        grad = np.sum(grads, axis=0)
+
         like_str = ". ".join([f"{ky}: {vl:0.3f}" for ky, vl in likelihoods.items()])
+        grad_L_str = "[" + ",".join(f"{x:0.3f}" for x in grad) + "]"
         _log.trace('Likelihood - %0.2fs. %s. Total: %0.3f.', time.time() - starttime, like_str, lnlike)
+        _log.trace('∇L = {%s}', grad_L_str)
         error = np.array(lnlike, dtype=np.float64)
-        return error
+        return -error, -grad