diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml index 31b14c38..d421b0db 100644 --- a/.github/workflows/test-package.yml +++ b/.github/workflows/test-package.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index f532f96c..562bcab2 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,26 @@ # CatLearn -CatLearn utilieties machine learning in form of Gaussian Process or Student T process to accelerate catalysis simulations. The Nudged-elastic-band method (NEB) is accelerated with MLNEB code. Furthermore, a global adsorption search is accelerated with the MLGO code. -CalLearn uses ASE for handling the atomic systems and the calculator interface for the potential energy calculations. +CatLearn utilizes machine learning, specifically the Gaussian Process or Student T process, to accelerate catalysis simulations. + +The local optimization of a structure is accelerated with the `LocalAL` code. +The Nudged-elastic-band method (NEB) is accelerated with the `MLNEB` code. +Furthermore, a global adsorption search without local relaxation is accelerated with the `AdsorptionAL` code. +Additionally, a global adsorption search with local relaxation is accelerated with the `MLGO` code. +At last, a random sampling of adsorbate positions, combined with local relaxation, accelerates the global adsorption search with the `RandomAdsorptionAL` code. + +CatLearn uses ASE to handle atomic systems and the calculator interface to calculate the potential energy. 
## Installation -You can simply install CatLearn by dowloading it from github as: +You can install CatLearn by downloading it from GitHub as: ```shell -$ git clone https://github.com/avishart/CatLearn +$ git clone --single-branch --branch activelearning https://github.com/avishart/CatLearn $ pip install -e CatLearn/. ``` -You can also install CatLearn directly from github: +You can also install CatLearn directly from GitHub: ```shell -$ pip install git@github.com:avishart/CatLearn.git +$ pip install git@github.com:avishart/CatLearn.git@activelearning ``` However, it is recommended to install a specific tag to ensure it is a stable version: @@ -21,12 +28,74 @@ However, it is recommended to install a specific tag to ensure it is a stable ve $ pip install git+https://github.com/avishart/CatLearn.git@v.x.x.x ``` +The dependency of ASE has only been thoroughly tested up to version 3.26.0. + ## Usage +The active learning class is generalized to work for any defined optimizer method for ASE `Atoms` structures. The optimization method is executed iteratively with a machine-learning calculator that is retrained for each iteration. The active learning converges when the uncertainty is low (`unc_convergence`) and the energy change is within `unc_convergence` or the maximum force is within the tolerance value set. + +Predefined active learning methods are created: `LocalAL`, `MLNEB`, `AdsorptionAL`, `MLGO`, and `RandomAdsorptionAL`. -The following code shows how to use MLNEB: +The outputs of the active learning are `predicted.traj`, `evaluated.traj`, `predicted_evaluated.traj`, `converged.traj`, `initial_struc.traj`, `ml_summary.txt`, and `ml_time.txt`: +- The `predicted.traj` file contains the structures that the machine-learning calculator predicts after each optimization loop. +- The training data and ASE calculator evaluated structures are within `evaluated.traj` file. 
+- The `predicted_evaluated.traj` file has the exact same structures as the `evaluated.traj` file, but with machine-learning predicted properties. +- The converged structures calculated with the machine-learning calculator are saved in the `converged.traj` file. +- The initial structure(s) is/are saved into the `initial_struc.traj` file. +- The summary of the active learning is saved into a table in the `ml_summary.txt` file. +- The time spent on structure evaluation, machine-learning training, and prediction at each iteration is stored in `ml_time.txt`. + +### LocalAL +The following code shows how to use `LocalAL`: ```python -from catlearn.optimize.mlneb import MLNEB +from catlearn.activelearning.local import LocalAL from ase.io import read +from ase.optimize import FIRE + +# Load initial structure +atoms = read("initial.traj") + +# Make the ASE calculator +calc = ... + +# Initialize local optimization +dyn = LocalAL( + atoms=atoms, + ase_calc=calc, + unc_convergence=0.05, + local_opt=FIRE, + local_opt_kwargs={}, + save_memory=False, + use_restart=True, + min_data=3, + restart=False, + verbose=True, +) +dyn.run( + fmax=0.05, + max_unc=0.30, + steps=100, + ml_steps=1000, +) + +``` + +The active learning minimization can be visualized by extending the Python script with the following code: +```python +import matplotlib.pyplot as plt +from catlearn.tools.plot import plot_minimize + +fig, ax = plt.subplots() +plot_minimize("predicted_evaluated.traj", "evaluated.traj", ax=ax) +plt.savefig('AL_minimization.png') +plt.close() +``` + +### MLNEB +The following code shows how to use `MLNEB`: +```python +from catlearn.activelearning.mlneb import MLNEB +from ase.io import read +from ase.optimize import FIRE # Load endpoints initial = read("initial.traj") @@ -40,13 +109,24 @@ mlneb = MLNEB( start=initial, end=final, ase_calc=calc, - interpolation="linear", + unc_convergence=0.05, n_images=15, - full_output=True, + neb_method="improvedtangentneb", + neb_kwargs={}, + 
neb_interpolation="linear", + start_without_ci=True, + reuse_ci_path=True, + save_memory=False, + parallel_run=False, + local_opt=FIRE, + local_opt_kwargs={}, + use_restart=True, + min_data=3, + restart=False, + verbose=True, ) mlneb.run( fmax=0.05, - unc_convergence=0.05, max_unc=0.30, steps=100, ml_steps=1000, @@ -54,9 +134,53 @@ mlneb.run( ``` -The following code shows how to use MLGO: +The `MLNEB` optimization can be restarted from the last predicted path and reusing the training data with the argument `restart=True`. Alternatively, the optimization can be restarted from the last predicted path without reusing the training data by setting the `neb_interpolation="predicted.traj"`. + +The obtained NEB band from the MLNEB optimization can be visualized in three ways. + +The converged NEB band with uncertainties can be visualized by extending the Python code with the following code: +```python +import matplotlib.pyplot as plt +from catlearn.tools.plot import plot_neb + +fig, ax = plt.subplots() +plot_neb(mlneb.get_structures(), use_uncertainty=True, ax=ax) +plt.savefig('Converged_NEB.png') +plt.close() +``` + +The converged NEB band can also be plotted with the predicted curve between the images by extending with the following code: +```python +import matplotlib.pyplot as plt +from catlearn.tools.plot import plot_neb_fit_mlcalc + +fig, ax = plt.subplots() +plot_neb_fit_mlcalc( + mlneb.get_structures(), + mlcalc=mlneb.get_mlcalc(), + use_uncertainty=True, + include_noise=True, + ax=ax, +) +plt.savefig('Converged_NEB_fit.png') +plt.close() +``` + +All the obtained NEB bands from `MLNEB` can also be visualized within the same figure by using the following code: +```python +import matplotlib.pyplot as plt +from catlearn.tools.plot import plot_all_neb + +fig, ax = plt.subplots() +plot_all_neb("predicted.traj", n_images=15, ax=ax) +plt.savefig('All_NEB_paths.png') +plt.close() +``` + +### AdsorptionAL +The following code shows how to use `AdsorptionAL`: ```python -from 
catlearn.optimize.mlgo import MLGO +from catlearn.activelearning.adsorption import AdsorptionAL from ase.io import read # Load the slab and the adsorbate @@ -79,17 +203,136 @@ bounds = np.array( ) # Initialize MLGO -mlgo = MLGO(slab, ads, ase_calc=calc, bounds=bounds, full_output=True) +dyn = AdsorptionAL( + slab=slab, + adsorbate=ads, + adsorbate2=None, + ase_calc=calc, + unc_convergence=0.02, + bounds=bounds, + opt_kwargs={}, + parallel_run=False, + min_data=3, + restart=False, + verbose=True +) +dyn.run( + fmax=0.05, + max_unc=0.30, + steps=100, + ml_steps=4000, +) + +``` + +The `AdsorptionAL` optimization can be visualized in the same way as the `LocalAL` optimization. + +### MLGO +The following code shows how to use `MLGO`: +```python +from catlearn.activelearning.mlgo import MLGO +from ase.io import read +from ase.optimize import FIRE + +# Load the slab and the adsorbate +slab = read("slab.traj") +ads = read("adsorbate.traj") + +# Make the ASE calculator +calc = ... + +# Make the boundary conditions for the adsorbate +bounds = np.array( + [ + [0.0, 1.0], + [0.0, 1.0], + [0.5, 1.0], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + ] +) + +# Initialize MLGO +mlgo = MLGO( + slab=slab, + adsorbate=ads, + adsorbate2=None, + ase_calc=calc, + unc_convergence=0.02, + bounds=bounds, + opt_kwargs={}, + local_opt=FIRE, + local_opt_kwargs={}, + reuse_data_local=True, + parallel_run=False, + min_data=3, + restart=False, + verbose=True +) mlgo.run( fmax=0.05, + max_unc=0.30, + steps=100, + ml_steps=4000, +) + +``` + +The `MLGO` optimization can be visualized in the same way as the `LocalAL` optimization. + +### RandomAdsorptionAL +The following code shows how to use `RandomAdsorptionAL`: +```python +from catlearn.activelearning.randomadsorption import RandomAdsorptionAL +from ase.io import read +from ase.optimize import FIRE + +# Load the slab and the adsorbate +slab = read("slab.traj") +ads = read("adsorbate.traj") + +# Make the ASE calculator +calc = ... 
+ +# Make the boundary conditions for the adsorbate +bounds = np.array( + [ + [0.0, 1.0], + [0.0, 1.0], + [0.5, 1.0], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + ] +) + +# Initialize MLGO +dyn = RandomAdsorptionAL( + slab=slab, + adsorbate=ads, + adsorbate2=None, + ase_calc=calc, unc_convergence=0.02, + bounds=bounds, + n_random_draws=200, + use_initial_opt=False, + initial_fmax=0.2, + use_repulsive_check=True, + local_opt=FIRE, + local_opt_kwargs={}, + parallel_run=False, + min_data=3, + restart=False, + verbose=True +) +dyn.run( + fmax=0.05, max_unc=0.30, steps=100, - ml_steps=1000, - ml_chains=8, - relax=True, - local_steps=500, + ml_steps=4000, ) ``` +The `RandomAdsorptionAL` optimization can be visualized in the same way as the `LocalAL` optimization. diff --git a/catlearn/_version.py b/catlearn/_version.py index 8a09eb69..64809e13 100644 --- a/catlearn/_version.py +++ b/catlearn/_version.py @@ -1,3 +1,3 @@ -__version__ = "5.6.1" +__version__ = "7.2.0" __all__ = ["__version__"] diff --git a/catlearn/optimize/__init__.py b/catlearn/activelearning/__init__.py similarity index 64% rename from catlearn/optimize/__init__.py rename to catlearn/activelearning/__init__.py index 772fce19..de8ec0e6 100644 --- a/catlearn/optimize/__init__.py +++ b/catlearn/activelearning/__init__.py @@ -1,5 +1,3 @@ -from .mlneb import MLNEB -from .mlgo import MLGO from .acquisition import ( Acquisition, AcqEnergy, @@ -13,10 +11,14 @@ AcqEI, AcqPI, ) +from .activelearning import ActiveLearning +from .local import LocalAL +from .mlneb import MLNEB +from .adsorption import AdsorptionAL +from .mlgo import MLGO +from .randomadsorption import RandomAdsorptionAL __all__ = [ - "MLNEB", - "MLGO", "Acquisition", "AcqEnergy", "AcqUncertainty", @@ -28,4 +30,10 @@ "AcqULCB", "AcqEI", "AcqPI", + "ActiveLearning", + "LocalAL", + "MLNEB", + "AdsorptionAL", + "MLGO", + "RandomAdsorptionAL", ] diff --git a/catlearn/activelearning/acquisition.py b/catlearn/activelearning/acquisition.py 
new file mode 100644 index 00000000..ca0d2b2f --- /dev/null +++ b/catlearn/activelearning/acquisition.py @@ -0,0 +1,821 @@ +from numpy import argsort, array, max as max_ +from numpy.random import default_rng, Generator, RandomState +from scipy.stats import norm + + +class Acquisition: + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + """ + + def __init__(self, objective="min", seed=None, **kwargs): + """ + Initialize the Acquisition instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + self.update_arguments(objective=objective, seed=seed, **kwargs) + + def calculate(self, energy, uncertainty=None, **kwargs): + "Calculate the acquisition function value." + raise NotImplementedError() + + def choose(self, values): + "Sort a list of acquisition function values." + if self.objective == "min": + return argsort(values) + elif self.objective == "max": + return argsort(values)[::-1] + elif self.objective == "random": + return self.rng.permutation(len(values)) + raise ValueError("The objective should be 'min', 'max' or 'random'.") + + def objective_value(self, value): + "Return the value by changing the sign dependent on the method." + if self.objective == "min": + return -value + return value + + def update_arguments(self, objective=None, seed=None, **kwargs): + """ + Set the parameters of the Acquisition function instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. 
+ The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the objective + if objective is not None: + self.objective = objective.lower() + return self + + def set_seed(self, seed=None): + "Set the random seed." + if seed is not None: + self.seed = seed + if isinstance(seed, int): + self.rng = default_rng(self.seed) + elif isinstance(seed, Generator) or isinstance(seed, RandomState): + self.rng = seed + else: + self.seed = None + self.rng = default_rng() + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict(objective=self.objective, seed=self.seed) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + def copy(self): + "Copy the object." + # Get all arguments + arg_kwargs, constant_kwargs, object_kwargs = self.get_arguments() + # Make a clone + clone = self.__class__(**arg_kwargs) + # Check if constants have to be saved + if len(constant_kwargs.keys()): + for key, value in constant_kwargs.items(): + clone.__dict__[key] = value + # Check if objects have to be saved + if len(object_kwargs.keys()): + for key, value in object_kwargs.items(): + clone.__dict__[key] = value.copy() + return clone + + def __repr__(self): + arg_kwargs = self.get_arguments()[0] + str_kwargs = ",".join( + [f"{key}={value}" for key, value in arg_kwargs.items()] + ) + return "{}({})".format(self.__class__.__name__, str_kwargs) + + +class AcqEnergy(Acquisition): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted energy is used as the acquisition function. 
+ """ + + def calculate(self, energy, uncertainty=None, **kwargs): + "Calculate the acquisition function value as the predicted energy." + return energy + + +class AcqUncertainty(Acquisition): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted uncertainty as the acquisition function. + """ + + def __init__(self, objective="max", seed=None, **kwargs): + """ + Initialize the Acquisition instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + - 'draw': Sort by drawing from the uncertainty squared. + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + super().__init__(objective=objective, seed=seed, **kwargs) + + def calculate(self, energy, uncertainty=None, **kwargs): + "Calculate the acquisition value as the predicted uncertainty." + return uncertainty + + def choose(self, values): + "Sort a list of acquisition function values." + if self.objective == "min": + return argsort(values) + elif self.objective == "max": + return argsort(values)[::-1] + elif self.objective == "random": + return self.rng.permutation(len(values)) + elif self.objective == "draw": + values = array(values) ** 2 + p = values / values.sum() + return self.rng.choice( + len(values), + size=len(values), + replace=False, + p=p, + ) + raise ValueError( + "The objective should be 'min', 'max', 'random', or 'draw'." + ) + + def update_arguments(self, objective=None, seed=None, **kwargs): + """ + Set the parameters of the Acquisition function instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. 
+ - 'random': Sort randomly + - 'draw': Sort by drawing from the uncertainty squared. + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + return super().update_arguments( + objective=objective, + seed=seed, + ) + + +class AcqUCB(Acquisition): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted upper confidence interval (ucb) as the acquisition function. + """ + + def __init__( + self, + objective="max", + seed=None, + kappa=2.0, + kappamax=3.0, + **kwargs, + ): + """ + Initialize the Acquisition instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + kappa: float or str + The kappa value for the upper confidence interval. + If a string, a random value between 0 and kappamax is used. + kappamax: float + The maximum kappa value for the upper confidence interval. + If kappa is a string, a random value between 0 and this value + is used. + """ + self.update_arguments( + objective=objective, + seed=seed, + kappa=kappa, + kappamax=kappamax, + **kwargs, + ) + + def calculate(self, energy, uncertainty=None, **kwargs): + "Calculate the acquisition function value as the predicted ucb." + kappa = self.get_kappa() + return energy + kappa * uncertainty + + def get_kappa(self): + "Get the kappa value." 
+ if isinstance(self.kappa, str): + return self.rng.uniform(0, self.kappamax) + return self.kappa + + def update_arguments( + self, + objective=None, + seed=None, + kappa=None, + kappamax=None, + **kwargs, + ): + """ + Set the parameters of the Acquisition function instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + kappa: float or str + The kappa value for the upper confidence interval. + If a string, a random value between 0 and kappamax is used. + kappamax: float + The maximum kappa value for the upper confidence interval. + If kappa is a string, a random value between 0 and this value + is used. + """ + # Set the parameters in the parent class + super().update_arguments( + objective=objective, + seed=seed, + ) + # Set the kappa value + if kappa is not None: + if isinstance(kappa, (float, int)): + kappa = abs(kappa) + self.kappa = kappa + # Set the kappamax value + if kappamax is not None: + self.kappamax = abs(kappamax) + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + objective=self.objective, + seed=self.seed, + kappa=self.kappa, + kappamax=self.kappamax, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + +class AcqLCB(AcqUCB): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted lower confidence interval (lcb) as the acquisition function. 
+ """ + + def __init__( + self, + objective="min", + seed=None, + kappa=2.0, + kappamax=3.0, + **kwargs, + ): + super().__init__( + objective=objective, + seed=seed, + kappa=kappa, + kappamax=kappamax, + **kwargs, + ) + + def calculate(self, energy, uncertainty=None, **kwargs): + "Calculate the acquisition function value as the predicted ucb." + kappa = self.get_kappa() + return energy - kappa * uncertainty + + +class AcqIter(Acquisition): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted energy or uncertainty dependent on the iteration as + the acquisition function. + The energy is used every niter iterations, otherwise the uncertainty. + """ + + def __init__(self, objective="max", seed=None, niter=2, **kwargs): + """ + Initialize the Acquisition instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + niter: int + The number of iterations after which the energy is used + as the acquisition function. + If niter is 1, the energy is used every iteration. + If niter is 2, the energy is used every second iteration, + etc. + """ + super().__init__(objective=objective, seed=seed, niter=niter, **kwargs) + self.iter = 0 + + def calculate(self, energy, uncertainty=None, **kwargs): + """ + Calculate the acquisition function value as + the predicted energy or uncertainty. + """ + self.iter += 1 + if (self.iter) % self.niter == 0: + return energy + return uncertainty + + def update_arguments( + self, + objective=None, + seed=None, + niter=None, + **kwargs, + ): + """ + Set the parameters of the Acquisition function instance. 
+ + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + niter: int + The number of iterations after which the energy is used + as the acquisition function. + If niter is 1, the energy is used every iteration. + If niter is 2, the energy is used every second iteration, + etc. + """ + # Set the parameters in the parent class + super().update_arguments( + objective=objective, + seed=seed, + ) + # Set the number of iterations + if niter is not None: + self.niter = abs(niter) + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + objective=self.objective, + seed=self.seed, + niter=self.niter, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + +class AcqUME(Acquisition): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted uncertainty when it is larger than unc_convergence + else predicted energy as the acquisition function. + """ + + def __init__( + self, + objective="max", + seed=None, + unc_convergence=0.05, + **kwargs, + ): + """ + Initialize the Acquisition instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. 
+ If not given, the default random number generator is used. + unc_convergence: float + The convergence threshold for the uncertainty. + If the uncertainty is below this value, the predicted energy + is used as the acquisition function. + """ + super().__init__( + objective=objective, + seed=seed, + unc_convergence=unc_convergence, + **kwargs, + ) + + def calculate(self, energy, uncertainty=None, **kwargs): + """ + Calculate the acquisition function value as the predicted uncertainty + when it is is larger than unc_convergence else predicted energy. + """ + if max_([uncertainty]) < self.unc_convergence: + return energy + return self.objective_value(uncertainty) + + def update_arguments( + self, + objective=None, + seed=None, + unc_convergence=None, + **kwargs, + ): + """ + Set the parameters of the Acquisition function instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + unc_convergence: float + The convergence threshold for the uncertainty. + If the uncertainty is below this value, the predicted energy + is used as the acquisition function. + """ + # Set the parameters in the parent class + super().update_arguments( + objective=objective, + seed=seed, + ) + # Set the unc_convergence value + if unc_convergence is not None: + self.unc_convergence = abs(unc_convergence) + return self + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + objective=self.objective, + seed=self.seed, + unc_convergence=self.unc_convergence, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + +class AcqUUCB(AcqUCB): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted uncertainty when it is larger than unc_convergence + else upper confidence interval (ucb) as the acquisition function. + """ + + def __init__( + self, + objective="max", + seed=None, + kappa=2.0, + kappamax=3.0, + unc_convergence=0.05, + **kwargs, + ): + """ + Initialize the Acquisition instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + kappa: float or str + The kappa value for the upper confidence interval. + If a string, a random value between 0 and kappamax is used. + kappamax: float + The maximum kappa value for the upper confidence interval. + If kappa is a string, a random value between 0 and this value + is used. + unc_convergence: float + The convergence threshold for the uncertainty. + If the uncertainty is below this value, the ucb is used + as the acquisition function. + """ + self.update_arguments( + objective=objective, + seed=seed, + kappa=kappa, + kappamax=kappamax, + unc_convergence=unc_convergence, + **kwargs, + ) + + def calculate(self, energy, uncertainty=None, **kwargs): + """ + Calculate the acquisition function value as the predicted uncertainty + when it is is larger than unc_convergence else ucb. 
+ """ + if max_([uncertainty]) < self.unc_convergence: + kappa = self.get_kappa() + return energy + kappa * uncertainty + return uncertainty + + def update_arguments( + self, + objective=None, + seed=None, + kappa=None, + kappamax=None, + unc_convergence=None, + **kwargs, + ): + """ + Set the parameters of the Acquisition function instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + kappa: float or str + The kappa value for the upper confidence interval. + If a string, a random value between 0 and kappamax is used. + kappamax: float + The maximum kappa value for the upper confidence interval. + If kappa is a string, a random value between 0 and this value + is used. + unc_convergence: float + The convergence threshold for the uncertainty. + If the uncertainty is below this value, the ucb is used + as the acquisition function. + """ + # Set the parameters in the parent class + super().update_arguments( + objective=objective, + seed=seed, + kappa=kappa, + kappamax=kappamax, + ) + # Set the unc_convergence value + if unc_convergence is not None: + self.unc_convergence = abs(unc_convergence) + return self + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + objective=self.objective, + seed=self.seed, + kappa=self.kappa, + kappamax=self.kappamax, + unc_convergence=self.unc_convergence, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + +class AcqULCB(AcqUUCB): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted uncertainty when it is larger than unc_convergence + else lower confidence interval (lcb) as the acquisition function. + """ + + def __init__( + self, + objective="min", + seed=None, + kappa=2.0, + kappamax=3.0, + unc_convergence=0.05, + **kwargs, + ): + self.update_arguments( + objective=objective, + seed=seed, + kappa=kappa, + kappamax=kappamax, + unc_convergence=unc_convergence, + **kwargs, + ) + + def calculate(self, energy, uncertainty=None, **kwargs): + """ + Calculate the acquisition function value as the predicted uncertainty + when it is is larger than unc_convergence else lcb. + """ + if max_([uncertainty]) < self.unc_convergence: + kappa = self.get_kappa() + return energy - kappa * uncertainty + return -uncertainty + + +class AcqEI(Acquisition): + """ + The Acquisition class is used to calculate the acquisition function + values and to sort the candidates. + The predicted expected improvement as the acquisition function. + """ + + def __init__(self, objective="max", seed=None, ebest=None, **kwargs): + """ + Initialize the Acquisition instance. + + Parameters: + objective: string + How to sort a list of acquisition functions + Available: + - 'min': Sort after the smallest values. + - 'max': Sort after the largest values. + - 'random': Sort randomly + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. 
+                If not given, the default random number generator is used.
+            ebest: float
+                The best energy value found so far.
+                This is used as the reference energy for the expected
+                improvement.
+        """
+        self.update_arguments(
+            objective=objective,
+            seed=seed,
+            ebest=ebest,
+            **kwargs,
+        )
+
+    def calculate(self, energy, uncertainty=None, **kwargs):
+        """
+        Calculate the acquisition function value as
+        the predicted expected improvement.
+        """
+        z = (energy - self.ebest) / uncertainty
+        a = (energy - self.ebest) * norm.cdf(z) + uncertainty * norm.pdf(z)
+        return self.objective_value(a)
+
+    def update_arguments(
+        self,
+        objective=None,
+        seed=None,
+        ebest=None,
+        **kwargs,
+    ):
+        """
+        Set the parameters of the Acquisition function instance.
+
+        Parameters:
+            objective: string
+                How to sort a list of acquisition functions
+                Available:
+                - 'min': Sort after the smallest values.
+                - 'max': Sort after the largest values.
+                - 'random': Sort randomly
+            seed: int (optional)
+                The random seed.
+                The seed can also be a RandomState or Generator instance.
+                If not given, the default random number generator is used.
+            ebest: float
+                The best energy value found so far.
+                This is used as the reference energy for the expected
+                improvement.
+        """
+        # Set the parameters in the parent class
+        super().update_arguments(
+            objective=objective,
+            seed=seed,
+        )
+        # Set the ebest value
+        if ebest is not None or not hasattr(self, "ebest"):
+            self.ebest = ebest
+        return self
+
+    def get_arguments(self):
+        "Get the arguments of the class itself."
+        # Get the arguments given to the class in the initialization
+        arg_kwargs = dict(
+            objective=self.objective,
+            seed=self.seed,
+            ebest=self.ebest,
+        )
+        # Get the constants made within the class
+        constant_kwargs = dict()
+        # Get the objects made within the class
+        object_kwargs = dict()
+        return arg_kwargs, constant_kwargs, object_kwargs
+
+
+class AcqPI(AcqEI):
+    """
+    The Acquisition class is used to calculate the acquisition function
+    values and to sort the candidates.
+    The predicted probability of improvement as the acquisition function.
+    """
+
+    def calculate(self, energy, uncertainty=None, **kwargs):
+        """
+        Calculate the acquisition function value as
+        the predicted probability of improvement.
+        """
+        z = (energy - self.ebest) / uncertainty
+        return self.objective_value(norm.cdf(z))
diff --git a/catlearn/activelearning/activelearning.py b/catlearn/activelearning/activelearning.py
new file mode 100644
index 00000000..56075d26
--- /dev/null
+++ b/catlearn/activelearning/activelearning.py
@@ -0,0 +1,2154 @@
+from numpy import (
+    asarray,
+    max as max_,
+    mean as mean_,
+    nan,
+    nanmax,
+    ndarray,
+    sqrt,
+)
+from numpy.linalg import norm
+from numpy.random import default_rng, Generator, RandomState
+from ase.io import read
+from ase.parallel import world, broadcast
+from ase.io.trajectory import TrajectoryWriter
+import datetime
+from time import time
+import warnings
+from ..regression.gp.calculator import BOCalculator, compare_atoms, copy_atoms
+from ..regression.gp.means import Prior_max
+from ..regression.gp.baseline import BornRepulsionCalculator
+
+
+class ActiveLearning:
+    """
+    An active learner that is used for accelerating quantum mechanical
+    simulation methods with an active learning approach.
+ """ + + def __init__( + self, + method, + ase_calc, + mlcalc=None, + acq=None, + is_minimization=True, + save_memory=False, + parallel_run=False, + copy_calc=False, + verbose=True, + apply_constraint=True, + force_consistent=False, + scale_fmax=0.8, + use_fmax_convergence=True, + unc_convergence=0.02, + use_method_unc_conv=True, + use_restart=True, + check_unc=True, + check_energy=True, + check_fmax=True, + max_unc_restart=0.05, + n_evaluations_each=1, + min_data=3, + use_database_check=True, + data_perturb=0.001, + data_tol=1e-8, + save_properties_traj=True, + to_save_mlcalc=False, + save_mlcalc_kwargs={}, + default_mlcalc_kwargs={}, + trajectory="predicted.traj", + trainingset="evaluated.traj", + pred_evaluated="predicted_evaluated.traj", + converged_trajectory="converged.traj", + initial_traj="initial_struc.traj", + tabletxt="ml_summary.txt", + timetxt="ml_time.txt", + prev_calculations=None, + restart=False, + seed=1, + dtype=float, + comm=world, + **kwargs, + ): + """ + Initialize the ActiveLearning instance. + + Parameters: + method: OptimizationMethod instance + The quantum mechanincal simulation method instance. + ase_calc: ASE calculator instance + ASE calculator as implemented in ASE. + mlcalc: ML-calculator instance + The ML-calculator instance used as surrogate surface. + The default BOCalculator instance is used if mlcalc is None. + acq: Acquisition class instance + The Acquisition instance used for calculating the + acq. function and choose a candidate to calculate next. + The default AcqUME instance is used if acq is None. + is_minimization: bool + Whether it is a minimization that is performed. + Alternative is a maximization. + save_memory: bool + Whether to only train the ML calculator and store all objects + on one CPU. + If save_memory==True then parallel optimization of + the hyperparameters can not be achived. + If save_memory==False no MPI object is used. 
+ parallel_run: bool + Whether to run method in parallel on multiple CPUs (True) or + in sequence on 1 CPU (False). + copy_calc: bool + Whether to copy the calculator for each candidate + in the method. + verbose: bool + Whether to print on screen the full output (True) or + not (False). + apply_constraint: bool + Whether to apply the constrains of the ASE Atoms instance + to the calculated forces. + By default (apply_constraint=True) forces are 0 for + constrained atoms and directions. + force_consistent: bool or None. + Use force-consistent energy calls (as opposed to the energy + extrapolated to 0 K). + By default force_consistent=False. + scale_fmax: float + The scaling of the fmax convergence criterion. + It makes the structure(s) converge tighter on surrogate + surface. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + use_fmax_convergence: bool + Whether to use the maximum force as an convergence criterion. + unc_convergence: float + Maximum uncertainty for convergence in the active learning + (in eV). + use_method_unc_conv: bool + Whether to use the unc_convergence as a convergence criterion + in the optimization method. + use_restart: bool + Use the result from last robust iteration. + check_unc: bool + Check if the uncertainty is large for the restarted result and + if it is then use the previous initial. + check_energy: bool + Check if the energy is larger for the restarted result than + the previous. + check_fmax: bool + Check if the maximum force is larger for the restarted result + than the initial interpolation and if so then replace it. + max_unc_restart: float (optional) + Maximum uncertainty (in eV) for using the structure(s) as + the restart in the optimization method. + If max_unc_restart is None, then the optimization is performed + without the maximum uncertainty. 
+ n_evaluations_each: int + Number of evaluations for each iteration. + min_data: int + The minimum number of data points in the training set before + the active learning can converge. + use_database_check: bool + Whether to check if the new structure is within the database. + If it is in the database, the structure is rattled. + Please be aware that the predicted structure will differ from + the structure in the database if the rattling is applied. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + data_perturb: float + The perturbation of the data structure if it is in the database + and use_database_check is True. + data_perturb is the standard deviation of the normal + distribution used to rattle the structure. + data_tol: float + The tolerance for the data structure if it is in the database + and use_database_check is True. + save_properties_traj: bool + Whether to save the calculated properties to the trajectory. + to_save_mlcalc: bool + Whether to save the ML calculator to a file after training. + save_mlcalc_kwargs: dict + Arguments for saving the ML calculator, like the filename. + default_mlcalc_kwargs: dict + The default keyword arguments for the ML calculator. + trajectory: str or TrajectoryWriter instance + Trajectory filename to store the predicted data. + Or the TrajectoryWriter instance to store the predicted data. + trainingset: str or TrajectoryWriter instance + Trajectory filename to store the evaluated training data. + Or the TrajectoryWriter instance to store the evaluated + training data. + pred_evaluated: str or TrajectoryWriter instance (optional) + Trajectory filename to store the evaluated training data + with predicted properties. + Or the TrajectoryWriter instance to store the evaluated + training data with predicted properties. + If pred_evaluated is None, then the predicted data is + not saved. 
+ converged_trajectory: str or TrajectoryWriter instance + Trajectory filename to store the converged structure(s). + Or the TrajectoryWriter instance to store the converged + structure(s). + initial_traj: str or TrajectoryWriter instance + Trajectory filename to store the initial structure(s). + Or the TrajectoryWriter instance to store the initial + structure(s). + tabletxt: str (optional) + Name of the .txt file where the summary table is printed. + It is not saved to the file if tabletxt=None. + timetxt: str (optional) + Name of the .txt file where the time table is printed. + It is not saved to the file if timetxt=None. + prev_calculations: Atoms list or ASE Trajectory file. + The user can feed previously calculated data + for the same hypersurface. + The previous calculations must be fed as an Atoms list + or Trajectory filename. + restart: bool + Whether to restart the active learning. + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + comm: MPI communicator. + The MPI communicator. 
+ """ + # Setup the ASE calculator + self.ase_calc = ase_calc + # Set the initial parameters + self.reset() + # Setup the method + self.setup_method(method) + # Setup the ML calculator + self.setup_mlcalc( + mlcalc, + save_memory=save_memory, + verbose=verbose, + **default_mlcalc_kwargs, + ) + # Setup the acquisition function + self.setup_acq( + acq, + is_minimization=is_minimization, + unc_convergence=unc_convergence, + ) + # Set the arguments + self.update_arguments( + is_minimization=is_minimization, + use_database_check=use_database_check, + data_perturb=data_perturb, + data_tol=data_tol, + save_memory=save_memory, + parallel_run=parallel_run, + copy_calc=copy_calc, + verbose=verbose, + apply_constraint=apply_constraint, + force_consistent=force_consistent, + scale_fmax=scale_fmax, + use_fmax_convergence=use_fmax_convergence, + unc_convergence=unc_convergence, + use_method_unc_conv=use_method_unc_conv, + use_restart=use_restart, + check_unc=check_unc, + check_energy=check_energy, + check_fmax=check_fmax, + max_unc_restart=max_unc_restart, + n_evaluations_each=n_evaluations_each, + min_data=min_data, + save_properties_traj=save_properties_traj, + to_save_mlcalc=to_save_mlcalc, + save_mlcalc_kwargs=save_mlcalc_kwargs, + trajectory=trajectory, + trainingset=trainingset, + pred_evaluated=pred_evaluated, + converged_trajectory=converged_trajectory, + initial_traj=initial_traj, + tabletxt=tabletxt, + timetxt=timetxt, + seed=seed, + dtype=dtype, + comm=comm, + **kwargs, + ) + # Restart the active learning + prev_calculations = self.restart_optimization( + restart, + prev_calculations, + ) + # Use previous calculations to train ML calculator + self.use_prev_calculations(prev_calculations) + + def run( + self, + fmax=0.05, + steps=200, + ml_steps=1000, + max_unc=0.3, + dtrust=None, + **kwargs, + ): + """ + Run the active learning optimization. + + Parameters: + fmax: float + Convergence criteria (in eV/Angs). + steps: int + Maximum number of evaluations. 
+ ml_steps: int + Maximum number of steps for the optimization method + on the predicted landscape. + max_unc: float (optional) + Maximum uncertainty for continuation of the optimization. + If max_unc is None, then the optimization is performed + without the maximum uncertainty. + dtrust: float (optional) + The trust distance for the optimization method. + + Returns: + converged: bool + Whether the active learning is converged. + """ + # Check if there are any training data + self.extra_initial_data() + # Run the active learning + for step in range(1, steps + 1): + # Check if the method is converged + if self.converged(): + self.message_system("Active learning is converged.") + self.save_trajectory( + self.converged_trajectory, + self.best_structures, + mode="w", + ) + break + # Train and optimize ML model + self.train_mlmodel() + # Run the method + candidates, method_converged = self.find_next_candidates( + fmax=self.scale_fmax * fmax, + step=step, + ml_steps=ml_steps, + max_unc=max_unc, + dtrust=dtrust, + ) + # Evaluate candidate + self.evaluate_candidates(candidates) + # Print the results for this iteration + self.print_statement() + # Check for convergence + self._converged = self.check_convergence( + fmax, + method_converged, + ) + # State if the active learning did not converge + if not self.converged(): + self.message_system("Active learning did not converge!") + # Return and broadcast the best atoms + self.broadcast_best_structures() + return self.converged() + + def converged(self, *args, **kwargs): + "Whether the active learning is converged." + return self._converged + + def get_number_of_steps(self): + """ + Get the number of steps that have been run. + """ + return self.steps + + def reset(self, **kwargs): + """ + Reset the initial parameters for the active learner. 
+ """ + # Set initial parameters + self.steps = 0 + self._converged = False + self.unc = nan + self.energy_pred = nan + self.pred_energies = [] + self.uncertainties = [] + self.ml_train_time = nan + self.method_time = nan + self.eval_time = nan + # Set the header for the summary table + self.make_hdr_table() + # Set the writing mode + self.mode = "w" + return self + + def setup_method(self, method, **kwargs): + """ + Setup the optimization method. + + Parameters: + method: OptimizationMethod instance. + The quantum mechanincal simulation method instance. + + Returns: + self: The object itself. + """ + # Save the method + self.method = method + # Set the seed for the method + if hasattr(self, "seed"): + self.set_method_seed(self.seed) + # Get the structures + self.structures = self.get_structures(allow_calculation=False) + if isinstance(self.structures, list): + self.n_structures = len(self.structures) + self.natoms = len(self.structures[0]) + else: + self.n_structures = 1 + self.natoms = len(self.structures) + self.best_structures = self.get_structures(allow_calculation=False) + self._converged = self.method.converged() + # Set the evaluated candidate and its calculator + self.candidate = self.get_candidates()[0].copy() + self.candidate.calc = self.ase_calc + # Store the best candidate data + self.bests_data = { + "atoms": self.candidate.copy(), + "energy": None, + "fmax": None, + "uncertainty": None, + } + return self + + def setup_mlcalc( + self, + mlcalc=None, + verbose=True, + **default_mlcalc_kwargs, + ): + """ + Setup the ML calculator. + + Parameters: + mlcalc: ML-calculator instance (optional) + The ML-calculator instance used as surrogate surface. + A default ML-model is used if mlcalc is None. + verbose: bool + Whether to print on screen the full output (True) or + not (False). + default_mlcalc_kwargs: dict + The default keyword arguments for the ML calculator. + + Returns: + self: The object itself. 
+ """ + # Check if the ML calculator is given + if mlcalc is not None: + self.mlcalc = mlcalc + # Set the verbose for the ML calculator + if verbose is not None: + self.set_verbose(verbose=verbose) + else: + self.mlcalc = self.setup_default_mlcalc( + verbose=verbose, + **default_mlcalc_kwargs, + ) + # Check if the seed is given + if hasattr(self, "seed"): + # Set the seed for the ML calculator + self.set_mlcalc_seed(self.seed) + # Check if the dtype is given + if hasattr(self, "dtype"): + # Set the dtype for the ML calculator + self.mlcalc.set_dtype(self.dtype) + return self + + def setup_default_mlcalc( + self, + atoms=None, + save_memory=False, + model="tp", + fp=None, + baseline=BornRepulsionCalculator(), + prior=Prior_max(add=1.0), + use_derivatives=True, + optimize_hp=True, + database_reduction=False, + use_ensemble=False, + calc_forces=True, + round_pred=5, + bayesian=True, + kappa=2.0, + reuse_mlcalc_data=False, + verbose=True, + calc_kwargs={}, + **mlmodel_kwargs, + ): + """ + Setup the ML calculator. + + Parameters: + atoms: Atoms instance (optional if fp is not None) + The Atoms instance from the optimization method. + It is used to setup the fingerprint if it is None. + save_memory: bool + Whether to only train the ML calculator and store + all instances on one CPU. + If save_memory==True then parallel optimization of + the hyperparameters can not be achived. + If save_memory==False no MPI instance is used. + model: str or Model class instance + Either the tp that gives the Students T process or + gp that gives the Gaussian process. + fp: Fingerprint class instance (optional) + The fingerprint instance used for the ML model. + The default InvDistances instance is used if fp is None. + baseline: Baseline class instance (optional) + The baseline instance used for the ML model. + The default is the BornRepulsionCalculator. + prior: Prior class instance (optional) + The prior mean instance used for the ML model. + The default prior is the Prior_max. 
+ use_derivatives: bool + Whether to use derivatives of the targets in the ML model. + optimize_hp: bool + Whether to optimize the hyperparameters when the model is + trained. + database_reduction: bool + Whether to reduce the training database size. + A reduction can avoid memory issues and speed up the training. + use_ensemble: bool + Whether to use an ensemble model with clustering. + The use of ensemble models can avoid memory issues and speed up + the training. + calc_forces: bool + Whether to calculate the forces for all energy predictions. + round_pred: int (optional) + The number of decimals to round the predictions to. + If None, the predictions are not rounded. + bayesian: bool + Whether to use the Bayesian optimization calculator. + kappa: float + The scaling of the uncertainty relative to the energy. + The uncertainty is added to the predicted energy. + reuse_mlcalc_data: bool + Whether to reuse the data from a previous mlcalc. + verbose: bool + Whether to print on screen the full output (True) or + not (False). + calc_kwargs: dict + The keyword arguments for the ML calculator. + mlmodel_kwargs: dict + Additional keyword arguments for the function + to create the MLModel instance. + + Returns: + self: The instance itself. 
+ """ + # Create the ML calculator + from ..regression.gp.calculator.default_model import ( + get_default_mlmodel, + ) + from ..regression.gp.calculator.mlcalc import MLCalculator + from ..regression.gp.fingerprint.invdistances import InvDistances + + # Check if the save_memory is given + if save_memory is None: + try: + save_memory = self.save_memory + except NameError: + raise NameError("The save_memory is not given.") + # Setup the fingerprint + if fp is None: + # Check if the Atoms instance is given + if atoms is None: + try: + atoms = self.get_structures( + get_all=False, + allow_calculation=False, + ) + except NameError: + raise NameError("The Atoms object is not given or stored.") + # Can only use distances if there are more than one atom + if len(atoms) > 1: + if atoms.pbc.any(): + periodic_softmax = True + else: + periodic_softmax = False + fp = InvDistances( + reduce_dimensions=True, + use_derivatives=True, + periodic_softmax=periodic_softmax, + wrap=True, + ) + # Setup the ML model + mlmodel = get_default_mlmodel( + model=model, + prior=prior, + fp=fp, + baseline=baseline, + use_derivatives=use_derivatives, + parallel=(not save_memory), + optimize_hp=optimize_hp, + database_reduction=database_reduction, + use_ensemble=use_ensemble, + verbose=verbose, + **mlmodel_kwargs, + ) + # Get the data from a previous mlcalc if requested and it exist + if reuse_mlcalc_data: + if hasattr(self, "mlcalc"): + data = self.get_data_atoms() + else: + data = [] + # Setup the ML calculator + if bayesian: + mlcalc = BOCalculator( + mlmodel=mlmodel, + calc_forces=calc_forces, + round_pred=round_pred, + kappa=kappa, + **calc_kwargs, + ) + if not use_derivatives and kappa > 0.0: + if world.rank == 0: + warnings.warn( + "The Bayesian optimization calculator " + "with a positive kappa value and no derivatives " + "is not recommended!" 
+ ) + else: + mlcalc = MLCalculator( + mlmodel=mlmodel, + calc_forces=calc_forces, + round_pred=round_pred, + **calc_kwargs, + ) + # Reuse the data from a previous mlcalc if requested + if reuse_mlcalc_data: + if len(data): + mlcalc.add_training(data) + return mlcalc + + def setup_acq( + self, + acq=None, + is_minimization=True, + kappa=2.0, + unc_convergence=0.05, + **kwargs, + ): + """ + Setup the acquisition function. + + Parameters: + acq: Acquisition class instance. + The Acquisition instance used for calculating the acq. function + and choose a candidate to calculate next. + The default AcqUME instance is used if acq is None. + is_minimization: bool + Whether it is a minimization that is performed. + kappa: float + The kappa parameter in the acquisition function. + unc_convergence: float + Maximum uncertainty for convergence (in eV). + """ + # Select an acquisition function + if acq is None: + # Setup the acquisition function + if is_minimization: + from .acquisition import AcqULCB + + self.acq = AcqULCB( + objective="min", + kappa=kappa, + unc_convergence=unc_convergence, + ) + else: + from .acquisition import AcqUUCB + + self.acq = AcqUUCB( + objective="max", + kappa=kappa, + unc_convergence=unc_convergence, + ) + else: + self.acq = acq.copy() + # Check if the objective is the same + objective = self.get_objective_str() + if acq.objective != objective: + raise ValueError( + "The objective of the acquisition function " + "does not match the active learner." + ) + # Set the seed for the acquisition function + if hasattr(self, "seed"): + self.set_acq_seed(self.seed) + return self + + def get_structures( + self, + get_all=True, + properties=["forces", "energy", "uncertainty"], + allow_calculation=True, + **kwargs, + ): + """ + Get the list of ASE Atoms object from the method. + + Parameters: + get_all: bool + Whether to get all structures or just the first one. + properties: list of str + The names of the requested properties. 
+ If not given, the properties is not calculated. + allow_calculation: bool + Whether the properties are allowed to be calculated. + + Returns: + Atoms object or list of Atoms objects. + """ + return self.method.get_structures( + get_all=get_all, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + + def get_candidates(self): + """ + Get the list of candidates from the method. + The candidates are used for the evaluation. + + Returns: + List of Atoms objects. + """ + return self.method.get_candidates() + + def copy_candidates( + self, + properties=["fmax", "forces", "energy", "uncertainty"], + allow_calculation=True, + **kwargs, + ): + """ + Get the candidate structure instances with copied properties. + It is used for active learning. + + Parameters: + properties: list of str + The names of the requested properties. + allow_calculation: bool + Whether the properties are allowed to be calculated. + + Returns: + candidates_copy: list of Atoms instances + The candidates with copied properties. + """ + return self.method.copy_candidates( + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + + def use_prev_calculations(self, prev_calculations=None, **kwargs): + """ + Use previous calculations to restart ML calculator. + + Parameters: + prev_calculations: Atoms list or ASE Trajectory file. + The user can feed previously calculated data + for the same hypersurface. + The previous calculations must be fed as an Atoms list + or Trajectory filename. + """ + if prev_calculations is None: + return self + if isinstance(prev_calculations, str): + prev_calculations = read(prev_calculations, ":") + if isinstance(prev_calculations, list) and len(prev_calculations) == 0: + return self + # Add calculations to the ML model + self.add_training(prev_calculations) + return self + + def update_method(self, structures, **kwargs): + """ + Update the method with structures. 
+        Add the ML calculator to the structures in the optimization method.
+
+        Parameters:
+            structures: Atoms instance or list of Atoms instances
+                The structures that the optimizable instance is dependent on.
+
+        Returns:
+            self: The object itself.
+        """
+        # Initiate the method with given structure(s)
+        self.method.update_optimizable(structures)
+        # Set the ML calculator in the method
+        self.set_mlcalc()
+        return self
+
+    def reset_method(self, **kwargs):
+        """
+        Reset the steps and convergence of the optimization method.
+        Add the ML calculator to the structures in the optimization method.
+        """
+        # Reset the optimization method
+        self.method.reset_optimization()
+        # Set the ML calculator in the method
+        self.set_mlcalc()
+        return self
+
+    def set_mlcalc(self, copy_calc=None, **kwargs):
+        """
+        Set the ML calculator in the method.
+        """
+        # Set copy_calc if it is not given
+        if copy_calc is None:
+            copy_calc = self.copy_calc
+        # Set the ML calculator in the method
+        self.method.set_calculator(self.mlcalc, copy_calc=copy_calc)
+        return self
+
+    def get_data_atoms(self, **kwargs):
+        """
+        Get the list of atoms in the database.
+
+        Returns:
+            list: A list of the saved ASE Atoms objects.
+ """ + return self.mlcalc.get_data_atoms() + + def update_arguments( + self, + method=None, + ase_calc=None, + mlcalc=None, + acq=None, + is_minimization=None, + save_memory=None, + parallel_run=None, + copy_calc=None, + verbose=None, + apply_constraint=None, + force_consistent=None, + scale_fmax=None, + use_fmax_convergence=None, + unc_convergence=None, + use_method_unc_conv=None, + use_restart=None, + check_unc=None, + check_energy=None, + check_fmax=None, + max_unc_restart=None, + n_evaluations_each=None, + min_data=None, + use_database_check=None, + data_perturb=None, + data_tol=None, + save_properties_traj=None, + to_save_mlcalc=None, + save_mlcalc_kwargs=None, + trajectory=None, + trainingset=None, + pred_evaluated=None, + converged_trajectory=None, + initial_traj=None, + tabletxt=None, + timetxt=None, + seed=None, + dtype=None, + comm=None, + **kwargs, + ): + """ + Update the instance with its arguments. + The existing arguments are used if they are not given. + + Parameters: + method: OptimizationMethod instance + The quantum mechanincal simulation method instance. + ase_calc: ASE calculator instance + ASE calculator as implemented in ASE. + mlcalc: ML-calculator instance + The ML-calculator instance used as surrogate surface. + The default BOCalculator instance is used if mlcalc is None. + acq: Acquisition class instance + The Acquisition instance used for calculating the + acq. function and choose a candidate to calculate next. + The default AcqUME instance is used if acq is None. + is_minimization: bool + Whether it is a minimization that is performed. + Alternative is a maximization. + save_memory: bool + Whether to only train the ML calculator and store all objects + on one CPU. + If save_memory==True then parallel optimization of + the hyperparameters can not be achived. + If save_memory==False no MPI object is used. + parallel_run: bool + Whether to run method in parallel on multiple CPUs (True) or + in sequence on 1 CPU (False). 
+ copy_calc: bool + Whether to copy the calculator for each candidate + in the method. + verbose: bool + Whether to print on screen the full output (True) or + not (False). + apply_constraint: bool + Whether to apply the constrains of the ASE Atoms instance + to the calculated forces. + By default (apply_constraint=True) forces are 0 for + constrained atoms and directions. + force_consistent: bool or None. + Use force-consistent energy calls (as opposed to the energy + extrapolated to 0 K). + By default force_consistent=False. + scale_fmax: float + The scaling of the fmax convergence criterion. + It makes the structure(s) converge tighter on surrogate + surface. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + use_fmax_convergence: bool + Whether to use the maximum force as an convergence criterion. + unc_convergence: float + Maximum uncertainty for convergence in + the active learning (in eV). + use_method_unc_conv: bool + Whether to use the unc_convergence as a convergence criterion + in the optimization method. + use_restart: bool + Use the result from last robust iteration. + check_unc: bool + Check if the uncertainty is large for the restarted result and + if it is then use the previous initial. + check_energy: bool + Check if the energy is larger for the restarted result than + the previous. + check_fmax: bool + Check if the maximum force is larger for the restarted result + than the initial interpolation and if so then replace it. + max_unc_restart: float (optional) + Maximum uncertainty (in eV) for using the structure(s) as + the restart in the optimization method. + If max_unc_restart is None, then the optimization is performed + without the maximum uncertainty. + n_evaluations_each: int + Number of evaluations for each iteration. 
+ min_data: int + The minimum number of data points in the training set before + the active learning can converge. + use_database_check: bool + Whether to check if the new structure is within the database. + If it is in the database, the structure is rattled. + Please be aware that the predicted structure will differ from + the structure in the database if the rattling is applied. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + data_perturb: float + The perturbation of the data structure if it is in the database + and use_database_check is True. + data_perturb is the standard deviation of the normal + distribution used to rattle the structure. + data_tol: float + The tolerance for the data structure if it is in the database + and use_database_check is True. + save_properties_traj: bool + Whether to save the calculated properties to the trajectory. + to_save_mlcalc: bool + Whether to save the ML calculator to a file after training. + save_mlcalc_kwargs: dict + Arguments for saving the ML calculator, like the filename. + trajectory: str or TrajectoryWriter instance + Trajectory filename to store the predicted data. + Or the TrajectoryWriter instance to store the predicted data. + trainingset: str or TrajectoryWriter instance + Trajectory filename to store the evaluated training data. + Or the TrajectoryWriter instance to store the evaluated + training data. + pred_evaluated: str or TrajectoryWriter instance (optional) + Trajectory filename to store the evaluated training data + with predicted properties. + Or the TrajectoryWriter instance to store the evaluated + training data with predicted properties. + If pred_evaluated is None, then the predicted data is + not saved. + converged_trajectory: str or TrajectoryWriter instance + Trajectory filename to store the converged structure(s). 
+ Or the TrajectoryWriter instance to store the converged + structure(s). + initial_traj: str or TrajectoryWriter instance + Trajectory filename to store the initial structure(s). + Or the TrajectoryWriter instance to store the initial + structure(s). + tabletxt: str (optional) + Name of the .txt file where the summary table is printed. + It is not saved to the file if tabletxt=None. + timetxt: str (optional) + Name of the .txt file where the time table is printed. + It is not saved to the file if timetxt=None. + prev_calculations: Atoms list or ASE Trajectory file. + The user can feed previously calculated data + for the same hypersurface. + The previous calculations must be fed as an Atoms list + or Trajectory filename. + restart: bool + Whether to restart the active learning. + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + comm: MPI communicator. + The MPI communicator. + + Returns: + self: The updated object itself. + """ + # Set parallelization + if save_memory is not None: + self.save_memory = save_memory + if comm is not None or not hasattr(self, "comm"): + # Setup parallelization + self.parallel_setup(comm) + if parallel_run is not None: + self.parallel_run = parallel_run + if self.parallel_run and self.save_memory: + raise ValueError( + "The save_memory and parallel_run can not " + "be True at the same time!" 
+ ) + # Set the verbose + if verbose is not None: + # Whether to have the full output + self.set_verbose(verbose=verbose) + elif not hasattr(self, "verbose"): + self.set_verbose(verbose=False) + # Set parameters + if is_minimization is not None: + self.is_minimization = is_minimization + if use_database_check is not None: + self.use_database_check = use_database_check + if data_perturb is not None: + self.data_perturb = abs(float(data_perturb)) + if data_tol is not None: + self.data_tol = abs(float(data_tol)) + if self.use_database_check: + if self.data_perturb < self.data_tol: + self.message_system( + "It is not recommended that the data_perturb " + "is smaller than the data_tol.", + is_warning=True, + ) + if copy_calc is not None: + self.copy_calc = copy_calc + if apply_constraint is not None: + self.apply_constraint = apply_constraint + elif not hasattr(self, "apply_constraint"): + self.apply_constraint = True + if force_consistent is not None: + self.force_consistent = force_consistent + elif not hasattr(self, "force_consistent"): + self.force_consistent = False + if scale_fmax is None and not hasattr(self, "scale_fmax"): + scale_fmax = 1.0 + if scale_fmax is not None: + self.scale_fmax = abs(float(scale_fmax)) + self.scale_fmax_org = self.scale_fmax + if use_fmax_convergence is not None: + self.use_fmax_convergence = use_fmax_convergence + if unc_convergence is not None: + self.unc_convergence = abs(float(unc_convergence)) + if use_method_unc_conv is not None: + self.use_method_unc_conv = use_method_unc_conv + if use_restart is not None: + self.use_restart = use_restart + if check_unc is not None: + self.check_unc = check_unc + if check_energy is not None: + self.check_energy = check_energy + if check_fmax is not None: + self.check_fmax = check_fmax + if max_unc_restart is not None: + self.max_unc_restart = abs(float(max_unc_restart)) + if n_evaluations_each is not None: + self.n_evaluations_each = int(abs(n_evaluations_each)) + if self.n_evaluations_each < 1: 
+ self.n_evaluations_each = 1 + if min_data is not None: + self.min_data = int(abs(min_data)) + if save_properties_traj is not None: + self.save_properties_traj = save_properties_traj + if to_save_mlcalc is not None: + self.to_save_mlcalc = to_save_mlcalc + if save_mlcalc_kwargs is not None: + self.save_mlcalc_kwargs = save_mlcalc_kwargs + if trajectory is not None or not hasattr(self, "trajectory"): + self.trajectory = trajectory + if trainingset is not None or not hasattr(self, "trainingset"): + self.trainingset = trainingset + if pred_evaluated is not None or not hasattr(self, "pred_evaluated"): + self.pred_evaluated = pred_evaluated + if converged_trajectory is not None or not hasattr( + self, "converged_trajectory" + ): + self.converged_trajectory = converged_trajectory + if initial_traj is not None or not hasattr(self, "initial_traj"): + self.initial_traj = initial_traj + if tabletxt is not None: + self.tabletxt = str(tabletxt) + elif not hasattr(self, "tabletxt"): + self.tabletxt = None + if timetxt is not None: + self.timetxt = str(timetxt) + elif not hasattr(self, "timetxt"): + self.timetxt = None + # Set ASE calculator + if ase_calc is not None: + self.ase_calc = ase_calc + if method is None: + self.setup_method(self.method) + # Update the optimization method + if method is not None: + self.setup_method(method) + # Set the machine learning calculator + if mlcalc is not None: + self.setup_mlcalc(mlcalc) + # Set the acquisition function + if acq is not None: + self.setup_acq( + acq, + is_minimization=self.is_minimization, + unc_convergence=self.unc_convergence, + ) + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype) + # Check if the method and BO is compatible + self.check_attributes() + return self + + def find_next_candidates( + self, + fmax=0.05, + step=1, + ml_steps=200, + max_unc=None, + dtrust=None, + **kwargs, + 
): + "Run the method on the ML surrogate surface." + # Convergence of the method + method_converged = False + # Check if the method is running in parallel + if not self.parallel_run and self.rank != 0: + return None, method_converged + # Check if the previous structure were better + self.initiate_structure(step=step) + # Run the method + method_converged = self.run_method( + fmax=fmax, + ml_steps=ml_steps, + max_unc=max_unc, + dtrust=dtrust, + ) + # Get the candidates + candidates = self.choose_candidates() + return candidates, method_converged + + def run_method( + self, + fmax=0.05, + ml_steps=750, + max_unc=None, + dtrust=None, + **kwargs, + ): + "Run the method on the surrogate surface." + # Set the uncertainty convergence for the method + if self.use_method_unc_conv: + unc_convergence = self.unc_convergence + else: + unc_convergence = None + # Start the method time + self.method_time = time() + # Run the method + self.method.run( + fmax=fmax, + steps=ml_steps, + max_unc=max_unc, + dtrust=dtrust, + unc_convergence=unc_convergence, + **kwargs, + ) + # Store the method time + self.method_time = time() - self.method_time + # Check if the method converged + method_converged = self.method.converged() + # Get the atoms from the method run + self.structures = self.get_structures() + # Write atoms to trajectory + self.save_trajectory(self.trajectory, self.structures, mode=self.mode) + return method_converged + + def initiate_structure(self, step=1, **kwargs): + "Initiate the method with right structure." 
+        # Define boolean for using the temporary structure
+        use_tmp = True
+        # Do not use the temporary structure
+        if not self.use_restart or step == 1:
+            self.message_system("The initial structure is used.")
+            use_tmp = False
+        # Reuse the temporary structure if it passes tests
+        if use_tmp:
+            self.update_method(self.structures)
+            # Get uncertainty and fmax
+            uncmax_tmp, energy_tmp, fmax_tmp = self.get_predictions()
+            # Check uncertainty is low enough (skipped if max_unc_restart is None)
+            if self.check_unc and self.max_unc_restart is not None:
+                if uncmax_tmp > self.max_unc_restart:
+                    self.message_system(
+                        "The uncertainty is too large to "
+                        "use the last structure."
+                    )
+                    use_tmp = False
+            # Check fmax is lower than previous structure
+            if use_tmp and (self.check_fmax or self.check_energy):
+                self.update_method(self.best_structures)
+                _, energy_best, fmax_best = self.get_predictions()
+                if self.check_fmax:
+                    if fmax_tmp > fmax_best:
+                        self.message_system(
+                            "The fmax is too large to use the last structure."
+                        )
+                        use_tmp = False
+                if use_tmp and self.check_energy:
+                    if energy_tmp > energy_best:
+                        self.message_system(
+                            "The energy is too large to use the last structure."
+                        )
+                        use_tmp = False
+        # Check if the temporary structure passed the tests
+        if use_tmp:
+            self.update_method(self.structures)
+            self.message_system("The last structure is used.")
+        else:
+            self.update_method(self.best_structures)
+            # Store the best structures with the ML calculator
+            self.copy_best_structures()
+        # Save the initial trajectory
+        if step == 1 and self.initial_traj is not None:
+            self.save_trajectory(self.initial_traj, self.best_structures)
+        return
+
+    def get_predictions(self, **kwargs):
+        "Get the maximum uncertainty, energy, and fmax prediction."
+ uncmax = None + energy = None + fmax = None + if self.check_unc: + uncmax = self.method.get_uncertainty() + if self.check_energy: + energy = self.method.get_potential_energy() + if self.check_fmax: + fmax = max_(self.method.get_fmax()) + return uncmax, energy, fmax + + def get_candidate_predictions(self, candidates, **kwargs): + """ + Get the energies, uncertainties, and fmaxs with the ML calculator + for the candidates. + """ + energies = [] + uncertainties = [] + fmaxs = [] + for candidate in candidates: + energies.append(self.get_true_predicted_energy(candidate)) + uncertainties.append(candidate.calc.results["uncertainty"]) + fmaxs.append(sqrt((candidate.get_forces() ** 2).sum(axis=1).max())) + return ( + asarray(energies).reshape(-1), + asarray(uncertainties).reshape(-1), + asarray(fmaxs).reshape(-1), + ) + + def parallel_setup(self, comm, **kwargs): + "Setup the parallelization." + if comm is None: + self.comm = world + else: + self.comm = comm + self.rank = self.comm.rank + self.size = self.comm.size + return self + + def remove_parallel_setup(self): + "Remove the parallelization by removing the communicator." + self.comm = None + self.rank = 0 + self.size = 1 + return self + + def add_training(self, atoms_list, **kwargs): + "Add atoms_list data to ML model on rank=0." 
+ self.mlcalc.add_training(atoms_list) + return self.mlcalc + + def train_mlmodel(self, point_interest=None, **kwargs): + "Train the ML model" + # Start the training time + self.ml_train_time = time() + # Check if the model should be trained on all CPUs + if self.save_memory: + if self.rank != 0: + return self.mlcalc + # Update database with the points of interest + if point_interest is not None: + self.update_database_arguments(point_interest=point_interest) + else: + self.update_database_arguments(point_interest=self.best_structures) + # Train the ML model + self.mlcalc.train_model() + # Store the training time + self.ml_train_time = time() - self.ml_train_time + # Save the ML calculator if requested + if self.to_save_mlcalc: + self.save_mlcalc(**self.save_mlcalc_kwargs) + return self.mlcalc + + def save_data(self, **kwargs): + "Save the training data to a file." + if self.steps > 1: + self.mlcalc.save_data( + trajectory=self.trainingset, + mode="a", + write_last=True, + **kwargs, + ) + else: + self.mlcalc.save_data(trajectory=self.trainingset, **kwargs) + return self + + def save_trajectory(self, trajectory, structures, mode="w", **kwargs): + "Save the trajectory of the data." + if trajectory is None: + return self + if isinstance(trajectory, str): + with TrajectoryWriter(trajectory, mode=mode) as traj: + self.save_traj(traj, structures, **kwargs) + elif isinstance(trajectory, TrajectoryWriter): + self.save_traj(trajectory, structures, **kwargs) + else: + self.message_system( + "The trajectory type is not supported. " + "The trajectory is not saved!" + ) + return self + + def save_traj(self, traj, structures, **kwargs): + "Save the trajectory of the data with the TrajectoryWriter." 
+ if not isinstance(structures, list): + structures = [structures] + for struc in structures: + if struc is not None: + if self.save_properties_traj: + if hasattr(struc.calc, "results"): + struc.info["results"] = struc.calc.results + else: + struc.info["results"] = {} + traj.write(struc) + return traj + + def evaluate_candidates(self, candidates, **kwargs): + "Evaluate the candidates." + # Check if the candidates are a list + if not isinstance(candidates, (list, ndarray)): + candidates = [candidates] + # Evaluate the candidates + for candidate in candidates: + # Ensure that the candidate is not already in the database + if self.use_database_check: + candidate = self.ensure_candidate_not_in_database( + candidate, + show_message=True, + ) + # Broadcast the predictions + self.broadcast_predictions() + # Evaluate the candidate + self.evaluate(candidate, is_predicted=True) + # Set the mode to append + self.mode = "a" + return self + + def evaluate(self, candidate, is_predicted=False, **kwargs): + "Evaluate the ASE atoms with the ASE calculator." 
+ # Ensure that the candidate is not already in the database + if self.use_database_check and not is_predicted: + candidate, _ = self.ensure_not_in_database(candidate) + # Update the evaluated candidate + self.update_candidate(candidate) + # Start the evaluation time + self.eval_time = time() + # Calculate the energies and forces + self.message_system("Performing evaluation.", end="\r") + forces = self.candidate.get_forces( + apply_constraint=self.apply_constraint + ) + self.energy_true = self.candidate.get_potential_energy( + force_consistent=self.force_consistent + ) + self.message_system("Single-point calculation finished.") + # Store the evaluation time + self.eval_time = time() - self.eval_time + # Save deviation, fmax, and update steps + self.e_dev = abs(self.energy_true - self.energy_pred) + self.true_fmax = nanmax(norm(forces, axis=1)) + self.steps += 1 + # Store the data + if is_predicted: + # Store the candidate with predicted properties + self.save_trajectory( + self.pred_evaluated, + candidate, + mode=self.mode, + ) + self.add_training([self.candidate]) + self.save_data() + # Make a reference energy + if self.steps == 1: + atoms_ref = self.get_data_atoms()[0] + self.e_ref = atoms_ref.get_potential_energy() + # Store the best evaluated candidate + self.store_best_data(self.candidate) + # Make the summary table + self.make_summary_table() + return + + def update_candidate(self, candidate, dtol=1e-8, **kwargs): + "Update the evaluated candidate with given candidate." 
+        # Broadcast the system to all cpus
+        if self.rank == 0:
+            candidate = candidate.copy()
+        candidate = broadcast(candidate, root=0, comm=self.comm)
+        # Update the evaluated candidate with given candidate
+        # Set positions
+        self.candidate.set_positions(candidate.get_positions())
+        # Set cell
+        cell_old = self.candidate.get_cell()
+        cell_new = candidate.get_cell()
+        if norm(cell_old - cell_new) > dtol:
+            self.candidate.set_cell(cell_new)
+        # Set pbc, only when it actually differs (as the other properties)
+        pbc_old = self.candidate.get_pbc()
+        pbc_new = candidate.get_pbc()
+        if not (pbc_old == pbc_new).all():
+            self.candidate.set_pbc(pbc_new)
+        # Set initial charges
+        ini_charge_old = self.candidate.get_initial_charges()
+        ini_charge_new = candidate.get_initial_charges()
+        if norm(ini_charge_old - ini_charge_new) > dtol:
+            self.candidate.set_initial_charges(ini_charge_new)
+        # Set initial magmoms
+        ini_magmom_old = self.candidate.get_initial_magnetic_moments()
+        ini_magmom_new = candidate.get_initial_magnetic_moments()
+        if norm(ini_magmom_old - ini_magmom_new) > dtol:
+            self.candidate.set_initial_magnetic_moments(ini_magmom_new)
+        # Set momenta
+        momenta_old = self.candidate.get_momenta()
+        momenta_new = candidate.get_momenta()
+        if norm(momenta_old - momenta_new) > dtol:
+            self.candidate.set_momenta(momenta_new)
+        # Set velocities
+        velocities_old = self.candidate.get_velocities()
+        velocities_new = candidate.get_velocities()
+        if norm(velocities_old - velocities_new) > dtol:
+            self.candidate.set_velocities(velocities_new)
+        return candidate
+
+    def broadcast_predictions(self, **kwargs):
+        "Broadcast the predictions."
+ # Get energy and uncertainty and remove it from the list + if self.rank == 0: + self.energy_pred = self.pred_energies[0] + self.pred_energies = self.pred_energies[1:] + self.unc = self.uncertainties[0] + self.uncertainties = self.uncertainties[1:] + # Broadcast the predictions + self.energy_pred = broadcast(self.energy_pred, root=0, comm=self.comm) + self.unc = broadcast(self.unc, root=0, comm=self.comm) + self.pred_energies = broadcast( + self.pred_energies, + root=0, + comm=self.comm, + ) + self.uncertainties = broadcast( + self.uncertainties, + root=0, + comm=self.comm, + ) + return self + + def extra_initial_data(self, **kwargs): + """ + Get an initial structure for the active learning + if the ML calculator does not have any training points. + """ + # Get the number of training data + n_data = self.get_training_set_size() + # Check if the training set is empty + if n_data >= 2: + return self + # Get the initial structure + atoms = self.get_structures(get_all=False, allow_calculation=False) + # Rattle if the initial structure is calculated + if n_data == 1: + atoms = self.rattle_atoms(atoms, data_perturb=0.02) + # Evaluate the structure + self.evaluate(atoms) + # Print summary table + self.print_statement() + # Check if another initial data is needed + if n_data == 0: + self.extra_initial_data(**kwargs) + return self + + def update_database_arguments(self, point_interest=None, **kwargs): + "Update the arguments in the database." + self.mlcalc.update_database_arguments( + point_interest=point_interest, + **kwargs, + ) + return self + + def ensure_not_in_database( + self, + atoms, + show_message=True, + **kwargs, + ): + "Ensure the ASE Atoms instance is not in database by perturb it." 
+ # Return atoms if it does not exist + if atoms is None: + return atoms + # Boolean for checking if the atoms instance was in database + was_in_database = False + # Check if atoms instance is in the database + while self.is_in_database(atoms, dtol=self.data_tol, **kwargs): + # Atoms instance was in database + was_in_database = True + # Rattle the atoms + atoms = self.rattle_atoms(atoms, data_perturb=self.data_perturb) + # Print message if requested + if show_message: + self.message_system( + "The system is rattled, since it is already in " + "the database." + ) + return atoms, was_in_database + + def rattle_atoms(self, atoms, data_perturb, **kwargs): + "Rattle the ASE Atoms instance positions." + # Get positions + pos = atoms.get_positions() + # Rattle the positions + pos_new = pos + self.rng.normal( + loc=0.0, + scale=data_perturb, + size=pos.shape, + ) + # Set the new positions + atoms.set_positions(pos_new) + return atoms + + def ensure_candidate_not_in_database( + self, + candidate, + show_message=True, + **kwargs, + ): + "Ensure the candidate is not in database by perturb it." + # Check if the method is running in parallel + if not self.parallel_run and self.rank != 0: + return None + # Ensure that the candidate is not already in the database + candidate, was_in_database = self.ensure_not_in_database( + candidate, + show_message=show_message, + ) + # Calculate the properties if it was in the database + if was_in_database: + candidate.calc = self.mlcalc + candidate = self.method.copy_atoms( + candidate, + properties=["fmax", "uncertainty", "energy"], + allow_calculation=True, + ) + self.pred_energies[0] = self.get_true_predicted_energy(candidate) + self.uncertainties[0] = candidate.calc.results["uncertainty"] + # Rescale the fmax criterion + self.scale_fmax *= self.scale_fmax_org + return candidate + + def store_best_data(self, atoms, **kwargs): + "Store the best candidate." 
+ update = True + # Check if the energy is better than the previous best + if self.is_minimization: + best_energy = self.bests_data["energy"] + if best_energy is not None and self.energy_true > best_energy: + update = False + # Update the best data + if update: + self.bests_data["atoms"] = atoms.copy() + self.bests_data["energy"] = self.energy_true + self.bests_data["fmax"] = self.true_fmax + self.bests_data["uncertainty"] = self.unc + return self + + def get_training_set_size(self): + "Get the size of the training set" + return self.mlcalc.get_training_set_size() + + def choose_candidates(self, **kwargs): + "Use acquisition functions to chose the next training points" + # Get the candidates + candidates = self.copy_candidates() + # Get the energies and uncertainties + energies, uncertainties, fmaxs = self.get_candidate_predictions( + candidates + ) + # Store the uncertainty predictions + self.umax = max_(uncertainties) + self.umean = mean_(uncertainties) + # Calculate the acquisition function for each candidate + acq_values = self.acq.calculate( + energy=energies, + uncertainty=uncertainties, + fmax=fmaxs, + ) + # Chose the candidates given by the Acq. 
class + i_cand = self.acq.choose(acq_values) + i_cand = i_cand[: self.n_evaluations_each] + # Reverse the order of the candidates so the best is last + if self.n_evaluations_each > 1: + i_cand = i_cand[::-1] + # The next training points + candidates = [candidates[i] for i in i_cand] + self.pred_energies = energies[i_cand] + self.uncertainties = uncertainties[i_cand] + return candidates + + def check_convergence(self, fmax, method_converged, **kwargs): + "Check if the convergence criteria are fulfilled" + converged = True + if self.rank == 0: + # Check if the method converged + if not method_converged: + converged = False + # Check if the minimum number of trained data points is reached + if self.get_training_set_size() - 1 < self.min_data: + converged = False + # Check the force criterion is met if it is requested + if self.use_fmax_convergence and self.true_fmax > fmax: + converged = False + # Check the uncertainty criterion is met + if self.umax > self.unc_convergence: + converged = False + # Check the true energy deviation + # match the uncertainty prediction + uci = 2.0 * self.unc_convergence + if self.e_dev > uci: + converged = False + # Check if the energy is the minimum + if self.is_minimization: + e_dif = abs(self.energy_true - self.bests_data["energy"]) + if e_dif > uci: + converged = False + # Broadcast convergence statement if MPI is used + converged = broadcast(converged, root=0, comm=self.comm) + # Check the convergence + if converged: + self.copy_best_structures() + return converged + + def copy_best_structures( + self, + get_all=True, + properties=["forces", "energy", "uncertainty"], + allow_calculation=True, + **kwargs, + ): + """ + Copy the best structures. + + Parameters: + properties: list of str + The names of the requested properties. + If not given, the properties is not calculated. + allow_calculation: bool + Whether the properties are allowed to be calculated. + + Returns: + list of ASE Atoms objects: The best structures. 
+ """ + # Check if the method is running in parallel + if not self.parallel_run and self.rank != 0: + return self.best_structures + # Get the best structures with calculated properties + self.best_structures = self.get_structures( + get_all=get_all, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + return self.best_structures + + def get_best_structures(self): + "Get the best structures." + return self.best_structures + + def broadcast_best_structures(self): + "Broadcast the best structures." + self.best_structures = broadcast( + self.best_structures, + root=0, + comm=self.comm, + ) + return self.best_structures + + def copy_atoms(self, atoms): + "Copy the ASE Atoms instance with calculator." + return copy_atoms(atoms) + + def compare_atoms( + self, + atoms0, + atoms1, + tol=1e-8, + properties_to_check=["atoms", "positions", "cell", "pbc"], + **kwargs, + ): + """ + Compare two ASE Atoms instances. + """ + is_same = compare_atoms( + atoms0, + atoms1, + tol=tol, + properties_to_check=properties_to_check, + **kwargs, + ) + return is_same + + def get_objective_str(self, **kwargs): + "Get what the objective is for the active learning." + if not self.is_minimization: + return "max" + return "min" + + def set_verbose(self, verbose, **kwargs): + "Set verbose of MLModel." + self.verbose = verbose + self.mlcalc.mlmodel.update_arguments(verbose=verbose) + return self + + def is_in_database(self, atoms, **kwargs): + "Check if the ASE Atoms is in the database." + return self.mlcalc.is_in_database(atoms, **kwargs) + + def get_true_predicted_energy(self, atoms, **kwargs): + """ + Get the true predicted energy of the atoms. + Since the BOCalculator will return the predicted energy and + the uncertainty times the kappa value, this should be avoided. 
+ """ + energy = atoms.get_potential_energy() + if hasattr(atoms.calc, "results"): + if "predicted energy" in atoms.calc.results: + energy = atoms.calc.results["predicted energy"] + return energy + + def save_mlcalc(self, filename="mlcalc.pkl", **kwargs): + """ + Save the ML calculator object to a file. + + Parameters: + filename: str + The name of the file where the object is saved. + + Returns: + self: The object itself. + """ + if self.rank == 0: + self.mlcalc.save_mlcalc(filename, **kwargs) + return self + + def get_mlcalc(self, copy_mlcalc=True, **kwargs): + """ + Get the ML calculator instance. + + Parameters: + copy_mlcalc: bool + Whether to copy the instance. + + Returns: + MLCalculator: The ML calculator instance. + """ + if copy_mlcalc: + return self.mlcalc.copy() + return self.mlcalc + + def check_attributes(self, **kwargs): + """ + Check that the active learning and the method + agree upon the attributes. + """ + if self.parallel_run != self.method.parallel_run: + raise ValueError( + "Active learner and Optimization method does " + "not agree whether to run in parallel!" + ) + return self + + def set_seed(self, seed=None, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. 
+ """ + if seed is not None: + self.seed = seed + if isinstance(seed, int): + self.rng = default_rng(self.seed) + elif isinstance(seed, Generator) or isinstance(seed, RandomState): + self.rng = seed + else: + self.seed = None + self.rng = default_rng() + # Set the random seed for the optimization method + self.set_method_seed(self.seed) + # Set the random seed for the acquisition function + self.set_acq_seed(self.seed) + # Set the random seed for the ML calculator + self.set_mlcalc_seed(self.seed) + return self + + def set_method_seed(self, seed=None, **kwargs): + """ + Set the random seed for the optimization method. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + self.method.set_seed(seed) + return self + + def set_acq_seed(self, seed=None, **kwargs): + """ + Set the random seed for the acquisition function. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + self.acq.set_seed(seed) + return self + + def set_mlcalc_seed(self, seed=None, **kwargs): + """ + Set the random seed for the ML calculator. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + self.mlcalc.set_seed(seed) + return self + + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. 
+        """
+        # Set the data type
+        self.dtype = dtype
+        # Set the data type of the mlcalc
+        self.mlcalc.set_dtype(dtype)
+        return self
+
+    def set_kappa(self, kappa, **kwargs):
+        """
+        Set the kappa value for the acquisition function.
+        Kappa is used to scale the uncertainty in the acquisition function.
+        Furthermore, set the kappa value for the ML calculator
+        if it is a BOCalculator.
+
+        Parameters:
+        kappa: float
+            The kappa value to set for the acquisition function and
+            the ML calculator.
+
+        Returns:
+        self: The instance itself.
+        """
+        # Set the kappa value for the acquisition function
+        self.acq.set_kappa(kappa)
+        # Set the kappa value for the ML calculator if it is a BOCalculator
+        if isinstance(self.mlcalc, BOCalculator):
+            self.mlcalc.set_kappa(kappa)
+        return self
+
+    def message_system(self, message, obj=None, end="\n", is_warning=False):
+        "Print output once."
+        if self.verbose is True and self.rank == 0:
+            if is_warning:
+                warnings.warn(message)
+            else:
+                if obj is None:
+                    print(message, end=end)
+                else:
+                    print(message, obj, end=end)
+        return
+
+    def make_hdr_table(self, **kwargs):
+        "Make the header of the summary tables for the optimization process."
+ # Make the header to the summary table + hdr_list = [ + " {:<6} ".format("Step"), + " {:<11s} ".format("Date"), + " {:<16s} ".format("True energy/[eV]"), + " {:<16s} ".format("Uncertainty/[eV]"), + " {:<15s} ".format("True error/[eV]"), + " {:<16s} ".format("True fmax/[eV/Å]"), + ] + # Write the header + hdr = "|" + "|".join(hdr_list) + "|" + self.print_list = [hdr] + # Make the header to the time summary table + hdr_list = [ + " {:<6} ".format("Step"), + " {:<11s} ".format("Date"), + " {:<16s} ".format("ML training/[s]"), + " {:<16s} ".format("ML run/[s]"), + " {:<16s} ".format("Evaluation/[s]"), + ] + # Write the header to the time summary table + hdr_time = "|" + "|".join(hdr_list) + "|" + self.print_list_time = [hdr_time] + return hdr + + def make_summary_table(self, **kwargs): + "Make the summary of the optimization process as table." + if self.rank != 0: + return None, None + now = datetime.datetime.now().strftime("%d %H:%M:%S") + # Make the row for the summary table + msg = [ + " {:<6d} ".format(self.steps), + " {:<11s} ".format(now), + " {:16.4f} ".format(self.energy_true - self.e_ref), + " {:16.4f} ".format(self.unc), + " {:15.4f} ".format(self.e_dev), + " {:16.4f} ".format(self.true_fmax), + ] + msg = "|" + "|".join(msg) + "|" + self.print_list.append(msg) + msg = "\n".join(self.print_list) + # Make the row for the time summary table + msg_time = [ + " {:<6d} ".format(self.steps), + " {:<11s} ".format(now), + " {:16.4f} ".format(self.ml_train_time), + " {:16.4f} ".format(self.method_time), + " {:16.4f} ".format(self.eval_time), + ] + msg_time = "|" + "|".join(msg_time) + "|" + self.print_list_time.append(msg_time) + msg_time = "\n".join(self.print_list_time) + return msg, msg_time + + def save_summary_table(self, msg=None, **kwargs): + "Save the summary table in the .txt file." 
+ if self.tabletxt is not None: + with open(self.tabletxt, "w") as thefile: + if msg is None: + msg = "\n".join(self.print_list) + thefile.writelines(msg) + if self.timetxt is not None: + with open(self.timetxt, "w") as thefile: + msg = "\n".join(self.print_list_time) + thefile.writelines(msg) + return + + def print_statement(self, **kwargs): + "Print the active learning process as a table." + msg = "" + if self.rank == 0: + msg = "\n".join(self.print_list) + self.save_summary_table(msg) + self.message_system(msg) + return msg + + def restart_optimization( + self, + restart=False, + prev_calculations=None, + **kwargs, + ): + "Restart the active learning." + # Check if the optimization should be restarted + if not restart: + return prev_calculations + # Load the previous calculations from trajectory + # Test if the restart is possible + structure = read(self.trajectory, "0") + if len(structure) != self.natoms: + raise ValueError( + "The number of atoms in the trajectory does not match " + "the number of atoms in given." 
+ ) + # Load the predicted structures + if self.n_structures == 1: + index = "-1" + else: + index = f"-{self.n_structures}:" + self.structures = read( + self.trajectory, + index, + ) + # Load the previous training data + prev_calculations = read(self.trainingset, ":") + # Update the method with the structures + self.update_method(self.structures) + self.copy_best_structures(allow_calculation=False) + # Set the writing mode + self.mode = "a" + # Load the summary table + if self.tabletxt is not None: + with open(self.tabletxt, "r") as thefile: + self.print_list = [line.replace("\n", "") for line in thefile] + # Update the total steps + self.steps = len(self.print_list) - 1 + # Make a reference energy + atoms_ref = self.copy_atoms(prev_calculations[0]) + self.e_ref = atoms_ref.get_potential_energy() + # Load the time summary table + if self.timetxt is not None: + with open(self.timetxt, "r") as thefile: + self.print_list_time = [ + line.replace("\n", "") for line in thefile + ] + # Update the total steps + if self.tabletxt is None: + self.steps = len(self.print_list_time) - 1 + return prev_calculations + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + method=self.method, + ase_calc=self.ase_calc, + mlcalc=self.mlcalc, + acq=self.acq, + is_minimization=self.is_minimization, + save_memory=self.save_memory, + parallel_run=self.parallel_run, + copy_calc=self.copy_calc, + verbose=self.verbose, + apply_constraint=self.apply_constraint, + force_consistent=self.force_consistent, + scale_fmax=self.scale_fmax_org, + use_fmax_convergence=self.use_fmax_convergence, + unc_convergence=self.unc_convergence, + use_method_unc_conv=self.use_method_unc_conv, + use_restart=self.use_restart, + check_unc=self.check_unc, + check_energy=self.check_energy, + check_fmax=self.check_fmax, + max_unc_restart=self.max_unc_restart, + n_evaluations_each=self.n_evaluations_each, + min_data=self.min_data, + use_database_check=self.use_database_check, + data_perturb=self.data_perturb, + data_tol=self.data_tol, + save_properties_traj=self.save_properties_traj, + to_save_mlcalc=self.to_save_mlcalc, + save_mlcalc_kwargs=self.save_mlcalc_kwargs, + trajectory=self.trajectory, + trainingset=self.trainingset, + pred_evaluated=self.pred_evaluated, + converged_trajectory=self.converged_trajectory, + initial_traj=self.initial_traj, + tabletxt=self.tabletxt, + timetxt=self.timetxt, + seed=self.seed, + dtype=self.dtype, + comm=self.comm, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + def copy(self): + "Copy the object." 
+ # Get all arguments + arg_kwargs, constant_kwargs, object_kwargs = self.get_arguments() + # Make a clone + clone = self.__class__(**arg_kwargs) + # Check if constants have to be saved + if len(constant_kwargs.keys()): + for key, value in constant_kwargs.items(): + clone.__dict__[key] = value + # Check if objects have to be saved + if len(object_kwargs.keys()): + for key, value in object_kwargs.items(): + clone.__dict__[key] = value.copy() + return clone + + def __repr__(self): + arg_kwargs = self.get_arguments()[0] + str_kwargs = ",".join( + [f"{key}={value}" for key, value in arg_kwargs.items()] + ) + return "{}({})".format(self.__class__.__name__, str_kwargs) diff --git a/catlearn/activelearning/adsorption.py b/catlearn/activelearning/adsorption.py new file mode 100644 index 00000000..fb7e3cc9 --- /dev/null +++ b/catlearn/activelearning/adsorption.py @@ -0,0 +1,471 @@ +from ase.parallel import world +from .activelearning import ActiveLearning +from ..optimizer import AdsorptionOptimizer +from ..optimizer import ParallelOptimizer +from ..regression.gp.baseline import BornRepulsionCalculator, MieCalculator + + +class AdsorptionAL(ActiveLearning): + """ + An active learner that is used for accelerating global adsorption search + using simulated annealing with an active learning approach. + The adsorbate is optimized on a surface, where the bond-lengths of the + adsorbate atoms are fixed and the slab atoms are fixed. 
+ """ + + def __init__( + self, + slab, + adsorbate, + ase_calc, + mlcalc=None, + adsorbate2=None, + bounds=None, + opt_kwargs={}, + bond_tol=1e-8, + chains=None, + acq=None, + save_memory=False, + parallel_run=False, + copy_calc=False, + verbose=True, + apply_constraint=True, + force_consistent=False, + scale_fmax=0.8, + use_fmax_convergence=True, + unc_convergence=0.02, + use_method_unc_conv=True, + use_restart=False, + check_unc=True, + check_energy=True, + check_fmax=False, + n_evaluations_each=1, + min_data=5, + use_database_check=True, + data_perturb=0.001, + data_tol=1e-8, + save_properties_traj=True, + to_save_mlcalc=False, + save_mlcalc_kwargs={}, + default_mlcalc_kwargs={}, + trajectory="predicted.traj", + trainingset="evaluated.traj", + pred_evaluated="predicted_evaluated.traj", + converged_trajectory="converged.traj", + initial_traj="initial_struc.traj", + tabletxt="ml_summary.txt", + timetxt="ml_time.txt", + prev_calculations=None, + restart=False, + seed=1, + dtype=float, + comm=world, + **kwargs, + ): + """ + Initialize the ActiveLearning instance. + + Parameters: + slab: Atoms instance + The slab structure. + Can either be a surface or a nanoparticle. + adsorbate: Atoms instance + The adsorbate structure. + ase_calc: ASE calculator instance. + ASE calculator as implemented in ASE. + mlcalc: ML-calculator instance. + The ML-calculator instance used as surrogate surface. + The default BOCalculator instance is used if mlcalc is None. + adsorbate2: Atoms instance (optional) + The second adsorbate structure. + Optimize both adsorbates simultaneously. + The two adsorbates will have different tags. + bounds: (6,2) array or (12,2) array (optional) + The bounds for the optimization. + The first 3 rows are the x, y, z scaled coordinates for + the center of the adsorbate. + The next 3 rows are the three rotation angles in radians. + If two adsorbates are optimized, the next 6 rows are for + the second adsorbate. 
+ opt_kwargs: dict + The keyword arguments for the simulated annealing optimizer. + bond_tol: float + The bond tolerance used for the FixBondLengths. + chains: int (optional) + The number of optimization that will be run in parallel. + It is only used if parallel_run=True. + acq: Acquisition class instance. + The Acquisition instance used for calculating the + acq. function and choose a candidate to calculate next. + The default AcqUME instance is used if acq is None. + save_memory: bool + Whether to only train the ML calculator and store all objects + on one CPU. + If save_memory==True then parallel optimization of + the hyperparameters can not be achived. + If save_memory==False no MPI object is used. + parallel_run: bool + Whether to run method in parallel on multiple CPUs (True) or + in sequence on 1 CPU (False). + copy_calc: bool + Whether to copy the calculator for each candidate + in the method. + verbose: bool + Whether to print on screen the full output (True) or + not (False). + apply_constraint: bool + Whether to apply the constrains of the ASE Atoms instance + to the calculated forces. + By default (apply_constraint=True) forces are 0 for + constrained atoms and directions. + force_consistent: bool or None. + Use force-consistent energy calls (as opposed to the energy + extrapolated to 0 K). + By default force_consistent=False. + scale_fmax: float + The scaling of the fmax convergence criterion. + It makes the structure(s) converge tighter on surrogate + surface. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + use_fmax_convergence: bool + Whether to use the maximum force as an convergence criterion. + unc_convergence: float + Maximum uncertainty for convergence in + the active learning (in eV). + use_method_unc_conv: bool + Whether to use the unc_convergence as a convergence criterion + in the optimization method. 
+ use_restart: bool + Use the result from last robust iteration. + Be aware that restart and low max_unc can result in only the + initial structure passing the maximum uncertainty criterion. + check_unc: bool + Check if the uncertainty is large for the restarted result and + if it is then use the previous initial. + check_energy: bool + Check if the energy is larger for the restarted result than + the previous. + check_fmax: bool + Check if the maximum force is larger for the restarted result + than the initial interpolation and if so then replace it. + n_evaluations_each: int + Number of evaluations for each candidate. + min_data: int + The minimum number of data points in the training set before + the active learning can converge. + use_database_check: bool + Whether to check if the new structure is within the database. + If it is in the database, the structure is rattled. + Please be aware that the predicted structure will differ from + the structure in the database if the rattling is applied. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + data_perturb: float + The perturbation of the data structure if it is in the database + and use_database_check is True. + data_perturb is the standard deviation of the normal + distribution used to rattle the structure. + data_tol: float + The tolerance for the data structure if it is in the database + and use_database_check is True. + save_properties_traj: bool + Whether to save the calculated properties to the trajectory. + to_save_mlcalc: bool + Whether to save the ML calculator to a file after training. + save_mlcalc_kwargs: dict + Arguments for saving the ML calculator, like the filename. + default_mlcalc_kwargs: dict + The default keyword arguments for the ML calculator. + trajectory: str or TrajectoryWriter instance + Trajectory filename to store the predicted data. 
+ Or the TrajectoryWriter instance to store the predicted data. + trainingset: str or TrajectoryWriter instance + Trajectory filename to store the evaluated training data. + Or the TrajectoryWriter instance to store the evaluated + training data. + pred_evaluated: str or TrajectoryWriter instance (optional) + Trajectory filename to store the evaluated training data + with predicted properties. + Or the TrajectoryWriter instance to store the evaluated + training data with predicted properties. + If pred_evaluated is None, then the predicted data is + not saved. + converged_trajectory: str or TrajectoryWriter instance + Trajectory filename to store the converged structure(s). + Or the TrajectoryWriter instance to store the converged + structure(s). + initial_traj: str or TrajectoryWriter instance + Trajectory filename to store the initial structure(s). + Or the TrajectoryWriter instance to store the initial + structure(s). + tabletxt: str + Name of the .txt file where the summary table is printed. + It is not saved to the file if tabletxt=None. + timetxt: str (optional) + Name of the .txt file where the time table is printed. + It is not saved to the file if timetxt=None. + prev_calculations: Atoms list or ASE Trajectory file. + The user can feed previously calculated data + for the same hypersurface. + The previous calculations must be fed as an Atoms list + or Trajectory filename. + restart: bool + Whether to restart the active learning. + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + comm: MPI communicator. + The MPI communicator. 
+ """ + # Build the optimizer method + method = self.build_method( + slab=slab, + adsorbate=adsorbate, + adsorbate2=adsorbate2, + bounds=bounds, + opt_kwargs=opt_kwargs, + bond_tol=bond_tol, + chains=chains, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + # Initialize the BayesianOptimizer + super().__init__( + method=method, + ase_calc=ase_calc, + mlcalc=mlcalc, + acq=acq, + is_minimization=True, + save_memory=save_memory, + parallel_run=parallel_run, + copy_calc=copy_calc, + verbose=verbose, + apply_constraint=apply_constraint, + force_consistent=force_consistent, + scale_fmax=scale_fmax, + use_fmax_convergence=use_fmax_convergence, + unc_convergence=unc_convergence, + use_method_unc_conv=use_method_unc_conv, + use_restart=use_restart, + check_unc=check_unc, + check_energy=check_energy, + check_fmax=check_fmax, + n_evaluations_each=n_evaluations_each, + min_data=min_data, + use_database_check=use_database_check, + data_perturb=data_perturb, + data_tol=data_tol, + save_properties_traj=save_properties_traj, + to_save_mlcalc=to_save_mlcalc, + save_mlcalc_kwargs=save_mlcalc_kwargs, + default_mlcalc_kwargs=default_mlcalc_kwargs, + trajectory=trajectory, + trainingset=trainingset, + pred_evaluated=pred_evaluated, + converged_trajectory=converged_trajectory, + initial_traj=initial_traj, + tabletxt=tabletxt, + timetxt=timetxt, + prev_calculations=prev_calculations, + restart=restart, + seed=seed, + dtype=dtype, + comm=comm, + **kwargs, + ) + + def build_method( + self, + slab, + adsorbate, + adsorbate2=None, + bounds=None, + opt_kwargs={}, + bond_tol=1e-8, + chains=None, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + "Build the optimization method." 
+ # Save the instances for creating the adsorption optimizer + self.slab = self.copy_atoms(slab) + self.adsorbate = self.copy_atoms(adsorbate) + if adsorbate2 is not None: + self.adsorbate2 = self.copy_atoms(adsorbate2) + else: + self.adsorbate2 = None + self.bounds = bounds + self.opt_kwargs = opt_kwargs.copy() + self.bond_tol = bond_tol + self.chains = chains + # Build the optimizer method + method = AdsorptionOptimizer( + slab=slab, + adsorbate=adsorbate, + adsorbate2=adsorbate2, + bounds=bounds, + opt_kwargs=opt_kwargs, + bond_tol=bond_tol, + parallel_run=False, + comm=comm, + verbose=verbose, + seed=seed, + ) + # Run the method in parallel if requested + if parallel_run: + method = ParallelOptimizer( + method, + chains=chains, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + return method + + def extra_initial_data(self, **kwargs): + # Get the number of training data + n_data = self.get_training_set_size() + # Check if the training set is empty + if n_data >= 2: + return self + # Get the initial structures from baseline potentials + if n_data == 0: + self.method.set_calculator(BornRepulsionCalculator(r_scale=1.0)) + else: + self.method.set_calculator(MieCalculator(r_scale=1.2, denergy=0.2)) + self.method.run(fmax=0.05, steps=1000) + atoms = self.method.get_candidates()[0] + # Evaluate the structure + self.evaluate(atoms) + # Print summary table + self.print_statement() + # Check if another initial data is needed + if n_data == 0: + self.extra_initial_data(**kwargs) + return self + + def setup_default_mlcalc( + self, + atoms=None, + fp=None, + baseline=BornRepulsionCalculator(), + use_derivatives=True, + calc_forces=False, + kappa=-1.0, + calc_kwargs={}, + **kwargs, + ): + from ..regression.gp.fingerprint import SortedInvDistances + + # Setup the fingerprint + if fp is None: + # Check if the Atoms object is given + if atoms is None: + try: + atoms = self.get_structures( + get_all=False, + allow_calculation=False, + ) + except 
NameError: + raise NameError("The Atoms object is not given or stored.") + # Can only use distances if there are more than one atom + if len(atoms) > 1: + if atoms.pbc.any(): + periodic_softmax = True + else: + periodic_softmax = False + fp = SortedInvDistances( + reduce_dimensions=True, + use_derivatives=True, + periodic_softmax=periodic_softmax, + wrap=True, + use_tags=True, + ) + # Set a limit for the uncertainty + if "max_unc" not in calc_kwargs.keys(): + calc_kwargs["max_unc"] = 2.0 + return super().setup_default_mlcalc( + fp=fp, + atoms=atoms, + baseline=baseline, + use_derivatives=use_derivatives, + calc_forces=calc_forces, + kappa=kappa, + calc_kwargs=calc_kwargs, + **kwargs, + ) + + def get_constraints(self, structure, **kwargs): + "Get the constraints of the structures in the method." + constraints = [c.copy() for c in structure.constraints] + return constraints + + def get_constraints_indices(self, structure, **kwargs): + "Get the indices of the constraints of the structures in the method." + indices = [i for c in structure.constraints for i in c.get_indices()] + return indices + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + slab=self.slab, + adsorbate=self.adsorbate, + ase_calc=self.ase_calc, + mlcalc=self.mlcalc, + adsorbate2=self.adsorbate2, + bounds=self.bounds, + opt_kwargs=self.opt_kwargs, + bond_tol=self.bond_tol, + chains=self.chains, + acq=self.acq, + save_memory=self.save_memory, + parallel_run=self.parallel_run, + copy_calc=self.copy_calc, + verbose=self.verbose, + apply_constraint=self.apply_constraint, + force_consistent=self.force_consistent, + scale_fmax=self.scale_fmax_org, + use_fmax_convergence=self.use_fmax_convergence, + unc_convergence=self.unc_convergence, + use_method_unc_conv=self.use_method_unc_conv, + use_restart=self.use_restart, + check_unc=self.check_unc, + check_energy=self.check_energy, + check_fmax=self.check_fmax, + n_evaluations_each=self.n_evaluations_each, + min_data=self.min_data, + use_database_check=self.use_database_check, + data_perturb=self.data_perturb, + data_tol=self.data_tol, + save_properties_traj=self.save_properties_traj, + to_save_mlcalc=self.to_save_mlcalc, + save_mlcalc_kwargs=self.save_mlcalc_kwargs, + trajectory=self.trajectory, + trainingset=self.trainingset, + pred_evaluated=self.pred_evaluated, + converged_trajectory=self.converged_trajectory, + initial_traj=self.initial_traj, + tabletxt=self.tabletxt, + timetxt=self.timetxt, + seed=self.seed, + dtype=self.dtype, + comm=self.comm, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/activelearning/local.py b/catlearn/activelearning/local.py new file mode 100644 index 00000000..2ecf1771 --- /dev/null +++ b/catlearn/activelearning/local.py @@ -0,0 +1,374 @@ +from ase.optimize import FIRE +from ase.parallel import world +from .activelearning import ActiveLearning +from ..optimizer import LocalOptimizer + + +class LocalAL(ActiveLearning): 
+ """ + An active learner that is used for accelerating local optimization + of an atomic structure with an active learning approach. + """ + + def __init__( + self, + atoms, + ase_calc, + mlcalc=None, + local_opt=FIRE, + local_opt_kwargs={}, + acq=None, + is_minimization=True, + save_memory=False, + parallel_run=False, + copy_calc=False, + verbose=True, + apply_constraint=True, + force_consistent=False, + scale_fmax=0.8, + use_fmax_convergence=True, + unc_convergence=0.02, + use_method_unc_conv=True, + use_restart=True, + check_unc=True, + check_energy=True, + check_fmax=False, + max_unc_restart=0.05, + n_evaluations_each=1, + min_data=3, + use_database_check=True, + data_perturb=0.001, + data_tol=1e-8, + save_properties_traj=True, + to_save_mlcalc=False, + save_mlcalc_kwargs={}, + default_mlcalc_kwargs={}, + trajectory="predicted.traj", + trainingset="evaluated.traj", + pred_evaluated="predicted_evaluated.traj", + converged_trajectory="converged.traj", + initial_traj="initial_struc.traj", + tabletxt="ml_summary.txt", + timetxt="ml_time.txt", + prev_calculations=None, + restart=False, + seed=1, + dtype=float, + comm=world, + **kwargs, + ): + """ + Initialize the ActiveLearning instance. + + Parameters: + atoms: Atoms instance + The instance to be optimized. + ase_calc: ASE calculator instance. + ASE calculator as implemented in ASE. + mlcalc: ML-calculator instance. + The ML-calculator instance used as surrogate surface. + The default BOCalculator instance is used if mlcalc is None. + local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. + acq: Acquisition class instance. + The Acquisition instance used for calculating the + acq. function and choose a candidate to calculate next. + The default AcqUME instance is used if acq is None. + is_minimization: bool + Whether it is a minimization that is performed. + Alternative is a maximization. 
+ save_memory: bool + Whether to only train the ML calculator and store all objects + on one CPU. + If save_memory==True then parallel optimization of + the hyperparameters can not be achived. + If save_memory==False no MPI object is used. + parallel_run: bool + Whether to run method in parallel on multiple CPUs (True) or + in sequence on 1 CPU (False). + copy_calc: bool + Whether to copy the calculator for each candidate + in the method. + verbose: bool + Whether to print on screen the full output (True) or + not (False). + apply_constraint: bool + Whether to apply the constrains of the ASE Atoms instance + to the calculated forces. + By default (apply_constraint=True) forces are 0 for + constrained atoms and directions. + force_consistent: bool or None. + Use force-consistent energy calls (as opposed to the energy + extrapolated to 0 K). + By default force_consistent=False. + scale_fmax: float + The scaling of the fmax convergence criterion. + It makes the structure(s) converge tighter on surrogate + surface. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + use_fmax_convergence: bool + Whether to use the maximum force as an convergence criterion. + unc_convergence: float + Maximum uncertainty for convergence in + the active learning (in eV). + use_method_unc_conv: bool + Whether to use the unc_convergence as a convergence criterion + in the optimization method. + use_restart: bool + Use the result from last robust iteration. + check_unc: bool + Check if the uncertainty is large for the restarted result and + if it is then use the previous initial. + check_energy: bool + Check if the energy is larger for the restarted result than + the previous. + check_fmax: bool + Check if the maximum force is larger for the restarted result + than the initial interpolation and if so then replace it. 
+ max_unc_restart: float (optional) + Maximum uncertainty (in eV) for using the structure(s) as + the restart in the optimization method. + If max_unc_restart is None, then the optimization is performed + without the maximum uncertainty. + n_evaluations_each: int + Number of evaluations for each structure. + min_data: int + The minimum number of data points in the training set before + the active learning can converge. + use_database_check: bool + Whether to check if the new structure is within the database. + If it is in the database, the structure is rattled. + Please be aware that the predicted structure will differ from + the structure in the database if the rattling is applied. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + data_perturb: float + The perturbation of the data structure if it is in the database + and use_database_check is True. + data_perturb is the standard deviation of the normal + distribution used to rattle the structure. + data_tol: float + The tolerance for the data structure if it is in the database + and use_database_check is True. + save_properties_traj: bool + Whether to save the calculated properties to the trajectory. + to_save_mlcalc: bool + Whether to save the ML calculator to a file after training. + save_mlcalc_kwargs: dict + Arguments for saving the ML calculator, like the filename. + default_mlcalc_kwargs: dict + The default keyword arguments for the ML calculator. + trajectory: str or TrajectoryWriter instance + Trajectory filename to store the predicted data. + Or the TrajectoryWriter instance to store the predicted data. + trainingset: str or TrajectoryWriter instance + Trajectory filename to store the evaluated training data. + Or the TrajectoryWriter instance to store the evaluated + training data. 
+ pred_evaluated: str or TrajectoryWriter instance (optional) + Trajectory filename to store the evaluated training data + with predicted properties. + Or the TrajectoryWriter instance to store the evaluated + training data with predicted properties. + If pred_evaluated is None, then the predicted data is + not saved. + converged_trajectory: str or TrajectoryWriter instance + Trajectory filename to store the converged structure(s). + Or the TrajectoryWriter instance to store the converged + structure(s). + initial_traj: str or TrajectoryWriter instance + Trajectory filename to store the initial structure(s). + Or the TrajectoryWriter instance to store the initial + structure(s). + tabletxt: str + Name of the .txt file where the summary table is printed. + It is not saved to the file if tabletxt=None. + timetxt: str (optional) + Name of the .txt file where the time table is printed. + It is not saved to the file if timetxt=None. + prev_calculations: Atoms list or ASE Trajectory file. + The user can feed previously calculated data + for the same hypersurface. + The previous calculations must be fed as an Atoms list + or Trajectory filename. + restart: bool + Whether to restart the active learning. + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + comm: MPI communicator. + The MPI communicator. 
+ """ + # Build the optimizer method + method = self.build_method( + atoms=atoms, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + ) + # Initialize the BayesianOptimizer + super().__init__( + method=method, + ase_calc=ase_calc, + mlcalc=mlcalc, + acq=acq, + is_minimization=is_minimization, + save_memory=save_memory, + parallel_run=parallel_run, + copy_calc=copy_calc, + verbose=verbose, + apply_constraint=apply_constraint, + force_consistent=force_consistent, + scale_fmax=scale_fmax, + use_fmax_convergence=use_fmax_convergence, + unc_convergence=unc_convergence, + use_method_unc_conv=use_method_unc_conv, + use_restart=use_restart, + check_unc=check_unc, + check_energy=check_energy, + check_fmax=check_fmax, + max_unc_restart=max_unc_restart, + n_evaluations_each=n_evaluations_each, + min_data=min_data, + use_database_check=use_database_check, + data_perturb=data_perturb, + data_tol=data_tol, + save_properties_traj=save_properties_traj, + to_save_mlcalc=to_save_mlcalc, + save_mlcalc_kwargs=save_mlcalc_kwargs, + default_mlcalc_kwargs=default_mlcalc_kwargs, + trajectory=trajectory, + trainingset=trainingset, + pred_evaluated=pred_evaluated, + converged_trajectory=converged_trajectory, + initial_traj=initial_traj, + tabletxt=tabletxt, + timetxt=timetxt, + prev_calculations=prev_calculations, + restart=restart, + seed=seed, + dtype=dtype, + comm=comm, + **kwargs, + ) + + def build_method( + self, + atoms, + local_opt=FIRE, + local_opt_kwargs={}, + parallel_run=False, + comm=world, + verbose=False, + **kwargs, + ): + "Build the optimization method." 
+ # Save the instances for creating the local optimizer + self.atoms = self.copy_atoms(atoms) + self.local_opt = local_opt + self.local_opt_kwargs = local_opt_kwargs + # Build the optimizer method + method = LocalOptimizer( + atoms, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + ) + return method + + def extra_initial_data(self, **kwargs): + # Get the number of training data + n_data = self.get_training_set_size() + # Check if the training set is empty + if n_data >= 2: + return self + # Check if the initial structure is calculated + if n_data == 0: + if self.atoms.calc is not None: + results = self.atoms.calc.results + if "energy" in results and "forces" in results: + if self.atoms.calc.atoms is not None: + is_same = self.compare_atoms( + self.atoms, + self.atoms.calc.atoms, + ) + if is_same: + self.use_prev_calculations([self.atoms]) + self.extra_initial_data(**kwargs) + return self + # Get the initial structure + atoms = self.atoms.copy() + # Rattle if the initial structure is calculated + if n_data == 1: + atoms = self.rattle_atoms(atoms, data_perturb=0.02) + # Evaluate the structure + self.evaluate(atoms) + # Print summary table + self.print_statement() + # Check if another initial data is needed + if n_data == 0: + self.extra_initial_data(**kwargs) + return self + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + atoms=self.atoms, + ase_calc=self.ase_calc, + mlcalc=self.mlcalc, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + acq=self.acq, + is_minimization=self.is_minimization, + save_memory=self.save_memory, + parallel_run=self.parallel_run, + copy_calc=self.copy_calc, + verbose=self.verbose, + apply_constraint=self.apply_constraint, + force_consistent=self.force_consistent, + scale_fmax=self.scale_fmax_org, + use_fmax_convergence=self.use_fmax_convergence, + unc_convergence=self.unc_convergence, + use_method_unc_conv=self.use_method_unc_conv, + use_restart=self.use_restart, + check_unc=self.check_unc, + check_energy=self.check_energy, + check_fmax=self.check_fmax, + max_unc_restart=self.max_unc_restart, + n_evaluations_each=self.n_evaluations_each, + min_data=self.min_data, + use_database_check=self.use_database_check, + data_perturb=self.data_perturb, + data_tol=self.data_tol, + save_properties_traj=self.save_properties_traj, + to_save_mlcalc=self.to_save_mlcalc, + save_mlcalc_kwargs=self.save_mlcalc_kwargs, + trajectory=self.trajectory, + trainingset=self.trainingset, + pred_evaluated=self.pred_evaluated, + converged_trajectory=self.converged_trajectory, + initial_traj=self.initial_traj, + tabletxt=self.tabletxt, + timetxt=self.timetxt, + seed=self.seed, + dtype=self.dtype, + comm=self.comm, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/activelearning/mlgo.py b/catlearn/activelearning/mlgo.py new file mode 100644 index 00000000..473cfdb9 --- /dev/null +++ b/catlearn/activelearning/mlgo.py @@ -0,0 +1,605 @@ +from ase.io import read +from ase.parallel import world +from ase.optimize import FIRE +from .adsorption import AdsorptionAL +from ..optimizer import LocalOptimizer + + +class 
MLGO(AdsorptionAL): + """ + An active learner that is used for accelerating global adsorption search + using simulated annealing and local optimization with an active learning + approach. + The adsorbate is optimized on a surface, where the bond-lengths of the + adsorbate atoms are fixed and the slab atoms are fixed. + Afterwards, the structure is local optimized with the initial constraints + applied to the adsorbate atoms and the surface atoms. + """ + + def __init__( + self, + slab, + adsorbate, + ase_calc, + mlcalc=None, + mlcalc_local=None, + adsorbate2=None, + bounds=None, + opt_kwargs={}, + bond_tol=1e-8, + chains=None, + local_opt=FIRE, + local_opt_kwargs={}, + reuse_data_local=False, + acq=None, + save_memory=False, + parallel_run=False, + copy_calc=False, + verbose=True, + apply_constraint=True, + force_consistent=False, + scale_fmax=0.8, + use_fmax_convergence=True, + unc_convergence=0.02, + use_method_unc_conv=True, + use_restart=False, + use_restart_local=True, + check_unc=True, + check_energy=True, + check_fmax=False, + max_unc_restart=0.05, + n_evaluations_each=1, + min_data=3, + use_database_check=True, + data_perturb=0.001, + data_tol=1e-8, + save_properties_traj=True, + to_save_mlcalc=False, + save_mlcalc_kwargs={}, + default_mlcalc_kwargs={}, + default_mlcalc_local_kwargs={}, + trajectory="predicted.traj", + trainingset="evaluated.traj", + pred_evaluated="predicted_evaluated.traj", + converged_trajectory="converged.traj", + initial_traj="initial_struc.traj", + tabletxt="ml_summary.txt", + timetxt="ml_time.txt", + prev_calculations=None, + restart=False, + seed=1, + dtype=float, + comm=world, + **kwargs, + ): + """ + An active learner that is used for accelerating local optimization + of an atomic structure with an active learning approach. + + Parameters: + slab: Atoms instance + The slab structure. + Can either be a surface or a nanoparticle. + adsorbate: Atoms instance + The adsorbate structure. + ase_calc: ASE calculator instance. 
+ ASE calculator as implemented in ASE. + mlcalc: ML-calculator instance. + The ML-calculator instance used as surrogate surface. + The default BOCalculator instance is used if mlcalc is None. + mlcalc_local: ML-calculator instance. + The ML-calculator instance used for the local optimization. + The default BOCalculator instance is used + if mlcalc_local is None. + adsorbate2: Atoms instance (optional) + The second adsorbate structure. + Optimize both adsorbates simultaneously. + The two adsorbates will have different tags. + bounds: (6,2) array or (12,2) array (optional) + The bounds for the optimization. + The first 3 rows are the x, y, z scaled coordinates for + the center of the adsorbate. + The next 3 rows are the three rotation angles in radians. + If two adsorbates are optimized, the next 6 rows are for + the second adsorbate. + opt_kwargs: dict + The keyword arguments for the simulated annealing optimizer. + bond_tol: float + The bond tolerance used for the FixBondLengths. + chains: int (optional) + The number of optimization that will be run in parallel. + It is only used if parallel_run=True. + local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. + reuse_data_local: bool + Whether to reuse the data from the global optimization in the + ML-calculator for the local optimization. + If reuse_data_local is False, the last two structures + are used to train the local ML-calculator. + acq: Acquisition class instance. + The Acquisition instance used for calculating the + acq. function and choose a candidate to calculate next. + The default AcqUME instance is used if acq is None. + is_minimization: bool + Whether it is a minimization that is performed. + Alternative is a maximization. + save_memory: bool + Whether to only train the ML calculator and store all objects + on one CPU. + If save_memory==True then parallel optimization of + the hyperparameters can not be achived. 
+                If save_memory==False no MPI object is used.
+            parallel_run: bool
+                Whether to run method in parallel on multiple CPUs (True) or
+                in sequence on 1 CPU (False).
+            copy_calc: bool
+                Whether to copy the calculator for each candidate
+                in the method.
+            verbose: bool
+                Whether to print on screen the full output (True) or
+                not (False).
+            apply_constraint: bool
+                Whether to apply the constraints of the ASE Atoms instance
+                to the calculated forces.
+                By default (apply_constraint=True) forces are 0 for
+                constrained atoms and directions.
+            force_consistent: bool or None.
+                Use force-consistent energy calls (as opposed to the energy
+                extrapolated to 0 K).
+                By default force_consistent=False.
+            scale_fmax: float
+                The scaling of the fmax convergence criterion.
+                It makes the structure(s) converge tighter on surrogate
+                surface.
+                If use_database_check is True and the structure is in the
+                database, then the scale_fmax is multiplied by the original
+                scale_fmax to give tighter convergence.
+            use_fmax_convergence: bool
+                Whether to use the maximum force as a convergence criterion.
+            unc_convergence: float
+                Maximum uncertainty for convergence in
+                the active learning (in eV).
+            use_method_unc_conv: bool
+                Whether to use the unc_convergence as a convergence criterion
+                in the optimization method.
+            use_restart: bool
+                Use the result from last robust iteration in
+                the global optimization.
+                Be aware that restart and low max_unc can result in only the
+                initial structure passing the maximum uncertainty criterion.
+            use_restart_local: bool
+                Use the result from last robust iteration in
+                the local optimization.
+            check_unc: bool
+                Check if the uncertainty is large for the restarted result and
+                if it is then use the previous initial.
+            check_energy: bool
+                Check if the energy is larger for the restarted result than
+                the previous.
+            check_fmax: bool
+                Check if the maximum force is larger for the restarted result
+                than the initial interpolation and if so then replace it.
+ max_unc_restart: float (optional) + Maximum uncertainty (in eV) for using the structure(s) as + the restart in the optimization method. + If max_unc_restart is None, then the optimization is performed + without the maximum uncertainty. + n_evaluations_each: int + The number of evaluations for each structure. + min_data: int + The minimum number of data points in the training set before + the active learning can converge. + use_database_check: bool + Whether to check if the new structure is within the database. + If it is in the database, the structure is rattled. + Please be aware that the predicted structure will differ from + the structure in the database if the rattling is applied. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + data_perturb: float + The perturbation of the data structure if it is in the database + and use_database_check is True. + data_perturb is the standard deviation of the normal + distribution used to rattle the structure. + data_tol: float + The tolerance for the data structure if it is in the database + and use_database_check is True. + save_properties_traj: bool + Whether to save the calculated properties to the trajectory. + to_save_mlcalc: bool + Whether to save the ML calculator to a file after training. + save_mlcalc_kwargs: dict + Arguments for saving the ML calculator, like the filename. + default_mlcalc_kwargs: dict + The default keyword arguments for the ML calculator. + default_mlcalc_local_kwargs: dict + The default keyword arguments for the local ML calculator. + trajectory: str or TrajectoryWriter instance + Trajectory filename to store the predicted data. + Or the TrajectoryWriter instance to store the predicted data. + trainingset: str or TrajectoryWriter instance + Trajectory filename to store the evaluated training data. + Or the TrajectoryWriter instance to store the evaluated + training data. 
+            pred_evaluated: str or TrajectoryWriter instance (optional)
+                Trajectory filename to store the evaluated training data
+                with predicted properties.
+                Or the TrajectoryWriter instance to store the evaluated
+                training data with predicted properties.
+                If pred_evaluated is None, then the predicted data is
+                not saved.
+            converged_trajectory: str or TrajectoryWriter instance
+                Trajectory filename to store the converged structure(s).
+                Or the TrajectoryWriter instance to store the converged
+                structure(s).
+            initial_traj: str or TrajectoryWriter instance
+                Trajectory filename to store the initial structure(s).
+                Or the TrajectoryWriter instance to store the initial
+                structure(s).
+            tabletxt: str
+                Name of the .txt file where the summary table is printed.
+                It is not saved to the file if tabletxt=None.
+            timetxt: str (optional)
+                Name of the .txt file where the time table is printed.
+                It is not saved to the file if timetxt=None.
+            prev_calculations: Atoms list or ASE Trajectory file.
+                The user can feed previously calculated data
+                for the same hypersurface.
+                The previous calculations must be fed as an Atoms list
+                or Trajectory filename.
+            restart: bool
+                Whether to restart the active learning.
+            seed: int (optional)
+                The random seed for the optimization.
+                The seed can also be a RandomState or Generator instance.
+                If not given, the default random number generator is used.
+            dtype: type
+                The data type of the arrays.
+            comm: MPI communicator.
+                The MPI communicator.
+ """ + # Save bool for reusing data in the mlcalc_local + self.reuse_data_local = reuse_data_local + # Save the local ML-calculator + self.mlcalc_local = mlcalc_local + self.default_mlcalc_local_kwargs = default_mlcalc_local_kwargs + # Initialize the AdsorptionBO + super().__init__( + slab=slab, + adsorbate=adsorbate, + ase_calc=ase_calc, + mlcalc=mlcalc, + adsorbate2=adsorbate2, + bounds=bounds, + opt_kwargs=opt_kwargs, + bond_tol=bond_tol, + chains=chains, + acq=acq, + save_memory=save_memory, + parallel_run=parallel_run, + copy_calc=copy_calc, + verbose=verbose, + apply_constraint=apply_constraint, + force_consistent=force_consistent, + scale_fmax=scale_fmax, + use_fmax_convergence=use_fmax_convergence, + unc_convergence=unc_convergence, + use_method_unc_conv=use_method_unc_conv, + use_restart=use_restart, + check_unc=check_unc, + check_energy=check_energy, + check_fmax=check_fmax, + max_unc_restart=max_unc_restart, + n_evaluations_each=n_evaluations_each, + min_data=min_data, + use_database_check=use_database_check, + data_perturb=data_perturb, + data_tol=data_tol, + save_properties_traj=save_properties_traj, + to_save_mlcalc=to_save_mlcalc, + save_mlcalc_kwargs=save_mlcalc_kwargs, + default_mlcalc_kwargs=default_mlcalc_kwargs, + trajectory=trajectory, + trainingset=trainingset, + pred_evaluated=pred_evaluated, + converged_trajectory=converged_trajectory, + initial_traj=initial_traj, + tabletxt=tabletxt, + timetxt=timetxt, + prev_calculations=None, + restart=False, + seed=seed, + dtype=dtype, + comm=comm, + **kwargs, + ) + # Get the atomic structure + atoms = self.get_structures(get_all=False, allow_calculation=False) + # Build the local method + self.build_local_method( + atoms=atoms, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + use_restart=use_restart_local, + ) + # Restart the active learning + prev_calculations = self.restart_optimization( + restart, + prev_calculations, + ) + # Use previous calculations to train ML calculator + 
self.use_prev_calculations(prev_calculations) + + def build_local_method( + self, + atoms, + local_opt=FIRE, + local_opt_kwargs={}, + use_restart_local=True, + **kwargs, + ): + "Build the local optimization method." + # Save the instances for creating the local optimizer + self.atoms = self.copy_atoms(atoms) + self.local_opt = local_opt + self.local_opt_kwargs = local_opt_kwargs + # Set whether to use the restart in the local optimization + self.use_restart_local = use_restart_local + # Build the local optimizer method + self.local_method = LocalOptimizer( + atoms, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=False, + comm=self.comm, + verbose=self.verbose, + seed=self.seed, + ) + return self.local_method + + def setup_mlcalc_local( + self, + *args, + **kwargs, + ): + return super(AdsorptionAL, self).setup_mlcalc(*args, **kwargs) + + def run( + self, + fmax=0.05, + steps=200, + ml_steps=4000, + ml_steps_local=1000, + max_unc=0.3, + dtrust=None, + **kwargs, + ): + """ + Run the active learning optimization. + + Parameters: + fmax: float + Convergence criteria (in eV/Angs). + steps: int + Maximum number of evaluations. + ml_steps: int + Maximum number of steps for the optimization method + on the predicted landscape. + ml_steps_local: int + Maximum number of steps for the local optimization method. + max_unc: float (optional) + Maximum uncertainty for continuation of the optimization. + If max_unc is None, then the optimization is performed + without the maximum uncertainty. + dtrust: float (optional) + The trust distance for the optimization method. + seed: int (optional) + The random seed. + + Returns: + converged: bool + Whether the active learning is converged. 
+ """ + # Check if the global optimization is used + if self.is_global: + # Run the active learning + super().run( + fmax=fmax, + steps=steps, + ml_steps=ml_steps, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + # Check if the adsorption active learning is converged + if not self.converged(): + return self.converged() + # Get the data from the active learning + data = self.get_data_atoms() + # Switch to the local optimization + self.switch_to_local(data) + # Adjust the number of steps + steps = steps - self.get_number_of_steps() + if steps <= 0: + return self.converged() + # Run the local active learning + super().run( + fmax=fmax, + steps=steps, + ml_steps=ml_steps_local, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + return self.converged() + + def switch_mlcalcs(self, data, **kwargs): + """ + Switch the ML calculator used for the local optimization. + The data is reused, but without the constraints from Adsorption. + """ + # Get the structures + structures = self.get_structures( + get_all=False, + allow_calculation=False, + ) + # Setup the ML-calculator for the local optimization + self.setup_mlcalc_local( + mlcalc=self.mlcalc_local, + save_memory=self.save_memory, + atoms=structures, + reuse_mlcalc_data=False, + verbose=self.verbose, + **self.default_mlcalc_local_kwargs, + ) + # Add the training data to the local ML-calculator + self.use_prev_calculations(data) + return self + + def switch_to_local(self, data, **kwargs): + "Switch to the local optimization." 
+ # Reset convergence + self._converged = False + # Set the global optimization flag + self.is_global = False + # Switch to the local ML-calculator + self.switch_mlcalcs(data) + # Store the last structures + self.structures = self.get_structures( + get_all=False, + allow_calculation=False, + ) + # Use the last structures for the local optimization + self.local_method.update_optimizable(self.structures) + # Switch to the local optimization + self.setup_method(self.local_method) + # Set whether to use the restart + self.use_restart = self.use_restart_local + return self + + def rm_constraints(self, structure, data, **kwargs): + """ + Remove the constraints from the atoms in the database. + This is used for the local optimization. + """ + # Get the constraints from the structures + constraints = self.get_constraints(structure) + # Remove the constraints + for atoms in data: + atoms.set_constraint(constraints) + return data + + def build_method(self, *args, **kwargs): + # Set the global flag to True + self.is_global = True + # Build the method for the global optimization + return super().build_method(*args, **kwargs) + + def use_prev_calculations(self, prev_calculations=None, **kwargs): + if prev_calculations is None: + return self + if isinstance(prev_calculations, str): + prev_calculations = read(prev_calculations, ":") + if isinstance(prev_calculations, list) and len(prev_calculations) == 0: + return self + # Get the constraints indices if necessary + if self.is_global or not self.reuse_data_local: + # Get the constraints of the first calculation + constraints0 = self.get_constraints_indices(prev_calculations[0]) + # Compare the constraints of the previous calculations + bool_constraints = [ + self.get_constraints_indices(atoms) == constraints0 + for atoms in prev_calculations[1:] + ] + # Check if the prev calculations has the same constraints + if self.is_global: + # Check if all constraints are the same + if not all(bool_constraints): + self.message_system( + "The 
previous calculations have different constraints. " + "Local optimization will be performed." + ) + # Switch to the local optimization + self.switch_to_local(prev_calculations) + return self + else: + # Check whether to truncate the previous calculations + if not self.reuse_data_local: + # Check if the constraints are different + if False in bool_constraints: + index_local = bool_constraints.index(False) + prev_calculations = prev_calculations[index_local:] + else: + # Use only the last two calculations + prev_calculations = prev_calculations[-2:] + # Remove the constraints from the previous calculations + prev_calculations = self.rm_constraints( + self.get_structures(get_all=False, allow_calculation=False), + prev_calculations, + ) + # Add calculations to the ML model + self.add_training(prev_calculations) + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + slab=self.slab, + adsorbate=self.adsorbate, + ase_calc=self.ase_calc, + mlcalc=self.mlcalc, + mlcalc_local=self.mlcalc_local, + adsorbate2=self.adsorbate2, + bounds=self.bounds, + opt_kwargs=self.opt_kwargs, + bond_tol=self.bond_tol, + chains=self.chains, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + reuse_data_local=self.reuse_data_local, + acq=self.acq, + save_memory=self.save_memory, + parallel_run=self.parallel_run, + copy_calc=self.copy_calc, + verbose=self.verbose, + apply_constraint=self.apply_constraint, + force_consistent=self.force_consistent, + scale_fmax=self.scale_fmax_org, + use_fmax_convergence=self.use_fmax_convergence, + unc_convergence=self.unc_convergence, + use_method_unc_conv=self.use_method_unc_conv, + use_restart=self.use_restart, + use_restart_local=self.use_restart_local, + check_unc=self.check_unc, + check_energy=self.check_energy, + check_fmax=self.check_fmax, + max_unc_restart=self.max_unc_restart, + 
n_evaluations_each=self.n_evaluations_each, + min_data=self.min_data, + use_database_check=self.use_database_check, + data_perturb=self.data_perturb, + data_tol=self.data_tol, + save_properties_traj=self.save_properties_traj, + to_save_mlcalc=self.to_save_mlcalc, + save_mlcalc_kwargs=self.save_mlcalc_kwargs, + default_mlcalc_local_kwargs=self.default_mlcalc_local_kwargs, + trajectory=self.trajectory, + trainingset=self.trainingset, + pred_evaluated=self.pred_evaluated, + converged_trajectory=self.converged_trajectory, + initial_traj=self.initial_traj, + tabletxt=self.tabletxt, + timetxt=self.timetxt, + seed=self.seed, + dtype=self.dtype, + comm=self.comm, + ) + # Get the constants made within the class + constant_kwargs = dict(is_global=self.is_global) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/activelearning/mlneb.py b/catlearn/activelearning/mlneb.py new file mode 100644 index 00000000..a913181f --- /dev/null +++ b/catlearn/activelearning/mlneb.py @@ -0,0 +1,541 @@ +from ase.optimize import FIRE +from ase.parallel import world +from ase.io import read +from .activelearning import ActiveLearning +from ..optimizer import LocalCINEB +from ..structures.neb import ImprovedTangentNEB, OriginalNEB + + +class MLNEB(ActiveLearning): + """ + An active learner that is used for accelerating nudged elastic band + (NEB) optimization with an active learning approach. 
+ """ + + def __init__( + self, + start, + end, + ase_calc, + mlcalc=None, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + n_images=15, + climb=True, + neb_interpolation="linear", + neb_interpolation_kwargs={}, + start_without_ci=True, + reuse_ci_path=True, + local_opt=FIRE, + local_opt_kwargs={}, + acq=None, + save_memory=False, + parallel_run=False, + copy_calc=False, + verbose=True, + apply_constraint=True, + force_consistent=False, + scale_fmax=0.8, + unc_convergence=0.02, + use_method_unc_conv=True, + use_restart=True, + check_unc=True, + check_energy=False, + check_fmax=True, + max_unc_restart=0.05, + n_evaluations_each=1, + min_data=3, + use_database_check=True, + data_perturb=0.001, + data_tol=1e-8, + save_properties_traj=True, + to_save_mlcalc=True, + save_mlcalc_kwargs={}, + default_mlcalc_kwargs={}, + trajectory="predicted.traj", + trainingset="evaluated.traj", + pred_evaluated="predicted_evaluated.traj", + converged_trajectory="converged.traj", + initial_traj="initial_struc.traj", + tabletxt="ml_summary.txt", + timetxt="ml_time.txt", + prev_calculations=None, + restart=False, + seed=1, + dtype=float, + comm=world, + **kwargs, + ): + """ + Initialize the ActiveLearning instance. + + Parameters: + start: Atoms instance or ASE Trajectory file. + The Atoms must have the calculator attached with energy. + Initial end-point of the NEB path. + end: Atoms instance or ASE Trajectory file. + The Atoms must have the calculator attached with energy. + Final end-point of the NEB path. + ase_calc: ASE calculator instance. + ASE calculator as implemented in ASE. + mlcalc: ML-calculator instance. + The ML-calculator instance used as surrogate surface. + The default BOCalculator instance is used if mlcalc is None. + neb_method: NEB class object or str + The NEB implemented class object used for the ML-NEB. 
+                A string can be used to select:
+                - 'improvedtangentneb' (default)
+                - 'ewneb'
+                - 'avgewneb'
+            neb_kwargs: dict
+                A dictionary with the arguments used in the NEB object
+                to create the instance.
+                Climb must not be included.
+            n_images: int
+                Number of images of the path (if not included a path before).
+                The number of images includes the 2 end-points of the NEB path.
+            climb: bool
+                Whether to use the climbing image in the NEB.
+                It is strongly recommended to have climb=True.
+            neb_interpolation: str or list of ASE Atoms or ASE Trajectory file
+                The interpolation method used to create the NEB path.
+                The string can be:
+                - 'linear' (default)
+                - 'idpp'
+                - 'rep'
+                - 'born'
+                - 'ends'
+                Otherwise, the premade images can be given as a list of
+                ASE Atoms.
+                A string of the ASE Trajectory file that contains the images
+                can also be given.
+            neb_interpolation_kwargs: dict
+                The keyword arguments for the interpolation method.
+                It is only used when the interpolation method is a string.
+            start_without_ci: bool
+                Whether to start the NEB without the climbing image.
+                If True, the NEB path will be optimized without
+                the climbing image and afterwards climbing image is used
+                if climb=True as well.
+                If False, the NEB path will be optimized with the climbing
+                image if climb=True as well.
+            reuse_ci_path: bool
+                Whether to restart from the climbing image path when the NEB
+                without climbing image is converged.
+            local_opt: ASE optimizer object
+                The local optimizer object.
+            local_opt_kwargs: dict
+                The keyword arguments for the local optimizer.
+            acq: Acquisition class instance.
+                The Acquisition instance used for calculating the
+                acq. function and choose a candidate to calculate next.
+                The default AcqUME instance is used if acq is None.
+            use_database_check: bool
+                Whether to check if the new structure is within the database.
+                If it is in the database, the structure is rattled.
+            save_memory: bool
+                Whether to only train the ML calculator and store all objects
+                on one CPU.
+                If save_memory==True then parallel optimization of
+                the hyperparameters can not be achieved.
+                If save_memory==False no MPI object is used.
+            parallel_run: bool
+                Whether to run method in parallel on multiple CPUs (True) or
+                in sequence on 1 CPU (False).
+            copy_calc: bool
+                Whether to copy the calculator for each candidate
+                in the method.
+            verbose: bool
+                Whether to print on screen the full output (True) or
+                not (False).
+            apply_constraint: bool
+                Whether to apply the constraints of the ASE Atoms instance
+                to the calculated forces.
+                By default (apply_constraint=True) forces are 0 for
+                constrained atoms and directions.
+            force_consistent: bool or None.
+                Use force-consistent energy calls (as opposed to the energy
+                extrapolated to 0 K).
+                By default force_consistent=False.
+            scale_fmax: float
+                The scaling of the fmax convergence criterion.
+                It makes the structure(s) converge tighter on surrogate
+                surface.
+                If use_database_check is True and the structure is in the
+                database, then the scale_fmax is multiplied by the original
+                scale_fmax to give tighter convergence.
+            unc_convergence: float
+                Maximum uncertainty for convergence in
+                the active learning (in eV).
+            use_method_unc_conv: bool
+                Whether to use the unc_convergence as a convergence criterion
+                in the optimization method.
+            use_restart: bool
+                Use the result from last robust iteration.
+            check_unc: bool
+                Check if the uncertainty is large for the restarted result and
+                if it is then use the previous initial.
+            check_energy: bool
+                Check if the energy is larger for the restarted result than
+                the previous.
+            check_fmax: bool
+                Check if the maximum force is larger for the restarted result
+                than the initial interpolation and if so then replace it.
+            max_unc_restart: float (optional)
+                Maximum uncertainty (in eV) for using the structure(s) as
+                the restart in the optimization method.
+ If max_unc_restart is None, then the optimization is performed + without the maximum uncertainty. + n_evaluations_each: int + Number of evaluations for each iteration. + min_data: int + The minimum number of data points in the training set before + the active learning can converge. + use_database_check: bool + Whether to check if the new structure is within the database. + If it is in the database, the structure is rattled. + Please be aware that the predicted structure will differ from + the structure in the database if the rattling is applied. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + data_perturb: float + The perturbation of the data structure if it is in the database + and use_database_check is True. + data_perturb is the standard deviation of the normal + distribution used to rattle the structure. + data_tol: float + The tolerance for the data structure if it is in the database + and use_database_check is True. + save_properties_traj: bool + Whether to save the calculated properties to the trajectory. + to_save_mlcalc: bool + Whether to save the ML calculator to a file after training. + save_mlcalc_kwargs: dict + Arguments for saving the ML calculator, like the filename. + default_mlcalc_kwargs: dict + The default keyword arguments for the ML calculator. + trajectory: str or TrajectoryWriter instance + Trajectory filename to store the predicted data. + Or the TrajectoryWriter instance to store the predicted data. + trainingset: str or TrajectoryWriter instance + Trajectory filename to store the evaluated training data. + Or the TrajectoryWriter instance to store the evaluated + training data. + pred_evaluated: str or TrajectoryWriter instance (optional) + Trajectory filename to store the evaluated training data + with predicted properties. 
+                Or the TrajectoryWriter instance to store the evaluated
+                training data with predicted properties.
+                If pred_evaluated is None, then the predicted data is
+                not saved.
+            converged_trajectory: str or TrajectoryWriter instance
+                Trajectory filename to store the converged structure(s).
+                Or the TrajectoryWriter instance to store the converged
+                structure(s).
+            initial_traj: str or TrajectoryWriter instance
+                Trajectory filename to store the initial structure(s).
+                Or the TrajectoryWriter instance to store the initial
+                structure(s).
+            tabletxt: str
+                Name of the .txt file where the summary table is printed.
+                It is not saved to the file if tabletxt=None.
+            timetxt: str (optional)
+                Name of the .txt file where the time table is printed.
+                It is not saved to the file if timetxt=None.
+            prev_calculations: Atoms list or ASE Trajectory file.
+                The user can feed previously calculated data
+                for the same hypersurface.
+                The previous calculations must be fed as an Atoms list
+                or Trajectory filename.
+            restart: bool
+                Whether to restart the active learning.
+            seed: int (optional)
+                The random seed for the optimization.
+                The seed can also be a RandomState or Generator instance.
+                If not given, the default random number generator is used.
+            dtype: type
+                The data type of the arrays.
+            comm: MPI communicator.
+                The MPI communicator.
+ """ + # Save the end points for creating the NEB + self.setup_endpoints(start, end, prev_calculations) + # Build the optimizer method and NEB within + method = self.build_method( + neb_method=neb_method, + neb_kwargs=neb_kwargs, + climb=climb, + n_images=n_images, + neb_interpolation=neb_interpolation, + neb_interpolation_kwargs=neb_interpolation_kwargs, + start_without_ci=start_without_ci, + reuse_ci_path=reuse_ci_path, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + # Initialize the BayesianOptimizer + super().__init__( + method=method, + ase_calc=ase_calc, + mlcalc=mlcalc, + acq=acq, + is_minimization=False, + save_memory=save_memory, + parallel_run=parallel_run, + copy_calc=copy_calc, + verbose=verbose, + apply_constraint=apply_constraint, + force_consistent=force_consistent, + scale_fmax=scale_fmax, + use_fmax_convergence=climb, + unc_convergence=unc_convergence, + use_method_unc_conv=use_method_unc_conv, + use_restart=use_restart, + check_unc=check_unc, + check_energy=check_energy, + check_fmax=check_fmax, + max_unc_restart=max_unc_restart, + n_evaluations_each=n_evaluations_each, + min_data=min_data, + use_database_check=use_database_check, + data_perturb=data_perturb, + data_tol=data_tol, + save_properties_traj=save_properties_traj, + to_save_mlcalc=to_save_mlcalc, + save_mlcalc_kwargs=save_mlcalc_kwargs, + default_mlcalc_kwargs=default_mlcalc_kwargs, + trajectory=trajectory, + trainingset=trainingset, + pred_evaluated=pred_evaluated, + converged_trajectory=converged_trajectory, + initial_traj=initial_traj, + tabletxt=tabletxt, + timetxt=timetxt, + prev_calculations=self.prev_calculations, + restart=restart, + seed=seed, + dtype=dtype, + comm=comm, + **kwargs, + ) + + def setup_endpoints( + self, + start, + end, + prev_calculations, + tol=1e-8, + **kwargs, + ): + """ + Setup the start and end points for the NEB calculation. 
+ """ + # Load the start and end points from trajectory files + if isinstance(start, str): + start = read(start) + if isinstance(end, str): + end = read(end) + # Save the start point with calculators + try: + start.get_forces() + except RuntimeError: + raise RuntimeError( + "The start point must have a calculator attached with " + "energy and forces!" + ) + self.start = self.copy_atoms(start) + # Save the end point with calculators + try: + end.get_forces() + except RuntimeError: + raise RuntimeError( + "The end point must have a calculator attached with " + "energy and forces!" + ) + self.end = self.copy_atoms(end) + # Save in previous calculations + self.prev_calculations = [self.start, self.end] + if prev_calculations is not None: + if isinstance(prev_calculations, str): + prev_calculations = read(prev_calculations, ":") + # Check if end points are in the previous calculations + if len(prev_calculations): + is_same = self.compare_atoms( + self.start, + prev_calculations[0], + tol=tol, + ) + if is_same: + prev_calculations = prev_calculations[1:] + if len(prev_calculations): + is_same = self.compare_atoms( + self.end, + prev_calculations[0], + tol=tol, + ) + if is_same: + prev_calculations = prev_calculations[1:] + # Save the previous calculations + self.prev_calculations += list(prev_calculations) + return self + + def build_method( + self, + neb_method, + neb_kwargs={}, + climb=True, + n_images=15, + k=3.0, + remove_rotation_and_translation=False, + mic=True, + neb_interpolation="linear", + neb_interpolation_kwargs={}, + start_without_ci=True, + reuse_ci_path=True, + local_opt=FIRE, + local_opt_kwargs={}, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + "Build the optimization method." 
+ # Save the instances for creating the local optimizer + self.local_opt = local_opt + self.local_opt_kwargs = local_opt_kwargs + # Save the instances for creating the NEB + self.neb_method = neb_method + self.neb_kwargs = dict( + k=k, + remove_rotation_and_translation=remove_rotation_and_translation, + parallel=parallel_run, + ) + if isinstance(neb_method, str) or issubclass(neb_method, OriginalNEB): + self.neb_kwargs.update( + dict( + use_image_permutation=False, + save_properties=True, + mic=mic, + comm=comm, + ) + ) + else: + self.neb_kwargs.update(dict(world=comm)) + self.neb_kwargs.update(neb_kwargs) + self.n_images = n_images + self.neb_interpolation = neb_interpolation + self.neb_interpolation_kwargs = dict( + mic=mic, + remove_rotation_and_translation=remove_rotation_and_translation, + ) + self.neb_interpolation_kwargs.update(neb_interpolation_kwargs) + self.start_without_ci = start_without_ci + self.climb = climb + self.reuse_ci_path = reuse_ci_path + # Build the sequential neb optimizer + method = LocalCINEB( + start=self.start, + end=self.end, + neb_method=self.neb_method, + neb_kwargs=self.neb_kwargs, + n_images=self.n_images, + climb=self.climb, + neb_interpolation=self.neb_interpolation, + neb_interpolation_kwargs=self.neb_interpolation_kwargs, + start_without_ci=self.start_without_ci, + reuse_ci_path=self.reuse_ci_path, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + return method + + def extra_initial_data(self, **kwargs): + # Check if the training set is empty + if self.get_training_set_size() >= 3: + return self + # Get the images + images = self.get_structures(get_all=True, allow_calculation=False) + # Calculate energies of end points + e_start = self.start.get_potential_energy() + e_end = self.end.get_potential_energy() + # Get the image with the potential highest energy + if e_start >= e_end: + i_middle = int((len(images) - 2) / 3.0) + else: + 
i_middle = int(2.0 * (len(images) - 2) / 3.0) + candidate = images[1 + i_middle].copy() + # Evaluate the structure + self.evaluate(candidate) + # Print summary table + self.print_statement() + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + start=self.start, + end=self.end, + ase_calc=self.ase_calc, + mlcalc=self.mlcalc, + neb_method=self.neb_method, + neb_kwargs=self.neb_kwargs, + n_images=self.n_images, + climb=self.climb, + neb_interpolation=self.neb_interpolation, + neb_interpolation_kwargs=self.neb_interpolation_kwargs, + start_without_ci=self.start_without_ci, + reuse_ci_path=self.reuse_ci_path, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + acq=self.acq, + is_minimization=self.is_minimization, + save_memory=self.save_memory, + parallel_run=self.parallel_run, + copy_calc=self.copy_calc, + verbose=self.verbose, + apply_constraint=self.apply_constraint, + force_consistent=self.force_consistent, + scale_fmax=self.scale_fmax_org, + unc_convergence=self.unc_convergence, + use_method_unc_conv=self.use_method_unc_conv, + use_restart=self.use_restart, + check_unc=self.check_unc, + check_energy=self.check_energy, + check_fmax=self.check_fmax, + max_unc_restart=self.max_unc_restart, + n_evaluations_each=self.n_evaluations_each, + min_data=self.min_data, + use_database_check=self.use_database_check, + data_perturb=self.data_perturb, + data_tol=self.data_tol, + save_properties_traj=self.save_properties_traj, + to_save_mlcalc=self.to_save_mlcalc, + save_mlcalc_kwargs=self.save_mlcalc_kwargs, + trajectory=self.trajectory, + trainingset=self.trainingset, + pred_evaluated=self.pred_evaluated, + converged_trajectory=self.converged_trajectory, + initial_traj=self.initial_traj, + tabletxt=self.tabletxt, + timetxt=self.timetxt, + seed=self.seed, + dtype=self.dtype, + comm=self.comm, + ) + # Get the constants made within the class + 
constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/activelearning/randomadsorption.py b/catlearn/activelearning/randomadsorption.py new file mode 100644 index 00000000..cac02c51 --- /dev/null +++ b/catlearn/activelearning/randomadsorption.py @@ -0,0 +1,509 @@ +from ase.parallel import world +from ase.optimize import FIRE +from .activelearning import ActiveLearning +from ..optimizer import RandomAdsorptionOptimizer +from ..optimizer import ParallelOptimizer +from ..regression.gp.baseline import BornRepulsionCalculator, MieCalculator + + +class RandomAdsorptionAL(ActiveLearning): + """ + An active learner that is used for accelerating global adsorption search + using random sampling and local optimization with an active learning + approach. + The adsorbate is random sampled in space and the most stable structure is + local optimized. + """ + + def __init__( + self, + slab, + adsorbate, + ase_calc, + mlcalc=None, + adsorbate2=None, + bounds=None, + n_random_draws=200, + use_initial_opt=False, + initial_fmax=0.2, + initial_steps=50, + use_repulsive_check=True, + repulsive_tol=0.1, + repulsive_calculator=BornRepulsionCalculator(), + local_opt=FIRE, + local_opt_kwargs={}, + chains=None, + acq=None, + save_memory=False, + parallel_run=False, + copy_calc=False, + verbose=True, + apply_constraint=True, + force_consistent=False, + scale_fmax=0.8, + use_fmax_convergence=True, + unc_convergence=0.02, + use_method_unc_conv=True, + use_restart=True, + check_unc=True, + check_energy=True, + check_fmax=True, + max_unc_restart=0.05, + n_evaluations_each=1, + min_data=5, + use_database_check=True, + data_perturb=0.001, + data_tol=1e-8, + save_properties_traj=True, + to_save_mlcalc=False, + save_mlcalc_kwargs={}, + default_mlcalc_kwargs={}, + trajectory="predicted.traj", + trainingset="evaluated.traj", + pred_evaluated="predicted_evaluated.traj", + 
converged_trajectory="converged.traj", + initial_traj="initial_struc.traj", + tabletxt="ml_summary.txt", + timetxt="ml_time.txt", + prev_calculations=None, + restart=False, + seed=1, + dtype=float, + comm=world, + **kwargs, + ): + """ + Initialize the ActiveLearning instance. + + Parameters: + slab: Atoms instance + The slab structure. + Can either be a surface or a nanoparticle. + adsorbate: Atoms instance + The adsorbate structure. + ase_calc: ASE calculator instance. + ASE calculator as implemented in ASE. + mlcalc: ML-calculator instance. + The ML-calculator instance used as surrogate surface. + The default BOCalculator instance is used if mlcalc is None. + adsorbate2: Atoms instance (optional) + The second adsorbate structure. + Optimize both adsorbates simultaneously. + The two adsorbates will have different tags. + bounds: (6,2) array or (12,2) array (optional) + The bounds for the optimization. + The first 3 rows are the x, y, z scaled coordinates for + the center of the adsorbate. + The next 3 rows are the three rotation angles in radians. + If two adsorbates are optimized, the next 6 rows are for + the second adsorbate. + n_random_draws: int + The number of random structures to be drawn. + If chains is not None, then the number of random + structures is n_random_draws * chains. + use_initial_opt: bool + If True, the initial structures, drawn from the random + sampling, will be local optimized before the structure + with lowest energy are local optimized. + initial_fmax: float + The maximum force for the initial local optimizations. + initial_steps: int + The maximum number of steps for the initial local + optimizations. + use_repulsive_check: bool + If True, a energy will be calculated for each randomly + drawn structure to check if the energy is not too large. + repulsive_tol: float + The tolerance for the repulsive energy check. + repulsive_calculator: ASE calculator instance + The calculator used for the repulsive energy check. 
+ local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. + chains: int (optional) + The number of optimization that will be run in parallel. + It is only used if parallel_run=True. + acq: Acquisition class instance. + The Acquisition instance used for calculating the + acq. function and choose a candidate to calculate next. + The default AcqUME instance is used if acq is None. + save_memory: bool + Whether to only train the ML calculator and store all objects + on one CPU. + If save_memory==True then parallel optimization of + the hyperparameters can not be achived. + If save_memory==False no MPI object is used. + parallel_run: bool + Whether to run method in parallel on multiple CPUs (True) or + in sequence on 1 CPU (False). + copy_calc: bool + Whether to copy the calculator for each candidate + in the method. + verbose: bool + Whether to print on screen the full output (True) or + not (False). + apply_constraint: bool + Whether to apply the constrains of the ASE Atoms instance + to the calculated forces. + By default (apply_constraint=True) forces are 0 for + constrained atoms and directions. + force_consistent: bool or None. + Use force-consistent energy calls (as opposed to the energy + extrapolated to 0 K). + By default force_consistent=False. + scale_fmax: float + The scaling of the fmax convergence criterion. + It makes the structure(s) converge tighter on surrogate + surface. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + use_fmax_convergence: bool + Whether to use the maximum force as an convergence criterion. + unc_convergence: float + Maximum uncertainty for convergence in + the active learning (in eV). + use_method_unc_conv: bool + Whether to use the unc_convergence as a convergence criterion + in the optimization method. 
+ use_restart: bool + Use the result from last robust iteration. + Be aware that restart and low max_unc can result in only the + initial structure passing the maximum uncertainty criterion. + check_unc: bool + Check if the uncertainty is large for the restarted result and + if it is then use the previous initial. + check_energy: bool + Check if the energy is larger for the restarted result than + the previous. + check_fmax: bool + Check if the maximum force is larger for the restarted result + than the initial interpolation and if so then replace it. + max_unc_restart: float (optional) + Maximum uncertainty (in eV) for using the structure(s) as + the restart in the optimization method. + If max_unc_restart is None, then the optimization is performed + without the maximum uncertainty. + n_evaluations_each: int + Number of evaluations for each candidate. + min_data: int + The minimum number of data points in the training set before + the active learning can converge. + use_database_check: bool + Whether to check if the new structure is within the database. + If it is in the database, the structure is rattled. + Please be aware that the predicted structure will differ from + the structure in the database if the rattling is applied. + If use_database_check is True and the structure is in the + database, then the scale_fmax is multiplied by the original + scale_fmax to give tighter convergence. + data_perturb: float + The perturbation of the data structure if it is in the database + and use_database_check is True. + data_perturb is the standard deviation of the normal + distribution used to rattle the structure. + data_tol: float + The tolerance for the data structure if it is in the database + and use_database_check is True. + save_properties_traj: bool + Whether to save the calculated properties to the trajectory. + to_save_mlcalc: bool + Whether to save the ML calculator to a file after training. 
+ save_mlcalc_kwargs: dict + Arguments for saving the ML calculator, like the filename. + default_mlcalc_kwargs: dict + The default keyword arguments for the ML calculator. + trajectory: str or TrajectoryWriter instance + Trajectory filename to store the predicted data. + Or the TrajectoryWriter instance to store the predicted data. + trainingset: str or TrajectoryWriter instance + Trajectory filename to store the evaluated training data. + Or the TrajectoryWriter instance to store the evaluated + training data. + pred_evaluated: str or TrajectoryWriter instance (optional) + Trajectory filename to store the evaluated training data + with predicted properties. + Or the TrajectoryWriter instance to store the evaluated + training data with predicted properties. + If pred_evaluated is None, then the predicted data is + not saved. + converged_trajectory: str or TrajectoryWriter instance + Trajectory filename to store the converged structure(s). + Or the TrajectoryWriter instance to store the converged + structure(s). + initial_traj: str or TrajectoryWriter instance + Trajectory filename to store the initial structure(s). + Or the TrajectoryWriter instance to store the initial + structure(s). + tabletxt: str + Name of the .txt file where the summary table is printed. + It is not saved to the file if tabletxt=None. + timetxt: str (optional) + Name of the .txt file where the time table is printed. + It is not saved to the file if timetxt=None. + prev_calculations: Atoms list or ASE Trajectory file. + The user can feed previously calculated data + for the same hypersurface. + The previous calculations must be fed as an Atoms list + or Trajectory filename. + restart: bool + Whether to restart the active learning. + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + comm: MPI communicator. 
+ The MPI communicator. + """ + # Build the optimizer method + method = self.build_method( + slab=slab, + adsorbate=adsorbate, + adsorbate2=adsorbate2, + bounds=bounds, + n_random_draws=n_random_draws, + use_initial_struc=use_restart, + use_initial_opt=use_initial_opt, + initial_fmax=initial_fmax, + initial_steps=initial_steps, + use_repulsive_check=use_repulsive_check, + repulsive_tol=repulsive_tol, + repulsive_calculator=repulsive_calculator, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + chains=chains, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + # Initialize the BayesianOptimizer + super().__init__( + method=method, + ase_calc=ase_calc, + mlcalc=mlcalc, + acq=acq, + is_minimization=True, + save_memory=save_memory, + parallel_run=parallel_run, + copy_calc=copy_calc, + verbose=verbose, + apply_constraint=apply_constraint, + force_consistent=force_consistent, + scale_fmax=scale_fmax, + use_fmax_convergence=use_fmax_convergence, + unc_convergence=unc_convergence, + use_method_unc_conv=use_method_unc_conv, + use_restart=use_restart, + check_unc=check_unc, + check_energy=check_energy, + check_fmax=check_fmax, + max_unc_restart=max_unc_restart, + n_evaluations_each=n_evaluations_each, + min_data=min_data, + use_database_check=use_database_check, + data_perturb=data_perturb, + data_tol=data_tol, + save_properties_traj=save_properties_traj, + to_save_mlcalc=to_save_mlcalc, + save_mlcalc_kwargs=save_mlcalc_kwargs, + default_mlcalc_kwargs=default_mlcalc_kwargs, + trajectory=trajectory, + trainingset=trainingset, + pred_evaluated=pred_evaluated, + converged_trajectory=converged_trajectory, + initial_traj=initial_traj, + tabletxt=tabletxt, + timetxt=timetxt, + prev_calculations=prev_calculations, + restart=restart, + seed=seed, + dtype=dtype, + comm=comm, + **kwargs, + ) + + def build_method( + self, + slab, + adsorbate, + adsorbate2=None, + bounds=None, + n_random_draws=20, + use_initial_struc=True, + use_initial_opt=False, 
+ initial_fmax=0.2, + initial_steps=50, + use_repulsive_check=True, + repulsive_tol=0.1, + repulsive_calculator=BornRepulsionCalculator(), + local_opt=FIRE, + local_opt_kwargs={}, + chains=None, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + "Build the optimization method." + # Save the instances for creating the adsorption optimizer + self.slab = self.copy_atoms(slab) + self.adsorbate = self.copy_atoms(adsorbate) + if adsorbate2 is not None: + self.adsorbate2 = self.copy_atoms(adsorbate2) + else: + self.adsorbate2 = None + self.bounds = bounds + self.n_random_draws = n_random_draws + self.use_initial_struc = use_initial_struc + self.use_initial_opt = use_initial_opt + self.initial_fmax = initial_fmax + self.initial_steps = initial_steps + self.use_repulsive_check = use_repulsive_check + self.repulsive_tol = repulsive_tol + self.repulsive_calculator = repulsive_calculator + self.local_opt = local_opt + self.local_opt_kwargs = local_opt_kwargs + self.chains = chains + # Build the optimizer method + method = RandomAdsorptionOptimizer( + slab=slab, + adsorbate=adsorbate, + adsorbate2=adsorbate2, + bounds=bounds, + n_random_draws=n_random_draws, + use_initial_struc=use_initial_struc, + use_initial_opt=use_initial_opt, + initial_fmax=initial_fmax, + initial_steps=initial_steps, + use_repulsive_check=use_repulsive_check, + repulsive_tol=repulsive_tol, + repulsive_calculator=repulsive_calculator, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=False, + comm=comm, + verbose=verbose, + seed=seed, + ) + # Run the method in parallel if requested + if parallel_run: + method = ParallelOptimizer( + method, + chains=chains, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + return method + + def extra_initial_data(self, **kwargs): + # Get the number of training data + n_data = self.get_training_set_size() + # Check if the training set is empty + if n_data >= 2: + return self + # Get the 
initial structures from baseline potentials + method_extra = self.method.copy() + method_extra.update_arguments( + n_random_draws=20, + use_initial_opt=False, + use_repulsive_check=True, + ) + if n_data == 0: + method_extra.set_calculator(BornRepulsionCalculator(r_scale=1.0)) + else: + method_extra.set_calculator( + MieCalculator(r_scale=1.2, denergy=0.2) + ) + method_extra.run(fmax=0.1, steps=21) + atoms = method_extra.get_candidates()[0] + # Evaluate the structure + self.evaluate(atoms) + # Print summary table + self.print_statement() + # Check if another initial data is needed + if n_data == 0: + self.extra_initial_data(**kwargs) + return self + + def setup_default_mlcalc( + self, + kappa=-1.0, + calc_kwargs={}, + **kwargs, + ): + # Set a limit for the uncertainty + if "max_unc" not in calc_kwargs.keys(): + calc_kwargs["max_unc"] = 2.0 + return super().setup_default_mlcalc( + kappa=kappa, + calc_kwargs=calc_kwargs, + **kwargs, + ) + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + slab=self.slab, + adsorbate=self.adsorbate, + ase_calc=self.ase_calc, + mlcalc=self.mlcalc, + adsorbate2=self.adsorbate2, + bounds=self.bounds, + n_random_draws=self.n_random_draws, + use_initial_opt=self.use_initial_opt, + initial_fmax=self.initial_fmax, + initial_steps=self.initial_steps, + use_repulsive_check=self.use_repulsive_check, + repulsive_tol=self.repulsive_tol, + repulsive_calculator=self.repulsive_calculator, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + chains=self.chains, + acq=self.acq, + save_memory=self.save_memory, + parallel_run=self.parallel_run, + copy_calc=self.copy_calc, + verbose=self.verbose, + apply_constraint=self.apply_constraint, + force_consistent=self.force_consistent, + scale_fmax=self.scale_fmax_org, + use_fmax_convergence=self.use_fmax_convergence, + unc_convergence=self.unc_convergence, + use_method_unc_conv=self.use_method_unc_conv, + use_restart=self.use_restart, + check_unc=self.check_unc, + check_energy=self.check_energy, + check_fmax=self.check_fmax, + max_unc_restart=self.max_unc_restart, + n_evaluations_each=self.n_evaluations_each, + min_data=self.min_data, + use_database_check=self.use_database_check, + data_perturb=self.data_perturb, + data_tol=self.data_tol, + save_properties_traj=self.save_properties_traj, + to_save_mlcalc=self.to_save_mlcalc, + save_mlcalc_kwargs=self.save_mlcalc_kwargs, + trajectory=self.trajectory, + trainingset=self.trainingset, + pred_evaluated=self.pred_evaluated, + converged_trajectory=self.converged_trajectory, + initial_traj=self.initial_traj, + tabletxt=self.tabletxt, + timetxt=self.timetxt, + seed=self.seed, + dtype=self.dtype, + comm=self.comm, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/optimize/acquisition.py 
b/catlearn/optimize/acquisition.py deleted file mode 100644 index a06f729c..00000000 --- a/catlearn/optimize/acquisition.py +++ /dev/null @@ -1,409 +0,0 @@ -import numpy as np -from scipy.stats import norm - - -class Acquisition: - def __init__(self, objective="min", **kwargs): - """ - Acquisition function class. - - Parameters: - objective : string - How to sort a list of acquisition functions - Available: - - 'min': Sort after the smallest values. - - 'max': Sort after the largest values. - - 'random' : Sort randomly - """ - self.update_arguments(objective=objective, **kwargs) - - def calculate(self, energy, uncertainty=None, **kwargs): - "Calculate the acqusition function value." - raise NotImplementedError() - - def choose(self, candidates): - "Sort a list of acquisition function values." - if self.objective == "min": - return np.argsort(candidates) - elif self.objective == "max": - return np.argsort(candidates)[::-1] - return np.random.permutation(list(range(len(candidates)))) - - def objective_value(self, value): - "Return the objective value." - if self.objective == "min": - return -value - return value - - def update_arguments(self, objective=None, **kwargs): - "Set the parameters of the Acquisition function class." - if objective is not None: - self.objective = objective.lower() - return self - - def get_arguments(self): - "Get the arguments of the class itself." - # Get the arguments given to the class in the initialization - arg_kwargs = dict(objective=self.objective) - # Get the constants made within the class - constant_kwargs = dict() - # Get the objects made within the class - object_kwargs = dict() - return arg_kwargs, constant_kwargs, object_kwargs - - def copy(self): - "Copy the object." 
- # Get all arguments - arg_kwargs, constant_kwargs, object_kwargs = self.get_arguments() - # Make a clone - clone = self.__class__(**arg_kwargs) - # Check if constants have to be saved - if len(constant_kwargs.keys()): - for key, value in constant_kwargs.items(): - clone.__dict__[key] = value - # Check if objects have to be saved - if len(object_kwargs.keys()): - for key, value in object_kwargs.items(): - clone.__dict__[key] = value.copy() - return clone - - def __repr__(self): - arg_kwargs = self.get_arguments()[0] - str_kwargs = ",".join( - [f"{key}={value}" for key, value in arg_kwargs.items()] - ) - return "{}({})".format(self.__class__.__name__, str_kwargs) - - -class AcqEnergy(Acquisition): - def __init__(self, objective="min", **kwargs): - "The predicted energy as the acqusition function." - super().__init__(objective) - - def calculate(self, energy, uncertainty=None, **kwargs): - "Calculate the acqusition function value as the predicted energy." - return energy - - -class AcqUncertainty(Acquisition): - def __init__(self, objective="min", **kwargs): - "The predicted uncertainty as the acqusition function." - super().__init__(objective) - - def calculate(self, energy, uncertainty=None, **kwargs): - "Calculate the acqusition function value as the predicted uncertainty." - return uncertainty - - -class AcqUCB(Acquisition): - def __init__(self, objective="max", kappa=2.0, kappamax=3.0, **kwargs): - """ - The predicted upper confidence interval (ucb) as - the acqusition function. - """ - self.update_arguments( - objective=objective, - kappa=kappa, - kappamax=kappamax, - **kwargs, - ) - - def calculate(self, energy, uncertainty=None, **kwargs): - "Calculate the acqusition function value as the predicted ucb." - kappa = self.get_kappa() - return energy + kappa * uncertainty - - def get_kappa(self): - "Get the kappa value." 
- if isinstance(self.kappa, str): - return np.random.uniform(0, self.kappamax) - return self.kappa - - def update_arguments( - self, - objective=None, - kappa=None, - kappamax=None, - **kwargs, - ): - "Set the parameters of the Acquisition function class." - if objective is not None: - self.objective = objective.lower() - if kappa is not None: - if isinstance(kappa, (float, int)): - kappa = abs(kappa) - self.kappa = kappa - if kappamax is not None: - self.kappamax = abs(kappamax) - return self - - def get_arguments(self): - "Get the arguments of the class itself." - # Get the arguments given to the class in the initialization - arg_kwargs = dict( - objective=self.objective, - kappa=self.kappa, - kappamax=self.kappamax, - ) - # Get the constants made within the class - constant_kwargs = dict() - # Get the objects made within the class - object_kwargs = dict() - return arg_kwargs, constant_kwargs, object_kwargs - - -class AcqLCB(AcqUCB): - def __init__(self, objective="min", kappa=2.0, kappamax=3.0, **kwargs): - """ - The predicted lower confidence interval (lcb) as - the acqusition function. - """ - super().__init__( - objective=objective, - kappa=kappa, - kappamax=kappamax, - **kwargs, - ) - - def calculate(self, energy, uncertainty=None, **kwargs): - "Calculate the acqusition function value as the predicted ucb." - kappa = self.get_kappa() - return energy - kappa * uncertainty - - -class AcqIter(Acquisition): - def __init__(self, objective="max", niter=2, **kwargs): - """ - The predicted energy or uncertainty dependent on - the iteration as the acqusition function. - """ - self.update_arguments(objective=objective, niter=niter, **kwargs) - self.iter = 0 - - def calculate(self, energy, uncertainty=None, **kwargs): - """ - Calculate the acqusition function value as - the predicted energy or uncertainty. 
- """ - self.iter += 1 - if (self.iter) % self.niter == 0: - return energy - return uncertainty - - def update_arguments(self, objective=None, niter=None, **kwargs): - "Set the parameters of the Acquisition function class." - if objective is not None: - self.objective = objective.lower() - if niter is not None: - self.niter = abs(niter) - return self - - def get_arguments(self): - "Get the arguments of the class itself." - # Get the arguments given to the class in the initialization - arg_kwargs = dict( - objective=self.objective, - niter=self.niter, - ) - # Get the constants made within the class - constant_kwargs = dict() - # Get the objects made within the class - object_kwargs = dict() - return arg_kwargs, constant_kwargs, object_kwargs - - -class AcqUME(Acquisition): - def __init__(self, objective="max", unc_convergence=0.05, **kwargs): - """ - The predicted uncertainty when it is larger than unc_convergence - else predicted energy as the acqusition function. - """ - self.update_arguments( - objective=objective, - unc_convergence=unc_convergence, - **kwargs, - ) - - def calculate(self, energy, uncertainty=None, **kwargs): - """ - Calculate the acqusition function value as the predicted uncertainty - when it is is larger than unc_convergence else predicted energy. - """ - if np.max([uncertainty]) < self.unc_convergence: - return energy - return self.objective_value(uncertainty) - - def update_arguments(self, objective=None, unc_convergence=None, **kwargs): - "Set the parameters of the Acquisition function class." - if objective is not None: - self.objective = objective.lower() - if unc_convergence is not None: - self.unc_convergence = abs(unc_convergence) - return self - - def get_arguments(self): - "Get the arguments of the class itself." 
- # Get the arguments given to the class in the initialization - arg_kwargs = dict( - objective=self.objective, - unc_convergence=self.unc_convergence, - ) - # Get the constants made within the class - constant_kwargs = dict() - # Get the objects made within the class - object_kwargs = dict() - return arg_kwargs, constant_kwargs, object_kwargs - - -class AcqUUCB(AcqUCB): - def __init__( - self, - objective="max", - kappa=2.0, - kappamax=3.0, - unc_convergence=0.05, - **kwargs, - ): - """ - The predicted uncertainty when it is larger than unc_convergence - else upper confidence interval (ucb) as the acqusition function. - """ - self.update_arguments( - objective=objective, - kappa=kappa, - kappamax=kappamax, - unc_convergence=unc_convergence, - **kwargs, - ) - - def calculate(self, energy, uncertainty=None, **kwargs): - """ - Calculate the acqusition function value as the predicted uncertainty - when it is is larger than unc_convergence else ucb. - """ - if np.max([uncertainty]) < self.unc_convergence: - kappa = self.get_kappa() - return energy + kappa * uncertainty - return uncertainty - - def update_arguments( - self, - objective=None, - kappa=None, - kappamax=None, - unc_convergence=None, - **kwargs, - ): - "Set the parameters of the Acquisition function class." - if objective is not None: - self.objective = objective.lower() - if kappa is not None: - if isinstance(kappa, (float, int)): - kappa = abs(kappa) - self.kappa = kappa - if kappamax is not None: - self.kappamax = abs(kappamax) - if unc_convergence is not None: - self.unc_convergence = abs(unc_convergence) - return self - - def get_arguments(self): - "Get the arguments of the class itself." 
- # Get the arguments given to the class in the initialization - arg_kwargs = dict( - objective=self.objective, - kappa=self.kappa, - kappamax=self.kappamax, - unc_convergence=self.unc_convergence, - ) - # Get the constants made within the class - constant_kwargs = dict() - # Get the objects made within the class - object_kwargs = dict() - return arg_kwargs, constant_kwargs, object_kwargs - - -class AcqULCB(AcqUUCB): - def __init__( - self, - objective="min", - kappa=2.0, - kappamax=3.0, - unc_convergence=0.05, - **kwargs, - ): - """ - The predicted uncertainty when it is larger than unc_convergence - else lower confidence interval (lcb) as the acqusition function. - """ - self.update_arguments( - objective=objective, - kappa=kappa, - kappamax=kappamax, - unc_convergence=unc_convergence, - **kwargs, - ) - - def calculate(self, energy, uncertainty=None, **kwargs): - """ - Calculate the acqusition function value as the predicted uncertainty - when it is is larger than unc_convergence else lcb. - """ - if np.max([uncertainty]) < self.unc_convergence: - kappa = self.get_kappa() - return energy - kappa * uncertainty - return -uncertainty - - -class AcqEI(Acquisition): - def __init__(self, objective="max", ebest=None, **kwargs): - """ - The predicted expected improvement as the acqusition function. - """ - self.update_arguments(objective=objective, ebest=ebest, **kwargs) - - def calculate(self, energy, uncertainty=None, **kwargs): - """ - Calculate the acqusition function value as - the predicted expected improvement. - """ - z = (energy - self.ebest) / uncertainty - a = (energy - self.ebest) * norm.cdf(z) + uncertainty * norm.pdf(z) - return self.objective_value(a) - - def update_arguments(self, objective=None, ebest=None, **kwargs): - "Set the parameters of the Acquisition function class." 
- if objective is not None: - self.objective = objective.lower() - if ebest is not None: - self.ebest = ebest - return self - - def get_arguments(self): - "Get the arguments of the class itself." - # Get the arguments given to the class in the initialization - arg_kwargs = dict( - objective=self.objective, - ebest=self.ebest, - ) - # Get the constants made within the class - constant_kwargs = dict() - # Get the objects made within the class - object_kwargs = dict() - return arg_kwargs, constant_kwargs, object_kwargs - - -class AcqPI(AcqEI): - def __init__(self, objective="max", ebest=None, **kwargs): - """ - The predicted probability of improvement as the acqusition function. - """ - self.update_arguments(objective=objective, ebest=ebest, **kwargs) - - def calculate(self, energy, uncertainty=None, **kwargs): - """ - Calculate the acqusition function value as - the predicted expected improvement. - """ - z = (energy - self.ebest) / uncertainty - return self.objective_value(norm.cdf(z)) diff --git a/catlearn/optimize/mlgo.py b/catlearn/optimize/mlgo.py deleted file mode 100644 index 7ee79c61..00000000 --- a/catlearn/optimize/mlgo.py +++ /dev/null @@ -1,991 +0,0 @@ -import numpy as np -import ase -from ase.io import read -from scipy.optimize import dual_annealing -import datetime -from ase.parallel import world, broadcast -from ..regression.gp.calculator.copy_atoms import copy_atoms -from ..regression.gp.baseline.repulsive import RepulsionCalculator - - -class MLGO: - def __init__( - self, - slab, - ads, - ase_calc, - ads2=None, - mlcalc=None, - acq=None, - prev_calculations=None, - use_database_check=True, - apply_constraint=True, - force_consistent=None, - scale_fmax=0.8, - save_memory=False, - local_opt=None, - local_opt_kwargs={}, - opt_kwargs={}, - bounds=None, - initial_points=2, - norelax_points=10, - min_steps=8, - trajectory="evaluated.traj", - tabletxt="mlgo_summary.txt", - full_output=False, - **kwargs, - ): - """ - Machine learning accelerated global 
adsorption optimization - with active learning. - - Parameters: - slab : ASE Atoms object. - The object of the surface or nanoparticle that - the adsorbate is adsorped to. - The energy and forces for the structure is not needed. - ads : ASE Atoms object. - The object of the adsorbate in vacuum with same cell size and - pbc as for the slab. - The energy and forces for the structure is not needed. - ase_calc : ASE calculator instance. - ASE calculator as implemented in ASE. - See: - https://wiki.fysik.dtu.dk/ase/ase/calculators/calculators.html - ads2 : ASE Atoms object (optional). - The object of a second adsorbate in vacuum that - is adsorbed simultaneously with the other adsorbate. - mlcalc : ML-calculator instance. - The ML-calculator instance used as surrogate surface. - A default ML-model is used if mlcalc is None. - acq : Acquisition instance. - The Acquisition instance used for calculating - the acq. function and choose a candidate to calculate next. - A default Acquisition instance is used if acq is None. - prev_calculations : Atoms list or ASE Trajectory file. - (optional) The user can feed previously calculated data for the - same hypersurface. The previous calculations must be fed as an - Atoms list or Trajectory file. - use_database_check : bool - Whether to check if the new structure is within the database. - If it is in the database, the structure is rattled. - apply_constraint : boolean - Whether to apply the constrains of the ASE Atoms instance - to the calculated forces. - By default (apply_constraint=True) forces are 0 for - constrained atoms and directions. - force_consistent : boolean or None. - Use force-consistent energy calls (as opposed to the energy - extrapolated to 0 K). By default (force_consistent=None) uses - force-consistent energies if available in the calculator, but - falls back to force_consistent=False if not. - scale_fmax : float - The scaling of the fmax for the ML-NEB runs. 
- It makes the path converge tighter on surrogate surface. - save_memory : bool - Whether to only train the ML calculator and store - all objects on one CPU. - If save_memory==True then parallel optimization of - the hyperparameters can not be achived. - If save_memory==False no MPI object is used. - local_opt : ASE local optimizer Object. - A local optimizer object from ASE. - If None is given then FIRE is used. - local_opt_kwargs : dict. - Arguments used for the ASE local optimizer. - bounds : (6,2) or (12,2) ndarray (optional). - The boundary conditions used for the global optimization in - form of the simulated annealing. - The boundary conditions are the x, y, and z coordinates of - the center of the adsorbate and 3 rotations. - Same boundary conditions can be set for the second adsorbate - if chosen. - initial_points : int. - Number of generated initial structures used for training - the ML calculator if no previous data is given. - norelax_points : int. - The number of structures used for training before - local relaxation of the structures after - the global optimization is activated. - min_steps : int. - The minimum number of iterations before convergence is checked. - opt_kwargs : dict. - Arguments used for the simulated annealing method. - trajectory : string. - Trajectory filename to store the evaluated training data. - tabletxt : string - Name of the .txt file where the summary table is printed. - It is not saved to the file if tabletxt=None. - full_output : bool. - Whether to print on screen the full output (True). 
- """ - # Setup parallelization - self.parallel_setup(save_memory) - # Setup given parameters - self.setup_slab_ads(slab, ads, ads2) - self.opt_kwargs = opt_kwargs - self.norelax_points = norelax_points - self.min_steps = min_steps - self.use_database_check = use_database_check - self.initial_points = initial_points - self.full_output = full_output - # Set candidate instance with ASE calculator - self.candidate = self.slab_ads.copy() - self.candidate.calc = ase_calc - self.apply_constraint = apply_constraint - self.force_consistent = force_consistent - # Set initial parameters - self.step = 0 - self.error = 0 - self.energies = [] - self.emin = np.inf - self.best_candidate = None - # Boundary conditions for adsorbate position and angles - if bounds is None: - self.bounds = np.array( - [ - [0.0, 1.0], - [0.0, 1.0], - [0.0, 1.0], - [0.0, 2 * np.pi], - [0.0, 2 * np.pi], - [0.0, 2 * np.pi], - ] - ) - else: - self.bounds = bounds.copy() - if len(self.bounds) == 6 and self.ads2 is not None: - self.bounds = np.concatenate([self.bounds, self.bounds], axis=0) - # Make trajectory file for calculated structures - self.trajectory = trajectory - # Summary table file name - self.tabletxt = tabletxt - # Setup the ML calculator - self.set_mlcalc(mlcalc, save_memory=save_memory) - self.set_verbose(verbose=full_output) - # Select an acquisition function - self.set_acq(acq) - # Scale the fmax on the surrogate surface - self.scale_fmax = scale_fmax - # Use restart structures or make one initial point - self.use_prev_calculations(prev_calculations) - # Set local optimizer - self.set_local_opt( - local_opt=local_opt, - local_opt_kwargs=local_opt_kwargs, - ) - - def run( - self, - fmax=0.05, - unc_convergence=0.025, - steps=200, - max_unc=0.25, - ml_steps=2000, - ml_chains=3, - relax=True, - local_steps=500, - seed=0, - **kwargs, - ): - """ - Run the ML adsorption optimizer - - Parameters: - fmax : float - Convergence criteria (in eV/Angs). 
- unc_convergence: float - Maximum uncertainty for convergence (in eV). - steps : int - Maximum number of evaluations. - max_unc : float - Early stopping criteria. - Maximum uncertainty allowed before local optimization. - ml_steps : int - Maximum number of steps for the global optimization - on the predicted landscape. - ml_chains : int - The number of parallel chains in the simulated annealing. - relax : bool - Whether to perform local optimization after - the global optimization. - local_steps : int - Maximum number of steps for the local optimization - on the predicted landscape. - seed : int (optional) - The random seed. - """ - # Set the random seed - np.random.seed(seed) - # Update the acquisition function - self.acq.update_arguments(unc_convergence=unc_convergence) - # Calculate initial data if enough data is not given - self.extra_initial_data(self.initial_points) - # Run global search - for step in range(1, steps + 1): - # Train ML-Model - self.train_mlmodel() - # Search after and find the next candidate for calculation - candidate = self.find_next_candidate( - ml_chains, - ml_steps, - max_unc, - relax, - fmax * self.scale_fmax, - local_steps, - ) - # Evaluate candidate - self.evaluate(candidate) - # Make print of table - self.print_statement(step) - # Check for convergence - self.converging = self.check_convergence(unc_convergence, fmax) - if self.converging: - break - if self.converging is False: - self.message_system("MLGO did not converge!") - return self.best_candidate - - def get_atoms(self): - "Return the best candidate structure." 
- return self.best_candidate - - def setup_slab_ads(self, slab, ads, ads2=None): - "Setup slab and adsorbate with their constrains" - # Setup slab - self.slab = slab.copy() - self.slab.set_tags(0) - # Setup adsorbate - self.ads = ads.copy() - self.ads.set_tags(1) - # Center adsorbate structure - pos = self.ads.get_positions() - self.ads.set_positions(pos - np.mean(pos, axis=0)) - self.ads.cell = self.slab.cell.copy() - self.ads.pbc = self.slab.pbc.copy() - # Setup second adsorbate - if ads2: - self.ads2 = ads2.copy() - self.ads2.set_tags(2) - # Center adsorbate structure - pos = self.ads2.get_positions() - self.ads2.set_positions(pos - np.mean(pos, axis=0)) - self.ads2.cell = self.slab.cell.copy() - self.ads2.pbc = self.slab.pbc.copy() - else: - self.ads2 = None - # Number of atoms and the constraint used - self.slab_ads = self.slab.copy() - self.slab_ads.extend(self.ads.copy()) - if self.ads2: - self.slab_ads.extend(self.ads2.copy()) - self.number_atoms = len(self.slab_ads) - return - - def parallel_setup(self, save_memory=False, **kwargs): - "Setup the parallelization." 
- self.save_memory = save_memory - self.rank = world.rank - self.size = world.size - return self - - def place_ads(self, pos_angles): - "Place the adsorbate in the cell of the surface" - if self.ads2: - ( - x, - y, - z, - theta1, - theta2, - theta3, - x2, - y2, - z2, - theta12, - theta22, - theta32, - ) = pos_angles - else: - x, y, z, theta1, theta2, theta3 = pos_angles - ads = self.rotation_matrix(self.ads.copy(), [theta1, theta2, theta3]) - spos = ads.get_scaled_positions() - ads.set_scaled_positions(spos + np.array([x, y, z])) - slab_ads = self.slab.copy() - slab_ads.extend(ads) - if self.ads2: - ads2 = self.rotation_matrix( - self.ads2.copy(), - [theta12, theta22, theta32], - ) - spos = ads2.get_scaled_positions() - ads2.set_scaled_positions(spos + np.array([x2, y2, z2])) - slab_ads.extend(ads2) - slab_ads.wrap() - return slab_ads - - def rotation_matrix(self, ads, angles): - "Rotate the adsorbate" - theta1, theta2, theta3 = angles - Rz = np.array( - [ - [np.cos(theta1), -np.sin(theta1), 0.0], - [np.sin(theta1), np.cos(theta1), 0.0], - [0.0, 0.0, 1.0], - ] - ) - Ry = np.array( - [ - [np.cos(theta2), 0.0, np.sin(theta2)], - [0.0, 1.0, 0.0], - [-np.sin(theta2), 0.0, np.cos(theta2)], - ] - ) - R = np.matmul(Ry, Rz) - Rz = np.array( - [ - [np.cos(theta3), -np.sin(theta3), 0.0], - [np.sin(theta3), np.cos(theta3), 0.0], - [0.0, 0.0, 1.0], - ] - ) - R = np.matmul(Rz, R).T - ads.set_positions(np.matmul(ads.get_positions(), R)) - return ads - - def evaluate(self, candidate): - "Caculate energy and forces and add training system to ML-model" - # Ensure that the candidate is not already in the database - if self.use_database_check: - candidate = self.ensure_not_in_database(candidate) - # Broadcast the system to all cpus - if self.rank == 0: - candidate = candidate.copy() - candidate = broadcast(candidate, root=0) - # Calculate the energies and forces - self.message_system("Performing evaluation.", end="\r") - self.candidate.set_positions(candidate.get_positions()) - 
forces = self.candidate.get_forces( - apply_constraint=self.apply_constraint - ) - self.energy_true = self.candidate.get_potential_energy( - force_consistent=self.force_consistent - ) - self.step += 1 - self.message_system("Single-point calculation finished.") - # Store the data - self.max_abs_forces = np.nanmax(np.linalg.norm(forces, axis=1)) - self.add_training([self.candidate]) - self.mlcalc.save_data(trajectory=self.trajectory) - # Best new point - self.best_new_point(self.candidate, self.energy_true) - return - - def add_training(self, atoms_list): - "Add atoms_list data to ML model on rank=0." - self.mlcalc.add_training(atoms_list) - return self.mlcalc - - def best_new_point(self, candidate, energy): - "Best new candidate due to energy" - if self.rank == 0: - if energy <= self.emin: - self.emin = energy - self.best_candidate = copy_atoms(candidate) - self.best_x = self.x.copy() - # Save the energy - self.energies.append(energy) - # Broadcast convergence statement if MPI is used - self.best_candidate, self.emin = broadcast( - [self.best_candidate, self.emin], - root=0, - ) - return self.best_candidate - - def add_random_ads(self): - "Generate a random slab-adsorbate structure from bounds" - sol = dual_annealing( - self.dual_func_random, - self.bounds, - maxfun=100, - **self.opt_kwargs, - ) - self.x = sol["x"].copy() - slab_ads = self.place_ads(sol["x"]) - return slab_ads - - def dual_func_random(self, pos_angles): - "Dual annealing object function for random structure" - slab_ads = self.place_ads(pos_angles) - slab_ads.calc = RepulsionCalculator( - r_scale=0.7, - reduce_dimensions=True, - power=10, - periodic_softmax=True, - wrap=True, - ) - energy = slab_ads.get_potential_energy() - return energy - - def use_prev_calculations(self, prev_calculations): - "Use previous calculations to restart ML calculator." 
- if prev_calculations is None: - return - if isinstance(prev_calculations, str): - prev_calculations = read(prev_calculations, ":") - # Add calculations to the ML model - self.add_training(prev_calculations) - return - - def set_verbose(self, verbose, **kwargs): - "Set verbose of MLModel." - self.mlcalc.mlmodel.update_arguments(verbose=verbose) - return - - def train_mlmodel(self): - "Train the ML model." - if self.save_memory: - if self.rank != 0: - return self.mlcalc - # Update database with the points of interest - self.update_database_arguments(point_interest=self.best_candidate) - # Train the ML model - self.mlcalc.train_model() - return self.mlcalc - - def is_in_database(self, atoms, **kwargs): - "Check if the ASE Atoms is in the database." - return self.mlcalc.is_in_database(atoms, **kwargs) - - def update_database_arguments(self, point_interest=None, **kwargs): - "Update the arguments in the database." - self.mlcalc.update_database_arguments( - point_interest=point_interest, - **kwargs, - ) - return self - - def ensure_not_in_database(self, atoms, perturb=0.01, **kwargs): - """ - Ensure the ASE Atoms object is not in database by perturb it - if it is. - """ - # Return atoms if it does not exist - if atoms is None: - return atoms - # Check if atoms object is in the database - if self.is_in_database(atoms, **kwargs): - # Get positions - pos = atoms.get_positions() - # Rattle the positions - pos = pos + np.random.uniform( - low=-perturb, - high=perturb, - size=pos.shape, - ) - atoms.set_positions(pos) - self.message_system( - "The system is rattled, since it is already in the database." - ) - return atoms - - def find_next_candidate( - self, - ml_chains, - ml_steps, - max_unc, - relax, - fmax, - local_steps, - **kwargs, - ): - """ - Find the next candidates by using simulated annealing and - then chose the candidate from acquisition. 
- """ - # Return None if memory is saved and therefore not in parallel - if self.save_memory and self.rank != 0: - return None - # Initialize candidate dictionary - candidate, energy, unc, x = None, None, None, None - candidates = { - "candidates": [], - "energies": [], - "uncertainties": [], - "x": [], - } - r = 0 - # Perform multiple optimizations - for chain in range(ml_chains): - # Set a unique optimization for each chain - np.random.seed(chain) - if not self.save_memory: - r = chain % self.size - if self.rank == r: - # Find candidates from a global simulated annealing search - self.message_system( - "Starting global search!", end="\r", rank=r - ) - candidate, energy, unc, x = self.dual_annealing( - maxiter=ml_steps, - **self.opt_kwargs, - ) - self.message_system("Global search converged", rank=r) - # Do a local relaxation if the conditions are met - if relax and ( - self.get_training_set_size() >= self.norelax_points - ): - if unc <= max_unc: - self.message_system( - "Starting local relaxation", end="\r", rank=r - ) - candidate, energy, unc = self.local_relax( - candidate, - fmax, - max_unc, - local_steps=local_steps, - rank=r, - ) - else: - self.message_system( - "No local relaxation due to high uncertainty", - rank=r, - ) - # Append the newest candidate - candidates = self.append_candidates( - candidates, - candidate, - energy, - unc, - x, - ) - # Broadcast all the candidates - if not self.save_memory: - candidates = self.broadcast_candidates(candidates) - # Print the energies and uncertainties for the new candidates - self.message_system( - "Candidates energies: " + str(candidates["energies"]) - ) - self.message_system( - "Candidates uncertainties: " + str(candidates["uncertainties"]) - ) - # Find the new best candidate from the acquisition function - candidate = self.choose_candidate(candidates) - return candidate - - def choose_candidate(self, candidates): - "Use acquisition functions to chose the next training point" - # Calculate the acquisition 
function for each candidate - acq_values = self.acq.calculate( - np.array(candidates["energies"]), - np.array(candidates["uncertainties"]), - ) - # Chose the minimum value given by the Acq. class - i_min = self.acq.choose(acq_values)[0] - # The next training point - candidate = candidates["candidates"][i_min].copy() - self.energy = candidates["energies"][i_min] - self.unc = np.abs(candidates["uncertainties"][i_min]) - self.x = candidates["x"][i_min].copy() - return candidate - - def check_convergence(self, unc_convergence, fmax): - "Check if the convergence criteria are fulfilled" - converged = False - if self.rank == 0: - # Check the minimum number of steps have been performed - if self.min_steps <= self.get_training_set_size(): - # Check the force and uncertainty criteria are met - if self.max_abs_forces <= fmax and self.unc < unc_convergence: - # Check the true energy deviation match - # the uncertainty prediction - e_dif = np.abs(self.energy_true - self.energy) - if e_dif <= 2.0 * unc_convergence: - # Check the predicted structure has - # the lowest observed energy - em_dif = np.abs(self.energy - self.emin) - if em_dif <= 2.0 * unc_convergence: - self.message_system("Optimization is converged.") - converged = True - # Broadcast convergence statement if MPI is used - converged = broadcast(converged, root=0) - return converged - - def dual_annealing(self, maxiter=5000, **opt_kwargs): - """ - Find the candidates structures, energy and forces using dual annealing. 
- """ - # Deactivate force predictions - self.mlcalc.update_arguments(calculate_forces=False) - # Perform simulated annealing - sol = dual_annealing( - self.dual_func, - bounds=self.bounds, - maxfun=maxiter, - **opt_kwargs, - ) - # Reconstruct the final structure - slab_ads = self.place_ads(sol["x"]) - # Get the energy and uncertainty predictions - slab_ads.calc = self.mlcalc - energy, unc = self.get_predictions(slab_ads) - return slab_ads.copy(), energy, unc, sol["x"].copy() - - def dual_func(self, pos_angles): - "Dual annealing object function" - # Construct the structure - slab_ads = self.place_ads(pos_angles) - # Predict the energy and uncertainty - slab_ads.calc = self.mlcalc - energy = slab_ads.get_potential_energy() - unc = slab_ads.calc.get_uncertainty(slab_ads) - # Calculate the acquisition function - return self.acq.calculate(energy, uncertainty=unc) - - def local_relax( - self, - candidate, - fmax, - max_unc, - local_steps=200, - rank=0, - **kwargs, - ): - "Perform a local relaxation of the candidate" - # Activate force predictions and reset calculator - self.mlcalc.update_arguments(calculate_forces=True) - self.mlcalc.reset() - candidate = candidate.copy() - candidate.calc = self.mlcalc - # Initialize local optimization - with self.local_opt(candidate, **self.local_opt_kwargs) as dyn: - if max_unc is False or max_unc is None: - converged, candidate = self.local_relax_no_max_unc( - dyn, - candidate, - fmax=fmax, - local_steps=local_steps, - **kwargs, - ) - else: - converged, candidate = self.local_relax_max_unc( - dyn, - candidate, - fmax=fmax, - max_unc=max_unc, - local_steps=local_steps, - rank=rank, - **kwargs, - ) - # Calculate the energy and uncertainty - energy, unc = self.get_predictions(candidate) - return candidate.copy(), energy, unc - - def local_relax_no_max_unc( - self, - dyn, - candidate, - fmax, - local_steps=200, - **kwargs, - ): - "Run the local optimization without checking uncertainties." 
- dyn.run(fmax=fmax, steps=local_steps) - return dyn.converged(), candidate - - def local_relax_max_unc( - self, - dyn, - candidate, - fmax, - max_unc, - local_steps=200, - rank=0, - **kwargs, - ): - "Run the local optimization with checking uncertainties." - for i in range(1, local_steps + 1): - candidate_backup = candidate.copy() - # Take a step in local relaxation on surrogate surface - if ase.__version__ >= "3.23": - dyn.run(fmax=fmax, steps=1) - else: - dyn.run(fmax=fmax, steps=i) - energy, unc = self.get_predictions(candidate) - # Check if the uncertainty is too large - if unc >= max_unc: - self.message_system( - "Relaxation on surrogate surface stopped due " - "to high uncertainty!", - rank=rank, - ) - break - # Check if there is a problem with prediction - if np.isnan(energy): - candidate = candidate_backup.copy() - candidate.calc = self.mlcalc - self.message_system( - "Stopped due to NaN value in prediction!", rank=rank - ) - break - # Check if the optimization is converged on the predicted surface - if dyn.converged(): - self.message_system( - "Relaxation on surrogate surface converged!", rank=rank - ) - break - # Check the number of steps - if dyn.get_number_of_steps() >= local_steps: - break - return dyn.converged(), candidate - - def get_predictions(self, candidate): - "Calculate the energies and uncertainties with the ML calculator" - unc = candidate.calc.get_uncertainty(candidate) - energy = candidate.get_potential_energy() - return energy, unc - - def get_training_set_size(self): - "Get the size of the training set" - return self.mlcalc.get_training_set_size() - - def extra_initial_data(self, initial_points): - """ - If only initial and final state is given then a third data point - is calculated. 
- """ - candidate = None - while self.get_training_set_size() < initial_points: - candidate = self.add_random_ads() - self.evaluate(candidate) - return self.get_training_set_size() - - def append_candidates( - self, - candidates, - candidate, - energy, - unc, - x, - **kwargs, - ): - "Update the candidates by appending the newest one." - candidates["candidates"].append(candidate) - candidates["energies"].append(energy) - candidates["uncertainties"].append(unc) - candidates["x"].append(x) - return candidates - - def broadcast_candidates(self, candidates, **kwargs): - "Broadcast candidates with energies, uncertainties, and positions." - candidates_broad = { - "candidates": [], - "energies": [], - "uncertainties": [], - "x": [], - } - for r in range(self.size): - cand_r = broadcast(candidates, root=r) - for n in range(len(cand_r["candidates"])): - candidates_broad = self.append_candidates( - candidates_broad, - cand_r["candidates"][n], - cand_r["energies"][n], - cand_r["uncertainties"][n], - cand_r["x"][n], - ) - return candidates_broad - - def get_energy_deviation(self, **kwargs): - """ - Get the absolute energy difference between - the predicted and true energy. - """ - return np.abs(self.energy_true - self.energy) - - def message_system(self, message, obj=None, end="\n", rank=0): - "Print output once." - if self.full_output is True: - if self.rank == rank: - if obj is None: - print(message, end=end) - else: - print(message, obj, end=end) - else: - if self.rank == 0: - if obj is None: - print(message, end=end) - else: - print(message, obj, end=end) - return - - def converged(self): - "Whether MLGO is converged." - return self.converging - - def set_mlcalc(self, mlcalc, save_memory=None, **kwargs): - """ - Setup the ML calculator. - - Parameters: - mlcalc : ML-calculator instance. - The ML-calculator instance used as surrogate surface. - A default ML-model is used if mlcalc is None. 
- save_memory : bool - Whether to only train the ML calculator and store - all objects on one CPU. - If save_memory==True then parallel optimization of - the hyperparameters can not be achived. - If save_memory==False no MPI object is used. - - Returns: - self: The object itself. - """ - if mlcalc is None: - from ..regression.gp.calculator import ( - get_default_mlmodel, - MLCalculator, - ) - from ..regression.gp.fingerprint import ( - SortedDistances, - ) - - # Check if the save_memory is given - if save_memory is None: - try: - save_memory = self.save_memory - except Exception: - raise Exception("The save_memory is not given.") - - fp = SortedDistances( - reduce_dimensions=True, - use_derivatives=True, - periodic_softmax=True, - wrap=True, - ) - baseline = RepulsionCalculator( - reduce_dimensions=True, - power=10, - periodic_softmax=True, - wrap=True, - ) - mlmodel = get_default_mlmodel( - model="gp", - fp=fp, - baseline=baseline, - use_derivatives=True, - parallel=(not save_memory), - database_reduction=False, - ) - self.mlcalc = MLCalculator(mlmodel=mlmodel) - else: - self.mlcalc = mlcalc - return self - - def set_acq(self, acq=None, **kwargs): - """ - Set the acquisition function. - - Parameters: - acq : Acquisition class instance. - The Acquisition instance used for calculating - the acq. function and choose a candidate to calculate next. - If None is given then LCB is used. - - Returns: - self: The object itself. - """ - if acq is None: - from .acquisition import AcqLCB - - self.acq = AcqLCB(objective="min", kappa=3.0) - else: - self.acq = acq.copy() - return self - - def set_local_opt(self, local_opt=None, local_opt_kwargs={}, **kwargs): - """ - Save local optimizer. - - Parameters: - local_opt : ASE local optimizer Object. - A local optimizer object from ASE. - If None is given then FIRE is used. - local_opt_kwargs : dict - Arguments used for the ASE local optimizer. - - Returns: - self: The object itself. 
- """ - local_opt_kwargs_default = dict() - if not self.full_output: - local_opt_kwargs_default["logfile"] = None - if local_opt is None: - from ase.optimize import FIRE - - local_opt = FIRE - local_opt_kwargs_default.update( - dict( - dt=0.05, - maxstep=0.2, - a=1.0, - astart=1.0, - fa=0.999, - downhill_check=True, - ) - ) - self.local_opt = local_opt - local_opt_kwargs_default.update(local_opt_kwargs) - self.local_opt_kwargs = local_opt_kwargs_default.copy() - return self - - def save_mlcalc(self, filename="mlcalc.pkl", **kwargs): - """ - Save the ML calculator object to a file. - - Parameters: - filename : str - The name of the file where the object is saved. - - Returns: - self: The object itself. - """ - self.mlcalc.save_mlcalc(filename, **kwargs) - return self - - def make_summary_table(self, step, **kwargs): - "Make the summary of the Global optimization process as table." - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - try: - len(self.print_list) - except Exception: - self.print_list = [ - "| Step | Time | True energy | " - "Uncertainty | True error | fmax |" - ] - msg = "|{0:6d}| ".format(step) - msg += "{} |".format(now) - msg += "{0:23f}|".format(self.energy_true) - msg += "{0:13f}|".format(self.unc) - msg += "{0:14f}|".format(self.get_energy_deviation()) - msg += "{0:10f}|".format(self.max_abs_forces) - self.print_list.append(msg) - msg = "\n".join(self.print_list) - return msg - - def save_summary_table(self, **kwargs): - "Save the summary table in the .txt file." 
- if self.tabletxt is not None: - with open(self.tabletxt, "w") as thefile: - msg = "\n".join(self.print_list) - thefile.writelines(msg) - return - - def print_statement(self, step, **kwargs): - "Print the Global optimization process as a table" - msg = "" - if self.rank == 0: - msg = self.make_summary_table(step, **kwargs) - self.save_summary_table() - self.message_system(msg) - return msg diff --git a/catlearn/optimize/mlneb.py b/catlearn/optimize/mlneb.py deleted file mode 100644 index b262bdcf..00000000 --- a/catlearn/optimize/mlneb.py +++ /dev/null @@ -1,1071 +0,0 @@ -import numpy as np -import ase -from ase.io import read -from ase.io.trajectory import TrajectoryWriter -from ase.parallel import world, broadcast -import datetime -from .neb.improvedneb import ImprovedTangentNEB -from .neb.nebimage import NEBImage -from .neb.interpolate_band import make_interpolation -from ..regression.gp.calculator.copy_atoms import copy_atoms - - -class MLNEB: - def __init__( - self, - start, - end, - ase_calc, - mlcalc=None, - acq=None, - interpolation="idpp", - interpolation_kwargs=dict(), - climb=True, - neb_method=ImprovedTangentNEB, - neb_kwargs=dict(), - n_images=15, - prev_calculations=None, - use_database_check=True, - use_restart_path=True, - check_path_unc=True, - check_path_fmax=True, - use_low_unc_ci=True, - reuse_ci_path=False, - save_memory=False, - apply_constraint=True, - force_consistent=None, - scale_fmax=0.8, - local_opt=None, - local_opt_kwargs=dict(), - trainingset="evaluated_structures.traj", - trajectory="MLNEB.traj", - last_path=None, - final_path="final_path.traj", - tabletxt="mlneb_summary.txt", - restart=False, - full_output=False, - **kwargs, - ): - """ - Nudged elastic band (NEB) with Machine Learning as active learning. - - Parameters: - start : Atoms object with calculated energy or ASE Trajectory file. - Initial end-point of the NEB path. - end : Atoms object with calculated energy or ASE Trajectory file. - Final end-point of the NEB path. 
- ase_calc : ASE calculator instance. - ASE calculator as implemented in ASE. - See: - https://wiki.fysik.dtu.dk/ase/ase/calculators/calculators.html - mlcalc : ML-calculator instance. - The ML-calculator instance used as surrogate surface. - A default ML-model is used if mlcalc is None. - acq : Acquisition class instance. - The Acquisition instance used for calculating - the acq. function and choose a candidate to calculate next. - A default Acquisition instance is used if acq is None. - interpolation : string or list of ASE Atoms or ASE Trajectory file. - Automatic interpolation can be done ('idpp' and 'linear' as - implemented in ASE). - See https://wiki.fysik.dtu.dk/ase/ase/neb.html. - Manual: Trajectory file (in ASE format) or list of Atoms. - interpolation_kwargs : dict. - A dictionary with the arguments used in the interpolation. - See https://wiki.fysik.dtu.dk/ase/ase/neb.html. - climb : bool - Whether to use climbing image in the ML-NEB. - It is strongly recommended to have climb=True. - It is only activated when the uncertainty is low and - a NEB without climbing image can converge. - neb_method : class object or str - The NEB implemented class object used for the ML-NEB. - A string can be used to select: - - 'improvedtangentneb' (default) - - 'ewneb' - neb_kwargs : dict. - A dictionary with the arguments used in the NEB object - to create the instance. - Climb must not be included. - See https://wiki.fysik.dtu.dk/ase/ase/neb.html. - n_images : int. - Number of images of the path (if not included a path before). - The number of images include the 2 end-points of the NEB path. - prev_calculations : Atoms list or ASE Trajectory file. - (optional) The user can feed previously calculated data for the - same hypersurface. The previous calculations must be fed as an - Atoms list or Trajectory file. - use_database_check : bool - Whether to check if the new structure is within the database. - If it is in the database, the structure is rattled. 
- use_restart_path : bool - Use the path from last robust iteration (low uncertainty). - check_path_unc : bool - Check if the uncertainty is large for the restarted path and - if it is then use the initial interpolation. - check_path_fmax : bool - Check if the maximum perpendicular force is larger for - the restarted path than the initial interpolation and - if so then replace it. - use_low_unc_ci : bool - Whether to only activative climbing image NEB - when the uncertainties of all images are below unc_convergence. - If use_low_unc_ci=False, the climbing image is activated - without checking the uncertainties. - reuse_ci_path : bool - Whether to reuse the path from the climbing image NEB. - It is only recommended to be used if use_low_unc_ci=True. - save_memory : bool - Whether to only train the ML calculator and store - all objects on one CPU. - If save_memory==True then parallel optimization of - the hyperparameters can not be achived. - If save_memory==False no MPI object is used. - apply_constraint : boolean - Whether to apply the constrains of the ASE Atoms instance - to the calculated forces. - By default (apply_constraint=True) forces are 0 for - constrained atoms and directions. - force_consistent : boolean or None. - Use force-consistent energy calls (as opposed to the energy - extrapolated to 0 K). By default (force_consistent=None) uses - force-consistent energies if available in the calculator, but - falls back to force_consistent=False if not. - scale_fmax : float - The scaling of the fmax for the ML-NEB runs. - It makes the path converge tighter on surrogate surface. - local_opt : ASE local optimizer Object. - A local optimizer object from ASE. - If None is given then FIRE is used. - local_opt_kwargs : dict - Arguments used for the ASE local optimizer. - trainingset : string. - Trajectory filename to store the evaluated training data. - trajectory : string - Trajectory filename to store the predicted NEB path. 
- last_path : string - Trajectory filename to store the last MLNEB path. - If last_path=None, the last path is not saved. - final_path : string - Trajectory filename to store the final MLNEB path. - If final_path=None, the final path is not saved. - tabletxt : string - Name of the .txt file where the summary table is printed. - It is not saved to the file if tabletxt=None. - restart : bool - Whether to restart the MLNEB from a previous run. - It is only possible to restart the MLNEB - if the previous run was performed in same directory. - The previous and current run must have the same parameters. - The trainingset and trajectory file is used - to restart the MLNEB. - Therefore, prev_calculations has to be None. - full_output : boolean - Whether to print on screen the full output (True). - """ - # Setup parallelization - self.parallel_setup(save_memory) - # NEB parameters - self.interpolation = interpolation - self.interpolation_kwargs = dict( - mic=True, - remove_rotation_and_translation=False, - ) - self.interpolation_kwargs.update(interpolation_kwargs) - self.n_images = n_images - self.climb = climb - self.set_neb_method(neb_method) - self.neb_kwargs = dict(k=3.0, remove_rotation_and_translation=False) - self.neb_kwargs.update(neb_kwargs) - # General parameter settings - self.use_database_check = use_database_check - self.use_restart_path = use_restart_path - self.check_path_unc = check_path_unc - self.check_path_fmax = check_path_fmax - self.reuse_ci_path = reuse_ci_path - self.use_low_unc_ci = use_low_unc_ci - # Set initial parameters - self.step = 0 - self.converging = False - # Setup the ML calculator - self.set_mlcalc(mlcalc, start=start, save_memory=save_memory) - # Whether to have the full output - self.full_output = full_output - self.set_verbose(verbose=full_output) - # Set an acquisition function - self.set_acq(acq) - # Save initial and final state - self.set_up_endpoints(start, end) - # Set candidate instance with ASE calculator - self.candidate = 
self.start.copy() - self.candidate.calc = ase_calc - self.apply_constraint = apply_constraint - self.force_consistent = force_consistent - # Scale the fmax on the surrogate surface - self.scale_fmax = scale_fmax - # Set local optimizer - self.set_local_opt( - local_opt=local_opt, - local_opt_kwargs=local_opt_kwargs, - ) - # Trajectories - self.trainingset = trainingset - self.trajectory = trajectory - self.last_path = last_path - self.final_path = final_path - # Summary table file name - self.tabletxt = tabletxt - # Restart the MLNEB - if restart: - if prev_calculations is not None: - self.message_system( - "Warning: Given previous calculations does " - "not work with restarting MLNEB!" - ) - try: - self.interpolation = read( - self.trajectory, - "-{}:".format(self.n_images), - ) - prev_calculations = read(self.trainingset, "2:") - except Exception: - self.message_system( - "Warning: Restarting MLNEB is not possible! " - "Reinitalizing MLNEB." - ) - # Load previous calculations to the ML model - self.use_prev_calculations(prev_calculations) - # Define the last images that can be used to restart the interpolation - self.last_images = self.make_interpolation( - interpolation=self.interpolation - ) - # CI restart path activation - self.climb_active = False - - def run( - self, - fmax=0.05, - unc_convergence=0.05, - steps=200, - ml_steps=1500, - max_unc=0.25, - **kwargs, - ): - """ - Run the active learning NEB process. - - Parameters: - fmax : float - Convergence criteria (in eV/Angs). - unc_convergence : float - Maximum uncertainty for convergence (in eV). - steps : int - Maximum number of evaluations. - ml_steps : int - Maximum number of steps for the NEB optimization on the - predicted landscape. - max_unc : float (optional) - Early stopping criteria. - Maximum uncertainty before stopping the optimization - on the surrogate surface. - If it is None or False, it will run to convergence. 
- """ - # Active learning parameters - candidate = None - self.acq.update_arguments(unc_convergence=unc_convergence) - # Define the images - self.images = [copy_atoms(image) for image in self.last_images] - # Define the temporary last images that can be used - # to restart the interpolation - self.last_images_tmp = None - # Calculate a extra data point if only start and end is given - self.extra_initial_data() - # Save MLNEB path trajectory - with TrajectoryWriter( - self.trajectory, - mode="w", - properties=["energy", "forces", "uncertainty"], - ) as self.trajectory_neb: - # Save the initial interpolation - self.save_last_path(self.last_path, self.images, properties=None) - # Run the active learning - for step in range(1, steps + 1): - # Train and optimize ML model - self.train_mlmodel() - # Perform NEB on ML surrogate surface - candidate, neb_converged = self.run_mlneb( - fmax=fmax * self.scale_fmax, - ml_steps=ml_steps, - max_unc=max_unc, - unc_convergence=unc_convergence, - ) - # Evaluate candidate - self.evaluate(candidate) - # Share the images between all CPUs - self.share_images() - # Print the results for this iteration - self.print_statement(step) - # Check convergence - self.converging = self.check_convergence( - fmax, unc_convergence, neb_converged - ) - if self.converging: - self.save_last_path(self.final_path, self.images) - self.message_system("MLNEB is converged.") - self.print_cite() - break - if not self.converging: - self.message_system("MLNEB did not converge!") - return self - - def get_images(self): - "Get the images." 
- return self.images - - def set_up_endpoints(self, start, end, **kwargs): - "Load and calculate the intial and final states" - # Load initial and final states - if isinstance(start, str): - start = read(start) - if isinstance(end, str): - end = read(end) - # Add initial and final states to ML model - self.add_training([start, end]) - # Store the initial and final energy - start.get_forces() - self.start_energy = start.get_potential_energy() - self.start = copy_atoms(start) - end.get_forces() - self.end_energy = end.get_potential_energy() - self.end = copy_atoms(end) - return - - def use_prev_calculations(self, prev_calculations, **kwargs): - "Use previous calculations to restart ML calculator." - if prev_calculations is not None: - # Use a trajectory file - if isinstance(prev_calculations, str): - prev_calculations = read(prev_calculations, ":") - # Add calculations to the ML model - self.add_training(prev_calculations) - return - - def make_interpolation(self, interpolation="idpp", **kwargs): - "Make the NEB interpolation path" - # Make the interpolation path - images = make_interpolation( - self.start.copy(), - self.end.copy(), - n_images=self.n_images, - method=interpolation, - **self.interpolation_kwargs, - ) - # Check interpolation has the right number of images - if len(images) != self.n_images: - raise Exception( - "The interpolated path has the wrong number of images!" - ) - # Attach the ML calculator to all images - images = self.attach_mlcalc(images) - return images - - def make_reused_interpolation( - self, - unc_convergence, - climb=False, - **kwargs, - ): - """ - Make the NEB interpolation path or use the previous path - if it has low uncertainty. 
- """ - # Whether to reuse the previous path - reuse_path = True - # Make the interpolation from the initial points - if not self.use_restart_path or self.last_images_tmp is None: - if not self.use_restart_path or self.step == 0: - self.message_system( - "The initial interpolation is used as the initial path." - ) - else: - self.message_system( - "The previous initial path is used as the initial path." - ) - reuse_path = False - elif self.check_path_unc or self.check_path_fmax: - # Get uncertainty and max perpendicular force - uncmax_tmp, fmax_tmp = self.get_path_unc_fmax( - interpolation=self.last_images_tmp, - climb=climb, - ) - # Check uncertainty - if self.check_path_unc: - # Check if the uncertainty is too large - if uncmax_tmp > unc_convergence: - reuse_path = False - self.last_images_tmp = None - self.message_system( - "The previous initial path is used as " - "the initial path due to uncertainty." - ) - # Check if the perpendicular force are less for the new path - if self.check_path_fmax and reuse_path: - fmax_last = self.get_path_unc_fmax( - interpolation=self.last_images, - climb=climb, - )[1] - if fmax_tmp > fmax_last: - reuse_path = False - self.last_images_tmp = None - self.message_system( - "The previous initial path is used as " - "the initial path due to fmax." - ) - # Reuse the last path - if reuse_path: - self.message_system("The last path is used as the initial path.") - self.last_images = [image.copy() for image in self.last_images_tmp] - return self.make_interpolation(interpolation=self.last_images) - - def attach_mlcalc(self, imgs, **kwargs): - "Attach the ML calculator to the given images." - images = [copy_atoms(self.start)] - for img in imgs[1:-1]: - image = img.copy() - image.calc = self.mlcalc - images.append(NEBImage(image)) - images.append(copy_atoms(self.end)) - return images - - def parallel_setup(self, save_memory=False, **kwargs): - "Setup the parallelization." 
- self.save_memory = save_memory - self.rank = world.rank - self.size = world.size - return self - - def evaluate(self, candidate, **kwargs): - "Evaluate the ASE atoms with the ASE calculator." - # Ensure that the candidate is not already in the database - if self.use_database_check: - candidate = self.ensure_not_in_database(candidate) - # Broadcast the system to all cpus - if self.rank == 0: - candidate = candidate.copy() - candidate = broadcast(candidate, root=0) - # Calculate the energies and forces - self.message_system("Performing evaluation.", end="\r") - self.candidate.set_positions(candidate.get_positions()) - forces = self.candidate.get_forces( - apply_constraint=self.apply_constraint - ) - self.energy_true = self.candidate.get_potential_energy( - force_consistent=self.force_consistent - ) - self.step += 1 - self.message_system("Single-point calculation finished.") - # Store the data - self.max_abs_forces = np.nanmax(np.linalg.norm(forces, axis=1)) - self.add_training([self.candidate]) - self.save_data() - return - - def add_training(self, atoms_list, **kwargs): - "Add atoms_list data to ML model on rank=0." - self.mlcalc.add_training(atoms_list) - return self.mlcalc - - def train_mlmodel(self, **kwargs): - "Train the ML model" - if self.save_memory and self.rank != 0: - return self.mlcalc - # Update database with the points of interest - self.update_database_arguments(point_interest=self.last_images[1:-1]) - # Train the ML model - self.mlcalc.train_model() - return self.mlcalc - - def set_verbose(self, verbose, **kwargs): - "Set verbose of MLModel." - self.mlcalc.mlmodel.update_arguments(verbose=verbose) - return - - def is_in_database(self, atoms, **kwargs): - "Check if the ASE Atoms is in the database." - return self.mlcalc.is_in_database(atoms, **kwargs) - - def update_database_arguments(self, point_interest=None, **kwargs): - "Update the arguments in the database." 
- self.mlcalc.update_database_arguments( - point_interest=point_interest, - **kwargs, - ) - return self - - def ensure_not_in_database(self, atoms, perturb=0.01, **kwargs): - """ - Ensure the ASE Atoms object is not in database by perturb it if it is. - """ - # Return atoms if it does not exist - if atoms is None: - return atoms - # Check if atoms object is in the database - if self.is_in_database(atoms, **kwargs): - # Get positions - pos = atoms.get_positions() - # Rattle the positions - pos = pos + np.random.uniform( - low=-perturb, - high=perturb, - size=pos.shape, - ) - atoms.set_positions(pos) - self.message_system( - "The system is rattled, since it is already in the database." - ) - return atoms - - def extra_initial_data(self, **kwargs): - """ - If only initial and final state is given then a third data point - is calculated. - """ - candidate = None - if self.get_training_set_size() <= 2: - middle = ( - int((self.n_images - 2) / 3.0) - if self.start_energy >= self.end_energy - else int((self.n_images - 2) * 2.0 / 3.0) - ) - candidate = self.last_images[1 + middle].copy() - if candidate is not None: - self.evaluate(candidate) - return candidate - - def run_mlneb( - self, - fmax=0.05, - ml_steps=750, - max_unc=0.25, - unc_convergence=0.05, - **kwargs, - ): - "Run the NEB on the ML surrogate surface" - # Convergence of the NEB - neb_converged = False - # If memeory is saved NEB is only performed on one CPU - if self.rank != 0: - return None, neb_converged - # Make the interpolation from initial path or the previous path - images = self.make_reused_interpolation( - unc_convergence, climb=self.climb_active - ) - # Run the NEB on the surrogate surface - if self.climb_active: - self.message_system( - "Starting NEB with climbing image on surrogate surface." - ) - else: - self.message_system( - "Starting NEB without climbing image on surrogate surface." 
- ) - images, neb_converged = self.mlneb_opt( - images, - fmax=fmax, - ml_steps=ml_steps, - max_unc=max_unc, - unc_convergence=unc_convergence, - climb=self.climb_active, - ) - self.save_mlneb(images) - self.save_last_path(self.last_path, self.images) - # Get the candidate - candidate = self.choose_candidate(images) - return candidate, neb_converged - - def get_training_set_size(self): - "Get the size of the training set" - return self.mlcalc.get_training_set_size() - - def get_predictions(self, images, **kwargs): - "Calculate the energies and uncertainties with the ML calculator" - energies = [] - uncertainties = [] - for image in images[1:-1]: - uncertainties.append(image.get_property("uncertainty")) - energies.append(image.get_potential_energy()) - return np.array(energies), np.array(uncertainties) - - def get_path_unc_fmax(self, interpolation, climb=False, **kwargs): - """ - Get the maximum uncertainty and fmax prediction from - the NEB interpolation. - """ - uncmax = None - fmax = None - images = self.make_interpolation(interpolation=interpolation) - if self.check_path_unc: - uncmax = np.nanmax(self.get_predictions(images)[1]) - if self.check_path_fmax: - fmax = self.get_fmax_predictions(images, climb=climb) - return uncmax, fmax - - def get_fmax_predictions(self, images, climb=False, **kwargs): - "Calculate the maximum perpendicular force with the ML calculator" - neb = self.neb_method(images, climb=climb, **self.neb_kwargs) - forces = neb.get_forces() - return np.nanmax(np.linalg.norm(forces, axis=1)) - - def choose_candidate(self, images, **kwargs): - "Use acquisition functions to chose the next training point" - # Get the energies and uncertainties - energy_path, unc_path = self.get_predictions(images) - # Store the maximum predictions - self.emax_ml = np.nanmax(energy_path) - self.umax_ml = np.nanmax(unc_path) - self.umean_ml = np.mean(unc_path) - # Calculate the acquisition function for each image - acq_values = self.acq.calculate(energy_path, unc_path) 
- # Chose the maximum value given by the Acq. class - i_min = int(self.acq.choose(acq_values)[0]) - # The next training point - image = images[1 + i_min].copy() - self.energy_pred = energy_path[i_min] - return image - - def mlneb_opt( - self, - images, - fmax=0.05, - ml_steps=750, - max_unc=0.25, - unc_convergence=0.05, - climb=False, - **kwargs, - ): - "Run the ML NEB with checking uncertainties if selected." - # Run the MLNEB fully without consider the uncertainty - if max_unc is False or max_unc is None: - images, converged = self.mlneb_opt_no_max_unc( - images, - fmax=fmax, - ml_steps=ml_steps, - climb=climb, - **kwargs, - ) - else: - # Stop the MLNEB if the uncertainty becomes too large - images, converged = self.mlneb_opt_max_unc( - images, - fmax=fmax, - ml_steps=ml_steps, - max_unc=max_unc, - unc_convergence=unc_convergence, - climb=climb, - **kwargs, - ) - # Activate climbing when the NEB is converged - if converged: - self.message_system("NEB on surrogate surface converged.") - if not climb and self.climb: - # Check the uncertainty is low enough to do CI-NEB if requested - if not self.use_low_unc_ci or ( - np.max(self.get_predictions(images)[1]) <= unc_convergence - ): - # Use CI from here if reuse_ci_path=True - if self.reuse_ci_path: - self.message_system( - "The restart of the climbing image path" - "is actived." - ) - self.climb_active = True - self.message_system( - "Starting NEB with climbing image on" - "surrogate surface." - ) - return self.mlneb_opt( - images, - fmax=fmax, - ml_steps=ml_steps, - max_unc=max_unc, - unc_convergence=unc_convergence, - climb=True, - ) - return images, converged - - def mlneb_opt_no_max_unc( - self, - images, - fmax=0.05, - ml_steps=750, - climb=False, - **kwargs, - ): - "Run the MLNEB fully without consider the uncertainty." 
- # Construct the NEB - neb = self.neb_method(images, climb=climb, **self.neb_kwargs) - with self.local_opt(neb, **self.local_opt_kwargs) as neb_opt: - neb_opt.run(fmax=fmax, steps=ml_steps) - if self.reuse_ci_path or not climb: - self.last_images_tmp = [image.copy() for image in images] - # Check if the MLNEB is converged - converged = neb_opt.converged() - return images, converged - - def mlneb_opt_max_unc( - self, - images, - fmax=0.05, - ml_steps=750, - max_unc=0.25, - unc_convergence=0.05, - climb=False, - **kwargs, - ): - "Run the MLNEB, but stop it if the uncertainty becomes too large." - # Construct the NEB - neb = self.neb_method(images, climb=climb, **self.neb_kwargs) - with self.local_opt(neb, **self.local_opt_kwargs) as neb_opt: - for i in range(1, ml_steps + 1): - # Run the NEB on the surrogate surface - if ase.__version__ >= "3.23": - neb_opt.run(fmax=fmax, steps=1) - else: - neb_opt.run(fmax=fmax, steps=i) - # Calculate energy and uncertainty - energy_path, unc_path = self.get_predictions(images) - # Get the maximum uncertainty of the path - max_unc_path = np.max(unc_path) - # Check if the uncertainty is too large - if max_unc_path >= max_unc: - self.message_system( - "NEB on surrogate surface stopped due " - "to high uncertainty." - ) - break - # Check if there is a problem with prediction - if np.isnan(energy_path).any(): - images = self.make_interpolation( - interpolation=self.last_images_tmp - ) - for image in images: - image.get_forces() - self.message_system( - "Warning: Stopped due to NaN value in prediction!" 
- ) - break - # Make backup of images before the next NEB step, - # which can be used as a restart interpolation - if self.reuse_ci_path or not climb: - if not self.check_path_unc or ( - max_unc_path <= unc_convergence - ): - self.last_images_tmp = [ - image.copy() for image in images - ] - - # Check if the NEB is converged on the predicted surface - if neb_opt.converged(): - break - # Check the number of steps - if neb_opt.get_number_of_steps() >= ml_steps: - break - # Check if the MLNEB is converged - converged = neb_opt.converged() - return images, converged - - def save_mlneb(self, images, **kwargs): - "Save the MLNEB result in the trajectory." - self.images = [] - for image in images: - image = copy_atoms(image) - self.images.append(image) - self.trajectory_neb.write(image) - return self.images - - def share_images(self, **kwargs): - "Share the images between all CPUs." - self.images = broadcast(self.images, root=0) - return - - def save_data(self, **kwargs): - "Save the training data to trajectory file." - self.mlcalc.save_data(trajectory=self.trainingset) - return - - def save_last_path( - self, - trajname, - images, - properties=["energy", "forces", "uncertainty"], - **kwargs, - ): - "Save the final MLNEB path in the trajectory file." - if self.rank == 0 and isinstance(trajname, str) and len(trajname): - with TrajectoryWriter( - trajname, mode="w", properties=properties - ) as trajectory_last: - for image in images: - trajectory_last.write(copy_atoms(image)) - return - - def get_barrier(self, forward=True, **kwargs): - "Get the forward or backward predicted potential energy barrier." - if forward: - return self.emax_ml - self.start_energy - return self.emax_ml - self.end_energy - - def message_system(self, message, obj=None, end="\n"): - "Print output once." 
- if self.full_output is True: - if self.rank == 0: - if obj is None: - print(message, end=end) - else: - print(message, obj, end=end) - return - - def check_convergence( - self, - fmax, - unc_convergence, - neb_converged, - **kwargs, - ): - """ - Check if the ML-NEB is converged to the final path with - low uncertainty. - """ - converged = False - if self.rank == 0: - # Check if NEB on the predicted energy is converged - if neb_converged: - # Check the force criterion is met if climbing image is used - if not self.climb or self.max_abs_forces <= fmax: - # Check the uncertainty criterion is met - if self.umax_ml <= unc_convergence: - # Check the true energy deviation match - # the uncertainty prediction - e_dif = np.abs(self.energy_pred - self.energy_true) - if e_dif <= 2.0 * unc_convergence: - converged = True - # Broadcast convergence statement - converged = broadcast(converged, root=0) - return converged - - def converged(self): - "Whether MLNEB is converged." - return self.converging - - def set_neb_method(self, neb_method=None, **kwargs): - """ - Set the NEB method. - - Parameters: - neb_method : class object or str - The NEB implemented class object used for the ML-NEB. - A string can be used to select: - - 'improvedtangentneb' (default) - - 'ewneb' - """ - if neb_method is None: - neb_method = ImprovedTangentNEB - elif isinstance(neb_method, str): - if neb_method.lower() == "improvedtangentneb": - neb_method = ImprovedTangentNEB - elif neb_method.lower() == "ewneb": - from .neb.ewneb import EWNEB - - neb_method = EWNEB - else: - raise Exception( - "The NEB method {} is not implemented.".format(neb_method) - ) - self.neb_method = neb_method - return self - - def set_mlcalc(self, mlcalc, start=None, save_memory=None, **kwargs): - """ - Setup the ML calculator. - - Parameters: - mlcalc : ML-calculator instance. - The ML-calculator instance used as surrogate surface. - A default ML-model is used if mlcalc is None. 
- start : Atoms object - Initial end-point of the NEB path. - save_memory : bool - Whether to only train the ML calculator and store - all objects on one CPU. - If save_memory==True then parallel optimization of - the hyperparameters can not be achived. - If save_memory==False no MPI object is used. - - Returns: - self: The object itself. - """ - if mlcalc is None: - from ..regression.gp.calculator.mlmodel import get_default_mlmodel - from ..regression.gp.calculator.mlcalc import MLCalculator - from ..regression.gp.means.max import Prior_max - from ..regression.gp.fingerprint.invdistances import InvDistances - - # Check if the start Atoms object is given - if start is None: - try: - start = self.start.copy() - except Exception: - raise Exception("The start Atoms object is not given.") - # Check if the save_memory is given - if save_memory is None: - try: - save_memory = self.save_memory - except Exception: - raise Exception("The save_memory is not given.") - - if len(start) > 1: - if start.pbc.any(): - fp = InvDistances( - reduce_dimensions=True, - use_derivatives=True, - periodic_softmax=True, - wrap=True, - ) - else: - fp = InvDistances( - reduce_dimensions=True, - use_derivatives=True, - periodic_softmax=False, - wrap=False, - ) - else: - fp = None - prior = Prior_max(add=1.0) - mlmodel = get_default_mlmodel( - model="tp", - prior=prior, - fp=fp, - baseline=None, - use_derivatives=True, - parallel=(not save_memory), - database_reduction=False, - ) - self.mlcalc = MLCalculator(mlmodel=mlmodel) - else: - self.mlcalc = mlcalc - return self - - def set_acq(self, acq=None, **kwargs): - """ - Select an acquisition function. - - Parameters: - acq : Acquisition class instance - The acquisition function object. - If None is given then UME is used. - - Returns: - self: The object itself. 
- """ - if acq is None: - from .acquisition import AcqUME - - self.acq = AcqUME(objective="max", unc_convergence=0.05) - else: - self.acq = acq.copy() - return self - - def set_local_opt(self, local_opt=None, local_opt_kwargs={}, **kwargs): - """ - Save local optimizer. - - Parameters: - local_opt : ASE local optimizer Object. - A local optimizer object from ASE. - If None is given then FIRE is used. - local_opt_kwargs : dict - Arguments used for the ASE local optimizer. - - Returns: - self: The object itself. - """ - local_opt_kwargs_default = dict() - if not self.full_output: - local_opt_kwargs_default["logfile"] = None - if local_opt is None: - from ase.optimize import FIRE - - local_opt = FIRE - local_opt_kwargs_default.update( - dict(dt=0.05, maxstep=0.2, a=1.0, astart=1.0, fa=0.999) - ) - self.local_opt = local_opt - local_opt_kwargs_default.update(local_opt_kwargs) - self.local_opt_kwargs = local_opt_kwargs_default.copy() - return self - - def save_mlcalc(self, filename="mlcalc.pkl", **kwargs): - """ - Save the ML calculator object to a file. - - Parameters: - filename : str - The name of the file where the object is saved. - - Returns: - self: The object itself. - """ - self.mlcalc.save_mlcalc(filename, **kwargs) - return self - - def print_cite(self): - msg = "\n" + "-" * 79 + "\n" - msg += "You are using MLNEB. Please cite: \n" - msg += "[1] J. A. Garrido Torres, M. H. Hansen, P. C. Jennings, " - msg += "J. R. Boes and T. Bligaard. Phys. Rev. Lett. 122, 156001. " - msg += "https://doi.org/10.1103/PhysRevLett.122.156001 \n" - msg += "[2] O. Koistinen, F. B. Dagbjartsdottir, V. Asgeirsson, " - msg += "A. Vehtari and H. Jonsson. J. Chem. Phys. 147, 152720. " - msg += "https://doi.org/10.1063/1.4986787 \n" - msg += "-" * 79 + "\n" - self.message_system(msg) - return - - def make_summary_table(self, step, **kwargs): - "Make the summary of the NEB process as table." 
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - try: - len(self.print_neb_list) - except Exception: - self.print_neb_list = [ - "| Step | Time | Pred. barrier (-->) | " - "Pred. barrier (<--) | Max. uncert. | " - "Avg. uncert. | fmax |" - ] - msg = "|{0:6d}| ".format(step) - msg += "{} |".format(now) - msg += "{0:21f}|".format(self.get_barrier(forward=True)) - msg += "{0:21f}|".format(self.get_barrier(forward=False)) - msg += "{0:14f}|".format(self.umax_ml) - msg += "{0:14f}|".format(np.mean(self.umean_ml)) - msg += "{0:10f}|".format(self.max_abs_forces) - self.print_neb_list.append(msg) - msg = "\n".join(self.print_neb_list) - return msg - - def save_summary_table(self, **kwargs): - "Save the summary table in the .txt file." - if isinstance(self.tabletxt, str) and len(self.tabletxt): - with open(self.tabletxt, "w") as thefile: - msg = "\n".join(self.print_neb_list) - thefile.writelines(msg) - return - - def print_statement(self, step, **kwargs): - "Print the NEB process as a table" - msg = "" - if self.rank == 0: - msg = self.make_summary_table(step, **kwargs) - self.save_summary_table() - self.message_system(msg) - return msg diff --git a/catlearn/optimize/neb/avgewneb.py b/catlearn/optimize/neb/avgewneb.py deleted file mode 100644 index fccf066a..00000000 --- a/catlearn/optimize/neb/avgewneb.py +++ /dev/null @@ -1,26 +0,0 @@ -import numpy as np -from .ewneb import EWNEB - - -class AvgEWNEB(EWNEB): - - def get_parallel_forces(self, tangent, pos_p, pos_m, **kwargs): - energies = self.get_energies() - if self.use_minimum: - e0 = np.min([energies[0], energies[-1]]) - else: - e0 = np.max([energies[0], energies[-1]]) - emax = np.max(energies) - k_l = self.k * self.kl_scale - if e0 < emax: - a = (emax - energies) / (emax - e0) - a = np.where(a < 1.0, a, 1.0) - a = 0.5 * (a[1:] + a[:-1]) - k = ((1.0 - a) * self.k) + (a * k_l) - else: - k = k_l.copy() - forces_parallel = (k[1:] * np.linalg.norm(pos_p, axis=(1, 2))) - ( - k[:-1] * np.linalg.norm(pos_m, 
axis=(1, 2)) - ) - forces_parallel = forces_parallel.reshape(-1, 1, 1) * tangent - return forces_parallel diff --git a/catlearn/optimize/neb/ewneb.py b/catlearn/optimize/neb/ewneb.py deleted file mode 100644 index 6f2c05b6..00000000 --- a/catlearn/optimize/neb/ewneb.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -from .improvedneb import ImprovedTangentNEB - - -class EWNEB(ImprovedTangentNEB): - def __init__( - self, - images, - k=0.1, - kl_scale=0.1, - use_minimum=False, - climb=False, - remove_rotation_and_translation=False, - mic=True, - **kwargs - ): - super().__init__( - images, - k=k, - climb=climb, - remove_rotation_and_translation=remove_rotation_and_translation, - mic=mic, - **kwargs - ) - self.kl_scale = kl_scale - self.use_minimum = use_minimum - - def get_parallel_forces(self, tangent, pos_p, pos_m, **kwargs): - energies = self.get_energies() - if self.use_minimum: - e0 = np.min([energies[0], energies[-1]]) - else: - e0 = np.max([energies[0], energies[-1]]) - emax = np.max(energies) - k_l = self.k * self.kl_scale - if e0 < emax: - a = (emax - energies[:-1]) / (emax - e0) - k = np.where(a < 1.0, (1.0 - a) * self.k + a * k_l, k_l) - else: - k = k_l.copy() - forces_parallel = (k[1:] * np.linalg.norm(pos_p, axis=(1, 2))) - ( - k[:-1] * np.linalg.norm(pos_m, axis=(1, 2)) - ) - forces_parallel = forces_parallel.reshape(-1, 1, 1) * tangent - return forces_parallel diff --git a/catlearn/optimize/neb/improvedneb.py b/catlearn/optimize/neb/improvedneb.py deleted file mode 100644 index fc3e5cb2..00000000 --- a/catlearn/optimize/neb/improvedneb.py +++ /dev/null @@ -1,65 +0,0 @@ -import numpy as np -from .orgneb import OriginalNEB - - -class ImprovedTangentNEB(OriginalNEB): - def __init__( - self, - images, - k=0.1, - climb=False, - remove_rotation_and_translation=False, - mic=True, - **kwargs - ): - super().__init__( - images, - k=k, - climb=climb, - remove_rotation_and_translation=remove_rotation_and_translation, - mic=mic, - **kwargs - ) - - def 
get_parallel_forces(self, tangent, pos_p, pos_m, **kwargs): - forces_parallel = (self.k[1:] * np.linalg.norm(pos_p, axis=(1, 2))) - ( - self.k[:-1] * np.linalg.norm(pos_m, axis=(1, 2)) - ) - forces_parallel = forces_parallel.reshape(-1, 1, 1) * tangent - return forces_parallel - - def get_tangent(self, pos_p, pos_m, **kwargs): - tangent = np.empty((int(self.nimages - 2), self.natoms, 3)) - energies = self.get_energies() - for i in range(1, self.nimages - 1): - if energies[i + 1] > energies[i] and energies[i] > energies[i - 1]: - tangent[i - 1] = pos_p[i - 1] - elif ( - energies[i + 1] < energies[i] and energies[i] < energies[i - 1] - ): - tangent[i - 1] = pos_m[i - 1] - elif energies[i + 1] > energies[i - 1]: - energy_dif = [ - abs(energies[i + 1] - energies[i]), - abs(energies[i - 1] - energies[i]), - ] - tangent[i - 1] = (pos_p[i - 1] * max(energy_dif)) + ( - pos_m[i - 1] * min(energy_dif) - ) - elif energies[i + 1] < energies[i - 1]: - energy_dif = [ - abs(energies[i + 1] - energies[i]), - abs(energies[i - 1] - energies[i]), - ] - tangent[i - 1] = (pos_p[i - 1] * min(energy_dif)) + ( - pos_m[i - 1] * max(energy_dif) - ) - else: - tangent[i - 1] = ( - pos_p[i - 1] / np.linalg.norm(pos_p[i - 1]) - ) + (pos_m[i - 1] / np.linalg.norm(pos_m[i - 1])) - tangent = tangent / np.linalg.norm( - tangent, - axis=(1, 2), - ).reshape(-1, 1, 1) - return tangent diff --git a/catlearn/optimize/neb/interpolate_band.py b/catlearn/optimize/neb/interpolate_band.py deleted file mode 100644 index 8fe5c765..00000000 --- a/catlearn/optimize/neb/interpolate_band.py +++ /dev/null @@ -1,290 +0,0 @@ -import numpy as np -from ase.optimize import FIRE - - -def interpolate( - start, - end, - ts=None, - n_images=15, - method="linear", - mic=True, - remove_rotation_and_translation=True, - **interpolation_kwargs, -): - """ - Make an interpolation between the start and end structure. - A transition state structure can be given to guide the interpolation. 
- """ - # The rotation and translation should be removed the end structure - # is optimized compared to start structure - if remove_rotation_and_translation: - from ase.build import minimize_rotation_and_translation - - start.center() - end.center() - minimize_rotation_and_translation(start, end) - # If the transition state is not given then make a regular interpolation - if ts is None: - images = make_interpolation( - start, - end, - n_images=n_images, - method=method, - mic=mic, - remove_rotation_and_translation=remove_rotation_and_translation, - **interpolation_kwargs, - ) - return images - # Get the interpolated path from the start structure to the TS structure - images = make_interpolation( - start, - ts, - n_images=n_images, - method=method, - mic=mic, - remove_rotation_and_translation=remove_rotation_and_translation, - **interpolation_kwargs, - ) - # Get the cumulative distance from the start to the TS structure - dis_st = get_images_distance(images) - # Get the interpolated path from the TS structure to the end structure - images = make_interpolation( - ts, - end, - n_images=n_images, - method=method, - mic=mic, - remove_rotation_and_translation=remove_rotation_and_translation, - **interpolation_kwargs, - ) - # Get the cumulative distance from the TS to the end structure - dis_et = get_images_distance(images) - # Calculate the number of images from start to the TS from the distance - n_images_st = int(n_images * dis_st / (dis_st + dis_et)) - n_images_st = 2 if n_images_st < 2 else n_images_st - # Get the interpolated path from the start structure to - # the TS structure with the correct number of images - images1 = make_interpolation( - start, - ts, - n_images=n_images_st, - method=method, - mic=mic, - remove_rotation_and_translation=remove_rotation_and_translation, - **interpolation_kwargs, - ) - # Get the interpolated path from the TS structure to - # the end structure with the corrct number of images - images2 = make_interpolation( - ts, - end, - 
n_images=int(n_images - n_images_st + 1), - method=method, - mic=mic, - remove_rotation_and_translation=remove_rotation_and_translation, - **interpolation_kwargs, - )[1:] - return list(images1) + list(images2) - - -def make_interpolation( - start, - end, - n_images=15, - method="linear", - mic=True, - **interpolation_kwargs, -): - "Make the NEB interpolation path." - # Use a premade interpolation path - if isinstance(method, (list, np.ndarray)): - images = method.copy() - elif isinstance(method, str) and method.lower() not in [ - "linear", - "idpp", - "rep", - "ends", - ]: - # Import interpolation from a trajectory file - from ase.io import read - - images = read(method, "-{}:".format(n_images)) - else: - # Make path by the NEB methods interpolation - images = [start.copy() for i in range(n_images - 1)] + [end.copy()] - if method.lower() == "ends": - images = make_end_interpolations( - images, - mic=mic, - **interpolation_kwargs, - ) - else: - images = make_linear_interpolation( - images, - mic=mic, - **interpolation_kwargs, - ) - if method.lower() == "idpp": - images = make_idpp_interpolation( - images, - mic=mic, - **interpolation_kwargs, - ) - elif method.lower() == "rep": - images = make_rep_interpolation( - images, - mic=mic, - **interpolation_kwargs, - ) - return images - - -def make_linear_interpolation(images, mic=False, **kwargs): - "Make the linear interpolation from initial to final state." 
- from ase.geometry import find_mic - - # Get the position of initial state - pos0 = images[0].get_positions() - # Get the distance to the final state - dist = images[-1].get_positions() - pos0 - # Calculate the minimum-image convention if mic=True - if mic: - dist = find_mic(dist, images[0].get_cell(), images[0].pbc)[0] - # Calculate the distance moved for each image - dist = dist / float(len(images) - 1) - # Set the positions - for i in range(1, len(images) - 1): - images[i].set_positions(pos0 + (i * dist)) - return images - - -def make_idpp_interpolation( - images, - mic=False, - fmax=1.0, - steps=100, - local_opt=FIRE, - local_kwargs={}, - **kwargs, -): - """ - Make the IDPP interpolation from initial to final state - from NEB optimization. - """ - from .improvedneb import ImprovedTangentNEB - from ...regression.gp.baseline import IDPP - - # Get all distances in the system - dist0 = images[0].get_all_distances(mic=mic) - # Calculate the differences in the distances in the system for IDPP - dist = (images[-1].get_all_distances(mic=mic) - dist0) / float( - len(images) - 1 - ) - # Use IDPP as calculator - new_images = [] - for i in range(len(images)): - image = images[i].copy() - target = dist0 + i * dist - image.calc = IDPP(target=target, mic=mic) - new_images.append(image) - # Make default NEB - neb = ImprovedTangentNEB(new_images) - # Set local optimizer arguments - local_kwargs_default = dict(trajectory="idpp.traj", logfile="idpp.log") - if isinstance(local_opt, FIRE): - local_kwargs_default.update( - dict(dt=0.05, a=1.0, astart=1.0, fa=0.999, maxstep=0.2) - ) - local_kwargs_default.update(local_kwargs) - # Optimize NEB path with IDPP - with local_opt(neb, **local_kwargs_default) as opt: - opt.run(fmax=fmax, steps=steps) - return new_images - - -def make_rep_interpolation( - images, - mic=False, - fmax=1.0, - steps=100, - local_opt=FIRE, - local_kwargs={}, - **kwargs, -): - """ - Make a repulsive potential to get the interpolation from NEB optimization. 
- """ - from .improvedneb import ImprovedTangentNEB - from ...regression.gp.baseline import RepulsionCalculator - - # Use Repulsive potential as calculator - new_images = [] - for i in range(len(images)): - image = images[i].copy() - image.calc = RepulsionCalculator(power=10, mic=mic) - new_images.append(image) - # Make default NEB - neb = ImprovedTangentNEB(new_images) - # Set local optimizer arguments - local_kwargs_default = dict(trajectory="rep.traj", logfile="rep.log") - if isinstance(local_opt, FIRE): - local_kwargs_default.update( - dict(dt=0.05, a=1.0, astart=1.0, fa=0.999, maxstep=0.2) - ) - local_kwargs_default.update(local_kwargs) - # Optimize NEB path with repulsive potential - with local_opt(neb, **local_kwargs_default) as opt: - opt.run(fmax=fmax, steps=steps) - return new_images - - -def make_end_interpolations(images, mic=False, trust_dist=0.2, **kwargs): - """ - Make the linear interpolation from initial to final state, - but place the images at the initial and final states with - the maximum distance as trust_dist. 
- """ - from ase.geometry import find_mic - - # Get the number of images - n_images = len(images) - # Get the position of initial state - pos0 = images[0].get_positions() - # Get the distance to the final state - dist = images[-1].get_positions() - pos0 - # Calculate the minimum-image convention if mic=True - if mic: - dist = find_mic(dist, images[0].get_cell(), images[0].pbc)[0] - # Calculate the scaled distance - scale_dist = 2.0 * trust_dist / np.linalg.norm(dist) - # Check if the distance is within the trust distance - if scale_dist >= 1.0: - # Calculate the distance moved for each image - dist = dist / float(n_images - 1) - # Set the positions - for i in range(1, n_images - 1): - images[i].set_positions(pos0 + (i * dist)) - return images - # Calculate the distance moved for each image - dist = dist * (scale_dist / float(n_images - 1)) - # Get the position of final state - posn = images[-1].get_positions() - # Set the positions - nfirst = int(0.5 * (n_images - 1)) - for i in range(1, n_images - 1): - if i <= nfirst: - images[i].set_positions(pos0 + (i * dist)) - else: - images[i].set_positions(posn - ((n_images - 1 - i) * dist)) - return images - - -def get_images_distance(images): - "Get the cumulative distacnce of the images." 
- dis = 0.0 - for i in range(len(images) - 1): - dis += np.linalg.norm( - images[i + 1].get_positions() - images[i].get_positions() - ) - return dis diff --git a/catlearn/optimize/neb/maxewneb.py b/catlearn/optimize/neb/maxewneb.py deleted file mode 100644 index 8b56699e..00000000 --- a/catlearn/optimize/neb/maxewneb.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np -from .improvedneb import ImprovedTangentNEB - - -class MaxEWNEB(ImprovedTangentNEB): - def __init__( - self, - images, - k=0.1, - kl_scale=0.1, - dE=0.01, - climb=False, - remove_rotation_and_translation=False, - mic=True, - **kwargs - ): - super().__init__( - images, - k=k, - climb=climb, - remove_rotation_and_translation=remove_rotation_and_translation, - mic=mic, - **kwargs - ) - self.kl_scale = kl_scale - self.dE = dE - - def get_parallel_forces(self, tangent, pos_p, pos_m, **kwargs): - energies = self.get_energies() - emax = np.max(energies) - e0 = emax - self.dE - k_l = self.k * self.kl_scale - if e0 < emax: - a = (emax - energies[:-1]) / (emax - e0) - k = np.where(a < 1.0, (1.0 - a) * self.k + a * k_l, k_l) - else: - k = k_l.copy() - forces_parallel = (k[1:] * np.linalg.norm(pos_p, axis=(1, 2))) - ( - k[:-1] * np.linalg.norm(pos_m, axis=(1, 2)) - ) - forces_parallel = forces_parallel.reshape(-1, 1, 1) * tangent - return forces_parallel diff --git a/catlearn/optimize/neb/nebimage.py b/catlearn/optimize/neb/nebimage.py deleted file mode 100644 index 5cc14d48..00000000 --- a/catlearn/optimize/neb/nebimage.py +++ /dev/null @@ -1,104 +0,0 @@ -from ...regression.gp.calculator.copy_atoms import copy_atoms - - -class NEBImage: - def __init__(self, atoms): - """ - An image for NEB as a wrapper for the Atoms instance. - The calculated results are stored within so multiple - calculations can be avoided. - - Parameters: - atoms : Atoms instance. - The Atoms instance with a calculator. 
- """ - self.atoms = atoms - self.cell = self.atoms.cell - self.pbc = self.atoms.pbc - self.reset() - - def get_positions(self, *args, **kwargs): - return self.atoms.get_positions(*args, **kwargs) - - def set_positions(self, *args, **kwargs): - output = self.atoms.set_positions(*args, **kwargs) - self.reset() - return output - - def get_property(self, name, allow_calculation=True, **kwargs): - """ - Get or calculate the requested property. - - Parameters: - name : str - The name of the requested property. - allow_calculation : bool - Whether the property is allowed to be calculated. - - Returns: - float or list: The requested property. - """ - if (self.atoms_saved.calc is not None) and ( - name in self.atoms_saved.calc.results - ): - return self.atoms_saved.calc.get_property( - name, allow_calculation=True, **kwargs - ) - output = self.atoms.calc.get_property( - name, - atoms=self.atoms, - allow_calculation=allow_calculation, - **kwargs - ) - self.store_results() - return output - - def get_potential_energy(self, *args, **kwargs): - if (self.atoms_saved.calc is not None) and ( - "energy" in self.atoms_saved.calc.results - ): - return self.atoms_saved.get_potential_energy(*args, **kwargs) - energy = self.atoms.get_potential_energy(*args, **kwargs) - self.store_results() - return energy - - def get_forces(self, *args, **kwargs): - if (self.atoms_saved.calc is not None) and ( - "force" in self.atoms_saved.calc.results - ): - return self.atoms_saved.get_forces(*args, **kwargs) - force = self.atoms.get_forces(*args, **kwargs) - self.store_results() - return force - - def get_atomic_numbers(self): - return self.atoms.get_atomic_numbers() - - def get_cell(self): - return self.atoms.get_cell() - - def get_tags(self): - return self.atoms.get_tags() - - def store_results(self, **kwargs): - """ - Store the calculated results. 
- """ - self.atoms_saved = copy_atoms(self.atoms) - self.calc = self.atoms_saved.calc - return self.atoms_saved - - def reset(self, **kwargs): - """ - Reset the stored properties. - """ - self.atoms_saved = self.atoms.copy() - self.calc = None - return self - - def __len__(self): - return len(self.atoms) - - def copy(self): - "Copy and get the Atoms instance." - return self.atoms.copy() diff --git a/catlearn/optimize/neb/orgneb.py b/catlearn/optimize/neb/orgneb.py deleted file mode 100644 index 7cb33154..00000000 --- a/catlearn/optimize/neb/orgneb.py +++ /dev/null @@ -1,303 +0,0 @@ -import numpy as np -from ase.calculators.singlepoint import SinglePointCalculator -from ase.build import minimize_rotation_and_translation -from ...regression.gp.fingerprint.geometry import mic_distance - - -class OriginalNEB: - def __init__( - self, - images, - k=0.1, - climb=False, - remove_rotation_and_translation=False, - mic=True, - **kwargs - ): - """ - The orginal Nudged Elastic Band method implementation for the tangent - and parallel force. - - Parameters: - images : List of ASE Atoms instances - The ASE Atoms instances used as the images of the initial path - that is optimized. - k : List of floats or float - The (Nimg-1) spring forces acting between each image. - climb : bool - Whether to use climbing image in the NEB. - remove_rotation_and_translation : bool - Whether to remove rotation and translation in interpolation - and when predicting forces. - mic : bool - Minimum Image Convention (Shortest distances when - periodic boundary conditions are used). 
- - """ - self.images = images - self.nimages = len(images) - self.natoms = len(images[0]) - if isinstance(k, (int, float)): - self.k = np.full(self.nimages - 1, k) - else: - self.k = k.copy() - self.climb = climb - self.rm_rot_trans = remove_rotation_and_translation - self.mic = mic - self.reset() - - def interpolate(self, method="linear", mic=True, **kwargs): - """ - Make an interpolation between the start and end structure. - - Parameters: - method : str - The method used for performing the interpolation. - The optional methods is {linear, idpp, ends}. - mic : bool - Whether to use the minimum-image convention. - - Returns: - self: The instance itself. - """ - from .interpolate_band import interpolate - - self.images = interpolate( - self.images[0].copy(), - self.images[-1].copy(), - n_images=self.nimages, - method=method, - mic=mic, - remove_rotation_and_translation=self.rm_rot_trans, - **kwargs - ) - return self - - def get_positions(self): - """ - Get the positions of all the moving images in one array. - - Returns: - ((Nimg-2)*Natoms,3) array: Coordinates of all atoms in - all the moving images. - """ - return np.array( - [image.get_positions() for image in self.images[1:-1]] - ).reshape(-1, 3) - - def set_positions(self, positions, **kwargs): - """ - Set the positions of all the images in one array. - - Parameters: - positions : ((Nimg-2)*Natoms,3) array - Coordinates of all atoms in all the moving images. - """ - self.reset() - for i, image in enumerate(self.images[1:-1]): - image.set_positions( - positions[i * self.natoms : (i + 1) * self.natoms] - ) - pass - - def get_potential_energy(self, **kwargs): - """ - Get the potential energy of the NEB as the sum of energies. - - Returns: - float: Sum of energies of moving images. - """ - return np.sum(self.get_energies(**kwargs)[1:-1]) - - def get_forces(self, **kwargs): - """ - Get the forces of the NEB as the stacked forces of the moving images. 
- - Returns: - ((Nimg-2)*Natoms,3) array: Forces of all the atoms in - all the moving images. - """ - # Remove rotation and translation - if self.rm_rot_trans: - for i in range(1, self.nimages): - minimize_rotation_and_translation( - self.images[i - 1], - self.images[i], - ) - # Get the forces for each image - forces = self.calculate_forces() - # Get change in the coordinates to the previous and later image - position_plus, position_minus = self.get_position_diff() - # Calculate the tangent to the moving images - tangent = self.get_tangent(position_plus, position_minus) - # Calculate the parallel forces between images - parallel_forces = self.get_parallel_forces( - tangent, - position_plus, - position_minus, - ) - # Calculate the perpendicular forces - perpendicular_forces = self.get_perpendicular_forces(tangent, forces) - # Calculate the full force - forces_new = parallel_forces + perpendicular_forces - # Calculate the force of the climbing image - if self.climb: - i_max = np.argmax(self.get_energies()[1:-1]) - forces_new[i_max] = forces[i_max] - ( - (2.0 * np.vdot(forces[i_max], tangent[i_max])) * tangent[i_max] - ) - return forces_new.reshape(-1, 3) - - def get_image_positions(self): - """ - Get the positions of the images. - - Returns: - ((Nimg),Natoms,3) array: The positions for all atoms in - all the images. - """ - return np.array([image.get_positions() for image in self.images]) - - def calculate_forces(self, **kwargs): - "Calculate the forces for all the images separately." - if self.real_forces is None: - self.calculate_properties() - return self.real_forces[1:-1].copy() - - def get_energies(self, **kwargs): - "Get the individual energy for each image." - if self.energies is None: - self.calculate_properties() - return self.energies - - def calculate_properties(self, **kwargs): - "Calculate the energy and forces for each image." 
- self.real_forces = np.zeros((self.nimages, self.natoms, 3)) - self.energies = np.zeros((self.nimages)) - for i, image in enumerate(self.images): - if (not i == 0) or (not i == self.nimages - 1): - self.real_forces[i] = image.get_forces().copy() - self.energies[i] = image.get_potential_energy() - return self.energies, self.real_forces - - def emax(self, **kwargs): - "Get maximum energy of the moving images." - return np.nanmax(self.get_energies(**kwargs)[1:-1]) - - def get_parallel_forces(self, tangent, pos_p, pos_m, **kwargs): - "Get the parallel forces between the images." - forces_parallel = np.array( - [ - np.vdot( - (self.k[i + 1] * pos_p[i]) - (self.k[i] * pos_m[i]), - tangent[i], - ) - for i in range(len(tangent)) - ] - ) - forces_parallel = forces_parallel.reshape(-1, 1, 1) * tangent - return forces_parallel - - def get_perpendicular_forces(self, tangent, forces, **kwargs): - "Get the perpendicular forces to the images." - return forces - ( - np.sum(forces * tangent, axis=(1, 2)).reshape(-1, 1, 1) * tangent - ) - - def get_position_diff(self): - """ - Get the change in the coordinates relative to - the previous and later image. - """ - positions = self.get_image_positions() - position_diff = positions[1:] - positions[:-1] - pbc = np.array(self.images[0].get_pbc()) - if self.mic and pbc.any(): - cell = np.array(self.images[0].get_cell()) - position_diff = mic_distance( - position_diff, - cell, - pbc, - vector=True, - )[1] - return position_diff[1:], position_diff[:-1] - - def get_tangent(self, pos_p, pos_m, **kwargs): - "Calculate the tangent to the moving images." - # Normalization - tangent_m = pos_m / ( - np.linalg.norm(pos_m, axis=(1, 2)).reshape(-1, 1, 1) - ) - tangent_p = pos_p / ( - np.linalg.norm(pos_p, axis=(1, 2)).reshape(-1, 1, 1) - ) - # Sum them - tangent = tangent_m + tangent_p - tangent = tangent / np.linalg.norm( - tangent, - axis=(1, 2), - ).reshape(-1, 1, 1) - return tangent - - def reset(self): - "Reset the stored properties." 
- self.energies = None - self.real_forces = None - return self - - def get_residual(self, **kwargs): - "Get the residual of the NEB." - forces = self.get_forces() - return np.max(np.linalg.norm(forces, axis=-1)) - - def set_calculator(self, calculators): - """ - Set the calculators for all the images. - - Parameters: - calculators : List of ASE Calculators or ASE Calculator - The calculator used for all the images if a list is given. - If a single calculator is given, it is used for all images. - """ - if isinstance(calculators, (list, tuple)): - if len(calculators) != self.nimages - 2: - raise Exception( - "The number of calculators must be " - "equal to the number of moving images." - ) - for i, image in enumerate(self.images[1:-1]): - image.calc = calculators[i] - else: - for image in self.images[1:-1]: - image.calc = calculators - return self - - def converged(self, forces, fmax): - return np.linalg.norm(forces, axis=1).max() < fmax - - def is_neb(self): - return True - - def __ase_optimizable__(self): - return self - - def __len__(self): - return int(self.nimages - 2) * self.natoms - - def freeze_results_on_image(self, atoms, **results_to_include): - atoms.calc = SinglePointCalculator(atoms=atoms, **results_to_include) - return atoms - - def iterimages(self): - # Allows trajectory to convert NEB into several images - for i, atoms in enumerate(self.images): - if i == 0 or i == self.nimages - 1: - yield atoms - else: - atoms = atoms.copy() - atoms = self.freeze_results_on_image( - atoms, - energy=self.energies[i], - forces=self.real_forces[i], - ) - yield atoms diff --git a/catlearn/optimizer/__init__.py b/catlearn/optimizer/__init__.py new file mode 100644 index 00000000..33e4f527 --- /dev/null +++ b/catlearn/optimizer/__init__.py @@ -0,0 +1,20 @@ +from .method import OptimizerMethod +from .local import LocalOptimizer +from .localneb import LocalNEB +from .localcineb import LocalCINEB +from .adsorption import AdsorptionOptimizer +from .randomadsorption import 
RandomAdsorptionOptimizer +from .sequential import SequentialOptimizer +from .parallelopt import ParallelOptimizer + + +__all__ = [ + "OptimizerMethod", + "LocalOptimizer", + "LocalNEB", + "LocalCINEB", + "AdsorptionOptimizer", + "RandomAdsorptionOptimizer", + "SequentialOptimizer", + "ParallelOptimizer", +] diff --git a/catlearn/optimizer/adsorption.py b/catlearn/optimizer/adsorption.py new file mode 100644 index 00000000..de7c2b47 --- /dev/null +++ b/catlearn/optimizer/adsorption.py @@ -0,0 +1,561 @@ +from .method import OptimizerMethod +from ase.parallel import world +from ase.constraints import FixAtoms, FixBondLengths +import itertools +from numpy import array, asarray, concatenate, cos, inf, matmul, pi, sin +from numpy.linalg import norm +from scipy import __version__ as scipy_version +from scipy.optimize import dual_annealing + + +class AdsorptionOptimizer(OptimizerMethod): + """ + The AdsorptionOptimizer is used to run a global optimization of + an adsorption on a surface. + A single structure will be created and optimized. + Simulated annealing will be used to global optimize the structure. + The adsorbate is optimized on a surface, where the bond-lengths of the + adsorbate atoms are fixed and the slab atoms are fixed. + The AdsorptionOptimizer is applicable to be used with + active learning. + """ + + def __init__( + self, + slab, + adsorbate, + adsorbate2=None, + bounds=None, + use_initial_struc=False, + opt_kwargs={}, + bond_tol=1e-8, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + """ + Initialize the OptimizerMethod instance. + + Parameters: + slab: Atoms instance + The slab structure. + adsorbate: Atoms instance + The adsorbate structure. + adsorbate2: Atoms instance (optional) + The second adsorbate structure. + bounds: (6,2) or (12,2) ndarray (optional). + The boundary conditions used for the global optimization in + form of the simulated annealing. 
+ The boundary conditions are the x, y, and z coordinates of + the center of the adsorbate and 3 rotations. + Same boundary conditions can be set for the second adsorbate + if chosen. + use_initial_struc: bool + If True, the initial structure is used as one of the drawn + structures. + opt_kwargs: dict + The keyword arguments for the simulated annealing optimization. + bond_tol: float + The bond tolerance used for the FixBondLengths. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator instance for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the verbose + self.verbose = verbose + # Create the atoms object from the slab and adsorbate + self.create_slab_ads(slab, adsorbate, adsorbate2, bond_tol=bond_tol) + # Create the boundary conditions + self.setup_bounds(bounds) + # Set the parameters + self.update_arguments( + use_initial_struc=use_initial_struc, + opt_kwargs=opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + # Make initial optimizable structure + self.make_initial_structure() + + def get_structures( + self, + get_all=True, + properties=[], + allow_calculation=True, + **kwargs, + ): + structures = self.copy_atoms( + self.optimizable, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + structures.set_constraint(self.constraints_org) + return structures + + def create_slab_ads( + self, + slab, + adsorbate, + adsorbate2=None, + bond_tol=1e-8, + **kwargs, + ): + """ + Create the structure for the adsorption optimization. + + Parameters: + slab: Atoms object + The slab structure. + adsorbate: Atoms object + The adsorbate structure. 
+ adsorbate2: Atoms object (optional) + The second adsorbate structure. + bond_tol: float + The bond tolerance used for the FixBondLengths. + + Returns: + self: object + The object itself. + """ + # Check the slab and adsorbate are given + if slab is None or adsorbate is None: + raise ValueError("The slab and adsorbate must be given!") + # Save the bond length tolerance + self.bond_tol = float(bond_tol) + # Setup the slab + self.n_slab = len(slab) + self.slab = slab.copy() + self.slab.set_tags(0) + optimizable = self.slab.copy() + # Setup the adsorbate + self.n_ads = len(adsorbate) + self.adsorbate = adsorbate.copy() + self.adsorbate.set_tags(1) + self.adsorbate.cell = optimizable.cell.copy() + self.adsorbate.pbc = optimizable.pbc.copy() + pos_ads = self.adsorbate.get_positions() + pos_ads -= pos_ads.mean(axis=0) + self.adsorbate.set_positions(pos_ads) + optimizable.extend(self.adsorbate.copy()) + # Setup the adsorbate2 + if adsorbate2 is not None: + self.n_ads2 = len(adsorbate2) + self.adsorbate2 = adsorbate2.copy() + self.adsorbate2.set_tags(2) + self.adsorbate2.cell = optimizable.cell.copy() + self.adsorbate2.pbc = optimizable.pbc.copy() + pos_ads2 = self.adsorbate2.get_positions() + pos_ads2 -= pos_ads2.mean(axis=0) + self.adsorbate2.set_positions(pos_ads2) + optimizable.extend(self.adsorbate2.copy()) + else: + self.n_ads2 = 0 + self.adsorbate2 = None + # Get the full number of atoms + self.natoms = len(optimizable) + # Store the positions and cell + self.positions0 = optimizable.get_positions().copy() + self.cell = array(optimizable.get_cell()) + # Store the original constraints + self.constraints_org = [c.copy() for c in optimizable.constraints] + # Make constraints for optimization + self.constraints_used = [FixAtoms(indices=list(range(self.n_slab)))] + self.constraints_new = [FixAtoms(indices=list(range(self.n_slab)))] + if self.n_ads > 1: + # Get the fixed bond length pairs + pairs = itertools.combinations( + range(self.n_slab, self.n_slab + self.n_ads), + 
2, + ) + pairs = asarray(list(pairs)) + # Get the bond lengths + bondlengths = norm( + self.positions0[pairs[:, 0]] - self.positions0[pairs[:, 1]], + axis=1, + ) + # Add the constraints + self.constraints_new.append( + FixBondLengths( + pairs=pairs, + tolerance=self.bond_tol, + bondlengths=bondlengths, + ) + ) + if self.n_ads2 > 1: + # Get the fixed bond length pairs + pairs = itertools.combinations( + range(self.n_slab + self.n_ads, self.natoms), + 2, + ) + pairs = asarray(list(pairs)) + # Get the bond lengths + bondlengths = norm( + self.positions0[pairs[:, 0]] - self.positions0[pairs[:, 1]], + axis=1, + ) + # Add the constraints + self.constraints_new.append( + FixBondLengths( + pairs=pairs, + tolerance=self.bond_tol, + bondlengths=bondlengths, + ) + ) + optimizable.set_constraint(self.constraints_new) + # Setup the optimizable structure + self.setup_optimizable(optimizable) + return self + + def setup_bounds(self, bounds=None): + """ + Setup the boundary conditions for the global optimization. + + Parameters: + bounds: (6,2) or (12,2) ndarray (optional). + The boundary conditions used for the global optimization in + form of the simulated annealing. + The boundary conditions are the x, y, and z coordinates of + the center of the adsorbate and 3 rotations. + Same boundary conditions can be set for the second adsorbate + if chosen. + + Returns: + self: object + The object itself. 
+ """ + # Check the bounds are given + if bounds is None: + # Make default bounds + self.bounds = asarray( + [ + [0.0, 1.0], + [0.0, 1.0], + [0.0, 1.0], + [0.0, 2.0 * pi], + [0.0, 2.0 * pi], + [0.0, 2.0 * pi], + ] + ) + else: + self.bounds = bounds.copy() + # Check the bounds have the correct shape + if self.n_ads2 == 0 and self.bounds.shape != (6, 2): + raise ValueError("The bounds must have shape (6,2)!") + elif self.n_ads2 > 0 and not ( + self.bounds.shape == (6, 2) or self.bounds.shape == (12, 2) + ): + raise ValueError("The bounds must have shape (6,2) or (12,2)!") + # Check if the bounds are for two adsorbates + if self.n_ads2 > 0 and self.bounds.shape[0] == 6: + self.bounds = concatenate([self.bounds, self.bounds], axis=0) + return self + + def run( + self, + fmax=0.05, + steps=1000000, + max_unc=None, + dtrust=None, + unc_convergence=None, + **kwargs, + ): + # Check if the optimization can take any steps + if steps <= 2: + return self._converged + # Use original constraints + self.optimizable.set_constraint(self.constraints_used) + # Initialize the best energy and position + self.best_energy = inf + self.best_pos = None + self.best_energy_no_crit = inf + self.best_pos_no_crit = None + # Calculate the energy of the initial structure if used + if self.use_initial_struc: + # Get the energy of the structure + e = self.optimizable.get_potential_energy() + # Check if the energy is lower than the best energy + self.check_best_structure( + e=e, + pos=self.optimizable.get_positions(), + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + steps -= 1 + # Perform the simulated annealing + dual_annealing( + self.evaluate_value, + args=(max_unc, dtrust), + bounds=self.bounds, + maxfun=steps - 1, + **self.opt_kwargs, + ) + # Return the best position and number of steps + if self.best_energy == inf: + self.message( + "Uncertainty or trust distance is above the maximum allowed." 
+ ) + self.best_pos = self.best_pos_no_crit.copy() + # Set the positions + self.optimizable.set_positions(self.best_pos) + # Get the potential energy + e = self.optimizable.get_potential_energy() + # Set the new constraints + self.optimizable.set_constraint(self.constraints_new) + # Calculate the maximum force to check convergence + if fmax > self.get_fmax(): + # Check if the optimization is converged + self._converged = self.check_convergence( + converged=True, + max_unc=max_unc, + dtrust=dtrust, + unc_convergence=unc_convergence, + ) + return self._converged + + def is_energy_minimized(self): + return True + + def is_parallel_allowed(self): + return False + + def update_arguments( + self, + slab=None, + adsorbate=None, + adsorbate2=None, + bounds=None, + use_initial_struc=None, + opt_kwargs=None, + bond_tol=None, + parallel_run=None, + comm=None, + verbose=None, + seed=None, + **kwargs, + ): + """ + Update the instance with its arguments. + The existing arguments are used if they are not given. + + Parameters: + slab: Atoms instance + The slab structure. + adsorbate: Atoms instance + The adsorbate structure. + adsorbate2: Atoms instance (optional) + The second adsorbate structure. + bounds: (6,2) or (12,2) ndarray (optional). + The boundary conditions used for the global optimization in + form of the simulated annealing. + The boundary conditions are the x, y, and z coordinates of + the center of the adsorbate and 3 rotations. + Same boundary conditions can be set for the second adsorbate + if chosen. + use_initial_struc: bool + If True, the initial structure is used as one of the drawn + structures. + opt_kwargs: dict + The keyword arguments for the simulated annealing optimization. + bond_tol: float + The bond tolerance used for the FixBondLengths. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator instance for parallelization. 
+ verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the optimizer kwargs + if opt_kwargs is not None: + self.opt_kwargs = opt_kwargs.copy() + if bond_tol is not None: + self.bond_tol = float(bond_tol) + # Set the parameters in the parent class + super().update_arguments( + optimizable=None, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + # Create the atoms object from the slab and adsorbate + if slab is not None or adsorbate is not None or adsorbate2 is not None: + if slab is None: + slab = self.slab.copy() + if adsorbate is None: + adsorbate = self.adsorbate.copy() + if adsorbate2 is None and self.adsorbate2 is not None: + adsorbate2 = self.adsorbate2.copy() + self.create_slab_ads( + slab, + adsorbate, + adsorbate2, + bond_tol=self.bond_tol, + ) + # Create the boundary conditions + if bounds is not None: + self.setup_bounds(bounds) + if use_initial_struc is not None: + self.use_initial_struc = use_initial_struc + return self + + def set_seed(self, seed=None): + super().set_seed(seed) + # Set the seed for the random number generator + if scipy_version < "1.15": + self.opt_kwargs["seed"] = self.seed + else: + self.opt_kwargs["rng"] = self.rng + return self + + def rotation_matrix(self, angles, positions): + "Rotate the adsorbate" + # Get the angles + theta1, theta2, theta3 = angles + # Calculate the trigonometric functions + cos1 = cos(theta1) + sin1 = sin(theta1) + cos2 = cos(theta2) + sin2 = sin(theta2) + cos3 = cos(theta3) + sin3 = sin(theta3) + # Calculate the full rotation matrix + R = asarray( + [ + [cos2 * cos3, cos2 * sin3, -sin2], + [ + sin1 * sin2 * cos3 - cos1 * sin3, + sin1 * sin2 * sin3 + cos1 * cos3, + sin1 * cos2, + ], + [ + cos1 * sin2 * cos3 + sin1 * sin3, + cos1 * sin2 * sin3 - sin1 * cos3, + cos1 * 
cos2, + ], + ] + ) + # Calculate the rotation of the positions + positions = matmul(positions, R) + return positions + + def get_new_positions(self, x, **kwargs): + "Get the new positions of the adsorbate." + # Get the positions + pos = self.positions0.copy() + # Calculate the positions of the adsorbate + n_slab = self.n_slab + n_all = self.n_slab + self.n_ads + pos_ads = pos[n_slab:n_all] + pos_ads = self.rotation_matrix(x[3:6], pos_ads) + pos_ads += (self.cell * x[:3].reshape(-1, 1)).sum(axis=0) + pos[n_slab:n_all] = pos_ads + # Calculate the positions of the second adsorbate + if self.n_ads2 > 0: + pos_ads2 = pos[n_all:] + pos_ads2 = self.rotation_matrix(x[9:12], pos_ads2) + pos_ads2 += (self.cell * x[6:9].reshape(-1, 1)).sum(axis=0) + pos[n_all:] = pos_ads2 + return pos + + def evaluate_value(self, x, max_unc=None, dtrust=None, **kwargs): + "Evaluate the value of the adsorption." + # Get the new positions of the adsorption + pos = self.get_new_positions(x, **kwargs) + # Set the positions + self.optimizable.set_positions(pos) + # Get the potential energy + e = self.optimizable.get_potential_energy() + # Check if the energy is lower than the best energy + self.check_best_structure( + e=e, + pos=pos, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + return e + + def check_best_structure( + self, + e, + pos, + max_unc=None, + dtrust=None, + **kwargs, + ): + "Check if the structure is the best one." 
+ # Check if the energy is lower than the best energy + if e < self.best_energy: + # Update the best energy and position without criteria + if e < self.best_energy_no_crit: + self.best_energy_no_crit = e + self.best_pos_no_crit = pos.copy() + # Check if criteria are met + is_within_crit = True + # Check if the uncertainty is above the maximum allowed + if max_unc is not None: + unc = self.get_uncertainty() + if unc > max_unc: + is_within_crit = False + # Check if the structures are within the trust distance + if dtrust is not None: + within_dtrust = self.is_within_dtrust(dtrust=dtrust) + if not within_dtrust: + is_within_crit = False + # Update the best energy and position + if is_within_crit: + self.best_energy = e + self.best_pos = pos.copy() + return self.best_energy, self.best_pos + + def make_initial_structure(self, **kwargs): + "Get the initial structure for the optimization." + # Draw a random structure + x = self.rng.uniform(low=self.bounds[:, 0], high=self.bounds[:, 1]) + # Get the new positions of the adsorption + pos = self.get_new_positions(x, **kwargs) + # Set the positions + self.optimizable.set_positions(pos) + return self + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + slab=self.slab, + adsorbate=self.adsorbate, + adsorbate2=self.adsorbate2, + bounds=self.bounds, + use_initial_struc=self.use_initial_struc, + opt_kwargs=self.opt_kwargs, + parallel_run=self.parallel_run, + comm=self.comm, + verbose=self.verbose, + seed=self.seed, + ) + # Get the constants made within the class + constant_kwargs = dict(steps=self.steps, _converged=self._converged) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/optimizer/local.py b/catlearn/optimizer/local.py new file mode 100644 index 00000000..ae050f22 --- /dev/null +++ b/catlearn/optimizer/local.py @@ -0,0 +1,322 @@ +from .method import OptimizerMethod +from ase import __version__ as ase_version +from ase.parallel import world +from ase.optimize import FIRE +from numpy import isnan + + +class LocalOptimizer(OptimizerMethod): + """ + The LocalOptimizer is used to run a local optimization on + a given structure. + The LocalOptimizer is applicable to be used with active learning. + """ + + def __init__( + self, + optimizable, + local_opt=FIRE, + local_opt_kwargs={}, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + """ + Initialize the OptimizerMethod instance. + + Parameters: + optimizable: Atoms instance + The instance to be optimized. + local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator object for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. 
+ """ + # Set the parameters + self.update_arguments( + optimizable=optimizable, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + + def run( + self, + fmax=0.05, + steps=1000, + max_unc=None, + dtrust=None, + unc_convergence=None, + **kwargs, + ): + # Check if the optimization can take any steps + if steps <= 0: + return self._converged + # Run the local optimization + converged, _ = self.local_optimize( + atoms=self.optimizable, + fmax=fmax, + steps=steps, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + # Check if the optimization is converged + self._converged = self.check_convergence( + converged=converged, + max_unc=max_unc, + dtrust=dtrust, + unc_convergence=unc_convergence, + ) + # Return whether the optimization is converged + return self._converged + + def local_optimize( + self, + atoms, + fmax=0.05, + steps=1000, + max_unc=None, + dtrust=None, + **kwargs, + ): + """ + Run the local optimization on the given atoms. + + Parameters: + atoms: Atoms instance + The atoms to be optimized. + fmax: float + The maximum force allowed on an atom. + steps: int + The maximum number of steps allowed. + max_unc: float + The maximum uncertainty allowed on a structure. + dtrust: float + The distance trust criterion. + + Returns: + converged: bool + Whether the optimization is converged. + used_steps: int + The number of steps used in the optimization. 
+ """ + # Set the initialization parameters + converged = False + used_steps = 0 + # Run the local optimization + with self.local_opt(atoms, **self.local_opt_kwargs) as optimizer: + if max_unc is None and dtrust is None: + optimizer.run(fmax=fmax, steps=steps) + forces = atoms.get_forces() + converged = self.is_fmax_converged(forces, fmax=fmax) + self.steps += optimizer.get_number_of_steps() + else: + converged = self.run_max_unc( + optimizer=optimizer, + atoms=atoms, + fmax=fmax, + steps=steps, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + # Get the number of steps used in the optimization + used_steps = optimizer.get_number_of_steps() + return converged, used_steps + + def run_max_unc( + self, + optimizer, + atoms, + fmax=0.05, + steps=1000, + max_unc=None, + dtrust=None, + **kwargs, + ): + """ + Run the optimization with a maximum uncertainty. + + Parameters: + optimizer: ASE optimizer object + The optimizer object. + atoms: Atoms instance + The atoms to be optimized. + fmax: float + The maximum force allowed on an atom. + steps: int + The maximum number of steps allowed. + max_unc: float + The maximum uncertainty allowed on a structure. + dtrust: float + The distance trust criterion. + + Returns: + converged: bool + Whether the optimization is converged. 
+ """ + # Set the converged parameter + converged = False + # Make a copy of the atoms + while self.steps < steps: + # Check if the maximum number of steps is reached + if self.steps >= steps: + self.message("The maximum number of steps is reached.") + break + # Run a local optimization step + _converged = self.run_max_unc_step( + optimizer, + atoms=atoms, + fmax=fmax, + **kwargs, + ) + # Check if the uncertainty is above the maximum allowed + if max_unc is not None: + # Get the uncertainty of the atoms + unc = self.get_uncertainty() + if unc > max_unc: + self.message("Uncertainty is above the maximum allowed.") + break + # Check if the structures are within the trust distance + if dtrust is not None: + within_dtrust = self.is_within_dtrust(dtrust=dtrust) + if not within_dtrust: + self.message("Outside of the trust distance.") + break + # Check if there is a problem with the calculation + energy = self.get_potential_energy() + if isnan(energy): + self.message("The energy is NaN.") + break + # Check if the optimization is converged + if _converged: + converged = True + break + return converged + + def setup_local_optimizer(self, local_opt=FIRE, local_opt_kwargs={}): + """ + Setup the local optimizer. + + Parameters: + local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. 
+ """ + self.local_opt_kwargs = dict() + if not self.verbose: + self.local_opt_kwargs["logfile"] = None + if issubclass(local_opt, FIRE): + self.local_opt_kwargs.update( + dict(dt=0.05, maxstep=0.2, a=1.0, astart=1.0, fa=0.999) + ) + self.local_opt = local_opt + self.local_opt_kwargs.update(local_opt_kwargs) + return self + + def is_energy_minimized(self): + return True + + def is_parallel_allowed(self): + return False + + def update_arguments( + self, + optimizable=None, + local_opt=None, + local_opt_kwargs={}, + parallel_run=None, + comm=None, + verbose=None, + seed=None, + **kwargs, + ): + """ + Update the instance with its arguments. + The existing arguments are used if they are not given. + + Parameters: + optimizable: Atoms instance + The instance to be optimized. + local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator object for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the parameters in the parent class + super().update_arguments( + optimizable=optimizable, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + # Set the local optimizer + if local_opt is not None and local_opt_kwargs is not None: + self.setup_local_optimizer(local_opt, local_opt_kwargs) + elif local_opt is not None: + self.setup_local_optimizer(self.local_opt) + elif local_opt_kwargs is not None: + self.setup_local_optimizer(self.local_opt, local_opt_kwargs) + return self + + def run_max_unc_step(self, optimizer, atoms, fmax=0.05, **kwargs): + """ + Run a local optimization step. 
+ The ASE optimizer is dependent on the ASE version. + """ + if ase_version >= "3.23": + optimizer.run(fmax=fmax, steps=1, **kwargs) + else: + optimizer.run(fmax=fmax, steps=self.steps + 1, **kwargs) + self.steps += 1 + forces = atoms.get_forces() + return self.is_fmax_converged(forces, fmax=fmax) + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + optimizable=self.optimizable, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + parallel_run=self.parallel_run, + comm=self.comm, + verbose=self.verbose, + seed=self.seed, + ) + # Get the constants made within the class + constant_kwargs = dict(steps=self.steps, _converged=self._converged) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/optimizer/localcineb.py b/catlearn/optimizer/localcineb.py new file mode 100644 index 00000000..9e93ea94 --- /dev/null +++ b/catlearn/optimizer/localcineb.py @@ -0,0 +1,371 @@ +from ase.parallel import world +from ase.io import read +from ase.optimize import FIRE +from .localneb import LocalNEB +from .sequential import SequentialOptimizer +from ..structures.neb import ( + AvgEWNEB, + EWNEB, + ImprovedTangentNEB, + OriginalNEB, + make_interpolation, +) + + +class LocalCINEB(SequentialOptimizer): + """ + The LocalCINEB is used to run a local optimization of NEB. + First, the NEB is run without climbing image. + Then, the climbing image is started from the converged + non-climbing images if clim=True. + The LocalCINEB is applicable to be used with active learning. 
+ """ + + def __init__( + self, + start, + end, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + n_images=15, + climb=True, + neb_interpolation="linear", + neb_interpolation_kwargs={}, + start_without_ci=True, + reuse_ci_path=False, + local_opt=FIRE, + local_opt_kwargs={}, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + """ + Initialize the OptimizerMethod instance. + + Parameters: + start: Atoms instance or ASE Trajectory file. + The Atoms must have the calculator attached with energy. + Initial end-point of the NEB path. + end: Atoms instance or ASE Trajectory file. + The Atoms must have the calculator attached with energy. + Final end-point of the NEB path. + neb_method: NEB class object or str + The NEB implemented class object used for the ML-NEB. + A string can be used to select: + - 'improvedtangentneb' (default) + - 'ewneb' + - 'avgewneb' + neb_kwargs: dict + A dictionary with the arguments used in the NEB object + to create the instance. + Climb and images must not be included. + n_images: int + Number of images of the path (if not included a path before). + The number of images include the 2 end-points of the NEB path. + climb: bool + Whether to use the climbing image in the NEB. + It is strongly recommended to have climb=True. + neb_interpolation: str or list of ASE Atoms or ASE Trajectory file + The interpolation method used to create the NEB path. + The string can be: + - 'linear' (default) + - 'idpp' + - 'rep' + - 'born' + - 'ends' + Otherwise, the premade images can be given as a list of + ASE Atoms. + A string of the ASE Trajectory file that contains the images + can also be given. + neb_interpolation_kwargs: dict + The keyword arguments for the interpolation method. + It is only used when the interpolation method is a string. + start_without_ci: bool + Whether to start the NEB without the climbing image. 
+ If True, the NEB path will be optimized without + the climbing image and afterwards climbing image is used + if climb=True as well. + If False, the NEB path will be optimized with the climbing + image if climb=True as well. + reuse_ci_path: bool + Whether to remove the non-climbing image method when the NEB + without climbing image is converged. + local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator object for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the verbose + self.verbose = verbose + # Save the end points for creating the NEB + self.setup_endpoints(start, end) + # Build the optimizer methods and NEB within + methods = self.build_method( + neb_method, + neb_kwargs=neb_kwargs, + climb=climb, + n_images=n_images, + neb_interpolation=neb_interpolation, + neb_interpolation_kwargs=neb_interpolation_kwargs, + start_without_ci=start_without_ci, + reuse_ci_path=reuse_ci_path, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + # Set the parameters + self.update_arguments( + methods=methods, + remove_methods=reuse_ci_path, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + + def setup_endpoints(self, start, end, **kwargs): + """ + Setup the start and end points for the NEB calculation. 
+ """ + # Load the start and end points from trajectory files + if isinstance(start, str): + start = read(start) + if isinstance(end, str): + end = read(end) + # Save the start point with calculators + start.get_forces() + self.start = self.copy_atoms( + start, + properties=["forces", "energy"], + allow_calculation=True, + **kwargs, + ) + # Save the end point with calculators + end.get_forces() + self.end = self.copy_atoms( + end, + properties=["forces", "energy"], + allow_calculation=True, + **kwargs, + ) + return self + + def setup_neb( + self, + neb_method, + neb_kwargs={}, + climb=True, + n_images=15, + k=3.0, + remove_rotation_and_translation=False, + mic=True, + neb_interpolation="linear", + neb_interpolation_kwargs={}, + parallel=False, + comm=None, + seed=None, + **kwargs, + ): + """ + Setup the NEB instance. + """ + # Create the neb method if it is a string + if neb_method is None: + neb_method = ImprovedTangentNEB + elif isinstance(neb_method, str): + if neb_method.lower() == "improvedtangentneb": + neb_method = ImprovedTangentNEB + elif neb_method.lower() == "ewneb": + neb_method = EWNEB + elif neb_method.lower() == "avgewneb": + neb_method = AvgEWNEB + else: + raise ValueError( + "The NEB method {} is not implemented.".format(neb_method) + ) + self.neb_method = neb_method + # Create default dictionary for creating the NEB + self.neb_kwargs = dict( + k=k, + remove_rotation_and_translation=remove_rotation_and_translation, + parallel=parallel, + ) + if isinstance(neb_method, str) or issubclass(neb_method, OriginalNEB): + self.neb_kwargs.update( + dict( + use_image_permutation=True, + save_properties=True, + mic=mic, + comm=comm, + ) + ) + else: + self.neb_kwargs.update(dict(world=comm)) + # Save the dictionary for creating the NEB + self.neb_kwargs.update(neb_kwargs) + # Save the number of images + self.n_images = n_images + # Save the instances for creating the NEB interpolation + self.neb_interpolation = neb_interpolation + # Create default dictionary 
for creating the NEB interpolation + self.neb_interpolation_kwargs = dict( + mic=mic, + remove_rotation_and_translation=remove_rotation_and_translation, + seed=seed, + ) + # Save the dictionary for creating the NEB interpolation + self.neb_interpolation_kwargs.update(neb_interpolation_kwargs) + # Make the images for the NEB from the interpolation + images = make_interpolation( + start=self.start, + end=self.end, + n_images=self.n_images, + method=self.neb_interpolation, + neb_method=neb_method, + neb_kwargs=self.neb_kwargs, + **self.neb_interpolation_kwargs, + ) + # Create the NEB + neb = self.neb_method(images, climb=climb, **self.neb_kwargs) + return neb + + def build_method( + self, + neb_method, + neb_kwargs={}, + climb=True, + n_images=15, + k=3.0, + remove_rotation_and_translation=False, + mic=True, + neb_interpolation="linear", + neb_interpolation_kwargs={}, + start_without_ci=True, + local_opt=FIRE, + local_opt_kwargs={}, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + "Build the optimization method." + # Save the instances for creating the local optimizer + self.local_opt = local_opt + self.local_opt_kwargs = local_opt_kwargs + # Save the instances for creating the NEB + self.climb = climb + self.start_without_ci = start_without_ci + # Check if climb and start_without_ci are compatible + if not start_without_ci and not climb: + self.message( + "If start_without_ci is False, climb must be True!" 
+ "start_without_ci is set to True.", + is_warning=True, + ) + self.start_without_ci = True + # Set the kwargs for setting up the NEB + setup_neb_kwargs = dict( + neb_method=neb_method, + neb_kwargs=neb_kwargs, + n_images=n_images, + k=k, + remove_rotation_and_translation=remove_rotation_and_translation, + mic=mic, + neb_interpolation=neb_interpolation, + neb_interpolation_kwargs=neb_interpolation_kwargs, + parallel=parallel_run, + comm=comm, + seed=seed, + **kwargs, + ) + # Check if the non-climbing image method should be used + if self.start_without_ci: + # Setup NEB without climbing image + neb_noclimb = self.setup_neb( + climb=False, + **setup_neb_kwargs, + ) + # Build the optimizer method without climbing image + method_noclimb = LocalNEB( + neb_noclimb, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + ) + # Return the method without climbing image + methods = [method_noclimb] + if not climb: + return methods + else: + methods = [] + # Setup NEB with climbing image + neb_climb = self.setup_neb( + climb=True, + **setup_neb_kwargs, + ) + # Build the optimizer method with climbing image + method_climb = LocalNEB( + neb_climb, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + ) + # Return the without and with climbing image + methods.append(method_climb) + return methods + + def is_energy_minimized(self): + return self.methods[-1].is_energy_minimized() + + def is_parallel_allowed(self): + return True + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + start=self.start, + end=self.end, + neb_method=self.neb_method, + neb_kwargs=self.neb_kwargs, + n_images=self.n_images, + climb=self.climb, + neb_interpolation=self.neb_interpolation, + neb_interpolation_kwargs=self.neb_interpolation_kwargs, + start_without_ci=self.start_without_ci, + reuse_ci_path=self.remove_methods, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + parallel_run=self.parallel_run, + comm=self.comm, + verbose=self.verbose, + seed=self.seed, + ) + # Get the constants made within the class + constant_kwargs = dict(steps=self.steps, _converged=self._converged) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/optimizer/localneb.py b/catlearn/optimizer/localneb.py new file mode 100644 index 00000000..81163f69 --- /dev/null +++ b/catlearn/optimizer/localneb.py @@ -0,0 +1,246 @@ +from .local import LocalOptimizer +from ase.parallel import world, broadcast +from ase.optimize import FIRE +from numpy import asarray +from ..structures.neb import OriginalNEB + + +class LocalNEB(LocalOptimizer): + """ + The LocalNEB is used to run a local optimization of NEB. + The LocalNEB is applicable to be used with active learning. + """ + + def __init__( + self, + optimizable, + local_opt=FIRE, + local_opt_kwargs={}, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + """ + Initialize the OptimizerMethod instance. + + Parameters: + optimizable: NEB instance + The NEB object to be optimized. + local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator object for parallelization. 
+ verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the parameters + self.update_arguments( + optimizable=optimizable, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + + def update_optimizable(self, structures, **kwargs): + # Get the positions of the NEB images + positions = [image.get_positions() for image in structures[1:-1]] + positions = asarray(positions).reshape(-1, 3) + # Set the positions of the NEB images + self.optimizable.set_positions(positions) + # Find the minimum path length if possible and requested + if isinstance(self.optimizable, OriginalNEB): + self.optimizable.permute_images() + # Reset the optimization + self.reset_optimization() + return self + + def get_structures( + self, + get_all=True, + properties=[], + allow_calculation=True, + **kwargs, + ): + # Get only the first image + if not get_all: + return self.copy_atoms( + self.optimizable.images[0], + allow_calculation=False, + **kwargs, + ) + # Get all the images + if self.is_parallel_used(): + return self.get_structures_parallel( + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + structures = [ + self.copy_atoms( + self.optimizable.images[0], allow_calculation=False, **kwargs + ) + ] + structures += [ + self.copy_atoms( + image, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + for image in self.optimizable.images[1:-1] + ] + structures += [ + self.copy_atoms( + self.optimizable.images[-1], allow_calculation=False, **kwargs + ) + ] + return structures + + def get_structures_parallel( + self, + properties=[], + allow_calculation=True, + **kwargs, + ): + "Get the structures in parallel." 
+ # Get the initial structure + structures = [ + self.copy_atoms( + self.optimizable.images[0], + allow_calculation=False, + **kwargs, + ) + ] + # Get the moving images in parallel + for i, image in enumerate(self.optimizable.images[1:-1]): + root = i % self.size + if self.rank == root: + image = self.copy_atoms( + image, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + structures.append(broadcast(image, root=root, comm=self.comm)) + # Get the final structure + structures.append( + self.copy_atoms( + self.optimizable.images[-1], + allow_calculation=False, + **kwargs, + ) + ) + return structures + + def get_candidates(self, **kwargs): + return self.optimizable.images[1:-1] + + def set_calculator(self, calculator, copy_calc=False, **kwargs): + if isinstance(calculator, list): + if len(calculator) != len(self.optimizable.images[1:-1]): + raise Exception( + "The number of calculators should be equal to " + "the number of moving images!" + ) + for image, calc in zip(self.optimizable.images[1:-1], calculator): + if copy_calc: + image.calc = calc.copy() + else: + image.calc = calc + image.calc.reset() + else: + for image in self.optimizable.images[1:-1]: + if copy_calc: + image.calc = calculator.copy() + else: + image.calc = calculator + image.calc.reset() + return self + + def get_calculator(self): + return [image.calc for image in self.optimizable.images[1:-1]] + + def is_energy_minimized(self): + return False + + def is_parallel_allowed(self): + return True + + def update_arguments( + self, + optimizable=None, + local_opt=None, + local_opt_kwargs={}, + parallel_run=None, + comm=None, + verbose=None, + seed=None, + **kwargs, + ): + """ + Update the instance with its arguments. + The existing arguments are used if they are not given. + + Parameters: + optimizable: NEB instance + The NEB object to be optimized. + local_opt: ASE optimizer object + The local optimizer object. 
+ local_opt_kwargs: dict + The keyword arguments for the local optimizer. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator object for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the parameters in the parent class + super().update_arguments( + optimizable=optimizable, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + optimizable=self.optimizable, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + parallel_run=self.parallel_run, + comm=self.comm, + verbose=self.verbose, + seed=self.seed, + ) + # Get the constants made within the class + constant_kwargs = dict(steps=self.steps, _converged=self._converged) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/optimizer/method.py b/catlearn/optimizer/method.py new file mode 100644 index 00000000..3e03bf8a --- /dev/null +++ b/catlearn/optimizer/method.py @@ -0,0 +1,902 @@ +from numpy import einsum, max as max_, sqrt +from numpy.random import default_rng, Generator, RandomState +from ase.parallel import world, broadcast +import warnings +from ..regression.gp.calculator.copy_atoms import ( + copy_atoms, + StoredDataCalculator, +) +from ..structures.structure import Structure + + +class OptimizerMethod: + """ + The OptimizerMethod class is a base class for all optimization methods. 
+ The OptimizerMethod is used to run an optimization on a given
+ optimizable.
+ The OptimizerMethod is applicable to be used with active learning.
+ """
+
+ def __init__(
+ self,
+ optimizable,
+ parallel_run=False,
+ comm=world,
+ verbose=False,
+ seed=None,
+ **kwargs,
+ ):
+ """
+ Initialize the OptimizerMethod instance.
+
+ Parameters:
+ optimizable: optimizable instance
+ The instance to be optimized.
+ Often, an Atoms or NEB instance.
+ Here, it is assumed to be an Atoms instance.
+ parallel_run: bool
+ If True, the optimization will be run in parallel.
+ comm: ASE communicator instance
+ The communicator object for parallelization.
+ verbose: bool
+ Whether to print the full output (True) or
+ not (False).
+ seed: int (optional)
+ The random seed for the optimization.
+ The seed can also be a RandomState or Generator instance.
+ If not given, the default random number generator is used.
+ """
+ # Set the parameters
+ self.update_arguments(
+ optimizable=optimizable,
+ parallel_run=parallel_run,
+ comm=comm,
+ verbose=verbose,
+ seed=seed,
+ **kwargs,
+ )
+
+ def update_optimizable(self, structures, **kwargs):
+ """
+ Update the optimizable instance by given
+ dependent structures.
+
+ Parameters:
+ structures: Atoms instance or list of Atoms instances
+ The structures that the optimizable instance is dependent on.
+ """
+ # Check if the structures are a list
+ if isinstance(structures, list):
+ raise NotImplementedError(
+ "The method does not support multiple structures"
+ )
+ # Update optimizable by setting the positions of the optimizable
+ self.optimizable.set_positions(structures.get_positions())
+ # Reset the optimization
+ self.reset_optimization()
+ return self
+
+ def get_optimizable(self, **kwargs):
+ """
+ Get the optimizable that is considered for the optimizer.
+
+ Returns:
+ optimizable: The optimizable instance.
+ Often, an Atoms or NEB instance.
+ """ + return self.optimizable + + def get_structures( + self, + get_all=True, + properties=[], + allow_calculation=True, + **kwargs, + ): + """ + Get the structures that optimizable instance is dependent on. + + Parameters: + get_all: bool + If True, all structures are returned. + Else, only the first structure is returned. + properties: list of str + The names of the requested properties. + If not given, the properties is not calculated. + allow_calculation: bool + Whether the properties are allowed to be calculated. + + Returns: + structures: Atoms instance or list of Atoms instances + The structures that the optimizable instance is dependent on. + """ + return self.copy_atoms( + self.optimizable, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + + def get_candidates(self, **kwargs): + """ + Get the candidate structure instances. + It is used for active learning. + """ + return [self.optimizable] + + def copy_candidates( + self, + properties=["forces", "energy"], + allow_calculation=True, + **kwargs, + ): + """ + Get the candidate structure instances with copied properties. + It is used for active learning. + + Parameters: + properties: list of str + The names of the requested properties. + allow_calculation: bool + Whether the properties are allowed to be calculated. + + Returns: + candidates_copy: list of Atoms instances + The candidates with copied properties. 
+ """ + # Check if the parallelization is used + is_parallel = self.is_parallel_used() + candidates_copy = [] + for i, atoms in enumerate(self.get_candidates()): + # Check the rank of the process + atoms_new = None + root = i % self.size + if not is_parallel or self.rank == root: + # Get the properties of the atoms instance + atoms_new = self.copy_atoms( + atoms=atoms, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + # Broadcast the atoms instance to all processes + if is_parallel: + atoms_new = broadcast(atoms_new, root=root, comm=self.comm) + candidates_copy.append(atoms_new) + return candidates_copy + + def reset_optimization(self): + """ + Reset the optimization. + """ + self.steps = 0 + self._converged = False + return self + + def setup_optimizable(self, optimizable): + """ + Set the optimizable instance. + + Parameters: + optimizable: optimizable instance + The instance to be optimized. + Often, an Atoms or NEB instance. + """ + self.optimizable = optimizable + self.reset_optimization() + return self + + def set_calculator(self, calculator, copy_calc=False, **kwargs): + """ + Set the calculator for the optimizable instance. + + Parameters: + calculator: ASE calculator instance + The calculator to be set. + copy_calc: bool + If True, the calculator will be copied. + """ + if copy_calc: + self.optimizable.calc = calculator.copy() + else: + self.optimizable.calc = calculator + self.optimizable.calc.reset() + return self + + def get_calculator(self): + """ + Get the calculator of the optimizable instance. + """ + return self.optimizable.calc + + @property + def calc(self): + """ + The calculator instance. + """ + return self.get_calculator() + + @calc.setter + def calc(self, calculators): + return self.set_calculator(calculators) + + def get_potential_energy(self, per_candidate=False, **kwargs): + """ + Get the potential energy of the optimizable. 
+ + Parameters: + per_candidate: bool + If True, the potential energy of each candidate is returned. + Else, the potential energy of the optimizable is returned. + + Returns: + energy: float or list + The potential energy of the optimizable. + """ + if per_candidate: + if self.is_parallel_used(): + return self.get_potential_energy_parallel(**kwargs) + energy = [ + atoms.get_potential_energy(**kwargs) + for atoms in self.get_candidates() + ] + else: + energy = self.optimizable.get_potential_energy(**kwargs) + return energy + + def get_potential_energy_parallel(self, **kwargs): + """ + Get the potential energies of the candidates in parallel. + + Returns: + energy: list of floats + The potential energies of the candidates. + """ + energy = [] + for i, atoms in enumerate(self.get_candidates()): + root = i % self.size + e = None + if self.rank == root: + e = atoms.get_potential_energy(**kwargs) + e = broadcast(e, root=root, comm=self.comm) + energy.append(e) + return energy + + def get_forces(self, per_candidate=False, **kwargs): + """ + Get the forces of the optimizable. + + Parameters: + per_candidate: bool + If True, the forces of each candidate is returned. + Else, the forces of the optimizable is returned + + Returns: + force: (N,3) array or list of (N,3) arrays + The forces of the optimizable. + """ + if per_candidate: + if self.is_parallel_used(): + return self.get_forces_parallel(**kwargs) + forces = [ + atoms.get_forces(**kwargs) for atoms in self.get_candidates() + ] + else: + forces = self.optimizable.get_forces(**kwargs) + return forces + + def get_forces_parallel(self, **kwargs): + """ + Get the forces of the candidates in parallel. + + Returns: + forces: list of (N,3) arrays + The forces of the candidates. 
+ """ + forces = [] + for i, atoms in enumerate(self.get_candidates()): + root = i % self.size + f = None + if self.rank == root: + f = atoms.get_forces(**kwargs) + f = broadcast(f, root=root, comm=self.comm) + forces.append(f) + return forces + + def get_fmax(self, per_candidate=False, **kwargs): + """ + Get the maximum force of an atom in the optimizable. + + Parameters: + per_candidate: bool + If True, the maximum force of each candidate is returned. + Else, the maximum force of the optimizable is returned. + + Returns: + fmax: float or list + The maximum force of the optimizable. + """ + forces = self.get_forces(per_candidate=per_candidate, **kwargs) + if per_candidate: + fmax = sqrt(einsum("ijk,ijk->ij", forces, forces)).max(-1) + else: + fmax = sqrt(einsum("ij,ij->i", forces, forces)).max() + return fmax + + def get_uncertainty(self, per_candidate=False, **kwargs): + """ + Get the uncertainty of the optimizable. + It is used for active learning. + + Parameters: + per_candidate: bool + If True, the uncertainty of each candidate is returned. + Else, the maximum uncertainty of the optimizable is returned. + + Returns: + uncertainty: float or list + The uncertainty of the optimizable. + """ + if self.is_parallel_used(): + uncertainty = self.get_uncertainty_parallel(**kwargs) + else: + uncertainty = [ + ( + atoms.get_uncertainty(**kwargs) + if isinstance(atoms, Structure) + else atoms.calc.get_property( + "uncertainty", + atoms=atoms, + **kwargs, + ) + ) + for atoms in self.get_candidates() + ] + if not per_candidate: + uncertainty = max_(uncertainty) + return uncertainty + + def get_uncertainty_parallel(self, **kwargs): + """ + Get the uncertainty of the candidates in parallel. + It is used for active learning. + + Returns: + uncertainty: list of floats + The uncertainty of the candidates. 
+ """ + uncertainty = [] + for i, atoms in enumerate(self.get_candidates()): + root = i % self.size + unc = None + if self.rank == root: + if isinstance(atoms, Structure): + unc = atoms.get_uncertainty(**kwargs) + else: + unc = atoms.calc.get_property( + "uncertainty", + atoms=atoms, + **kwargs, + ) + unc = broadcast(unc, root=root, comm=self.comm) + uncertainty.append(unc) + return uncertainty + + def get_property( + self, + name, + allow_calculation=True, + per_candidate=False, + **kwargs, + ): + """ + Get or calculate the requested property. + + Parameters: + name: str + The name of the requested property. + allow_calculation: bool + Whether the property is allowed to be calculated. + per_candidate: bool + If True, the property of each candidate is returned. + Else, the property of the optimizable is returned. + + Returns: + float or list: The requested property. + """ + # Check if the parallelization is used + is_parallel = self.is_parallel_used() + # Check if the property is extracted for each candidate + if per_candidate: + output = [] + for i, atoms in enumerate(self.get_candidates()): + # Check the rank of the process + result = None + root = i % self.size + if not is_parallel or self.rank == root: + # Get the properties of the atoms instance + result = self.get_atoms_property( + atoms=atoms, + name=name, + allow_calculation=allow_calculation, + **kwargs, + ) + # Broadcast the property to all processes + if is_parallel: + result = broadcast(result, root=root, comm=self.comm) + output.append(result) + else: + # Get the property of the optimizable instance + output = self.get_atoms_property( + atoms=self.optimizable, + name=name, + allow_calculation=allow_calculation, + **kwargs, + ) + return output + + def get_properties( + self, + properties, + allow_calculation=True, + per_candidate=False, + **kwargs, + ): + """ + Get or calculate the requested properties. + + Parameters: + properties: list of str + The names of the requested properties. 
+ allow_calculation: bool + Whether the properties are allowed to be calculated. + per_candidate: bool + If True, the properties of each candidate are returned. + Else, the properties of the optimizable are returned. + + Returns: + dict: The requested properties. + """ + # Check if the parallelization is used + is_parallel = self.is_parallel_used() + if per_candidate: + results = {name: [] for name in properties} + for i, atoms in enumerate(self.get_candidates()): + # Check the rank of the process + root = i % self.size + for name in properties: + result = None + if not is_parallel or self.rank == root: + # Get the properties of the atoms instance + result = self.get_atoms_property( + atoms=atoms, + name=name, + allow_calculation=allow_calculation, + **kwargs, + ) + # Broadcast the property to all processes + if is_parallel: + result = broadcast(result, root=root, comm=self.comm) + results[name].append(result) + else: + # Get the properties of the optimizable instance + results = {} + for name in properties: + results[name] = self.get_property( + name=name, + allow_calculation=allow_calculation, + per_candidate=per_candidate, + **kwargs, + ) + return results + + def get_atoms_property( + self, + atoms, + name, + allow_calculation=True, + **kwargs, + ): + """ + Get the property of the given atoms instance. + + Parameters: + name: str + The name of the requested property. + allow_calculation: bool + Whether the property is allowed to be calculated. + + Returns: + float: The requested property. 
+ """ + if name == "energy": + result = atoms.get_potential_energy(**kwargs) + elif name == "forces": + result = atoms.get_forces(**kwargs) + elif name == "fmax": + forces = atoms.get_forces(**kwargs) + result = sqrt(einsum("ij,ij->i", forces, forces)).max() + elif name == "uncertainty" and isinstance( + atoms, + Structure, + ): + result = atoms.get_uncertainty(**kwargs) + else: + result = atoms.calc.get_property( + name, + atoms=atoms, + allow_calculation=allow_calculation, + **kwargs, + ) + return result + + def is_within_dtrust(self, per_candidate=False, dtrust=2.0, **kwargs): + """ + Get whether the structures are within a trust distance to the database. + It is used for active learning. + + Parameters: + per_candidate: bool + If True, the distance of each candidate is returned. + Else, the maximum distance of the optimizable is returned. + dtrust: float + The distance trust criterion. + + Returns: + within_dtrust: float or list + Whether the structures are within a trust distance to + the database. + """ + within_dtrust = [] + for atoms in self.get_candidates(): + if isinstance(atoms, Structure): + real_atoms = atoms.get_structure() + within = real_atoms.calc.is_in_database( + real_atoms, + dtol=dtrust, + **kwargs, + ) + else: + within = atoms.calc.is_in_database( + atoms, + dtol=dtrust, + **kwargs, + ) + within_dtrust.append(within) + if not per_candidate: + if False in within_dtrust: + within_dtrust = False + else: + within_dtrust = True + return within_dtrust + + def get_number_of_steps(self): + """ + Get the number of steps that have been run. + """ + return self.steps + + def converged(self, *args, **kwargs): + """ + Check if the optimization is converged. + """ + return self._converged + + def is_fmax_converged(self, forces, fmax, **kwargs): + """ + Check if the optimization is converged based on the maximum force. + + Parameters: + forces: (N,3) array + The forces of the optimizable. + fmax: float + The maximum force allowed on an atom. 
+
+        Returns:
+            converged: bool
+                Whether the optimization is converged.
+        """
+        forces = forces.reshape(-1, 3)
+        return sqrt(einsum("ij,ij->i", forces, forces)).max() < fmax
+
+    def is_energy_minimized(self):
+        """
+        Check if the optimization method minimizes the energy.
+        """
+        return True
+
+    def is_parallel_allowed(self):
+        """
+        Check if the optimization method allows parallelization.
+        """
+        return False
+
+    def is_parallel_used(self):
+        """
+        Check if the optimization method uses parallelization.
+        """
+        return self.parallel_run and self.is_parallel_allowed()
+
+    def run(
+        self,
+        fmax=0.05,
+        steps=1000,
+        max_unc=None,
+        dtrust=None,
+        unc_convergence=None,
+        **kwargs,
+    ):
+        """
+        Run the optimization.
+
+        Parameters:
+            fmax: float
+                The maximum force allowed on an atom.
+            steps: int
+                The maximum number of steps allowed.
+            max_unc: float (optional)
+                Maximum uncertainty for continuation of the optimization.
+            dtrust: float (optional)
+                The distance trust criterion.
+            unc_convergence: float (optional)
+                The uncertainty convergence criterion for convergence.
+
+        Returns:
+            converged: bool
+                Whether the optimization is converged.
+        """
+        # Check if the optimization can take any steps
+        if steps <= 0:
+            return self._converged
+        raise NotImplementedError("The run method is not implemented")
+
+    def run_max_unc(self, **kwargs):
+        """
+        Run the optimization with a maximum uncertainty.
+        The uncertainty is checked at each optimization step if requested.
+        The trust distance is checked at each optimization step if requested.
+        It is used for active learning.
+        """
+        raise NotImplementedError("The run_max_unc method is not implemented")
+
+    def check_convergence(
+        self,
+        converged,
+        max_unc=None,
+        dtrust=None,
+        unc_convergence=None,
+        **kwargs,
+    ):
+        """
+        Check if the optimization is converged also in terms of uncertainty.
+        The uncertainty is used for active learning.
+
+        Parameters:
+            converged: bool
+                Whether the optimization is converged.
+            max_unc: float (optional)
+                The maximum uncertainty allowed.
+            dtrust: float (optional)
+                The distance trust criterion.
+            unc_convergence: float (optional)
+                The uncertainty convergence criterion for convergence.
+
+        Returns:
+            converged: bool
+                Whether the optimization is converged.
+        """
+        # Check if the optimization is converged at all
+        if not converged:
+            return False
+        # Check if the optimization is converged in terms of uncertainty
+        if max_unc is not None or unc_convergence is not None:
+            unc = self.get_uncertainty()
+            if max_unc is not None and unc > max_unc:
+                return False
+            if unc_convergence is not None and unc > unc_convergence:
+                return False
+        # Check if the optimization is converged in terms of database distance
+        if dtrust is not None:
+            within_dtrust = self.is_within_dtrust(dtrust=dtrust)
+            if not within_dtrust:
+                return False
+        return converged
+
+    def save_method(self, filename="method.pkl", **kwargs):
+        """
+        Save the method instance to a file.
+
+        Parameters:
+            filename: str
+                The name of the file where the instance is saved.
+
+        Returns:
+            self: The instance itself.
+        """
+        import pickle
+
+        method_copy = self.copy()
+        method_copy.remove_parallel_setup()
+        with open(filename, "wb") as file:
+            pickle.dump(method_copy, file)
+        return self
+
+    def update_arguments(
+        self,
+        optimizable=None,
+        parallel_run=None,
+        comm=None,
+        verbose=None,
+        seed=None,
+        **kwargs,
+    ):
+        """
+        Update the instance with its arguments.
+        The existing arguments are used if they are not given.
+
+        Parameters:
+            optimizable: optimizable instance
+                The instance to be optimized.
+                Often, an Atoms or NEB instance.
+                Here, it is assumed to be an Atoms instance.
+            parallel_run: bool
+                If True, the optimization will be run in parallel.
+            comm: ASE communicator instance
+                The communicator object for parallelization.
+            verbose: bool
+                Whether to print the full output (True) or
+                not (False).
+            seed: int (optional)
+                The random seed for the optimization.
+ The seed can also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set and check the parallelization + if parallel_run is not None: + self.parallel_run = parallel_run + self.check_parallel() + # Set the communicator + if comm is not None: + self.parallel_setup(comm=comm) + elif not hasattr(self, "comm"): + if self.parallel_run: + self.parallel_setup(comm=None) + else: + self.remove_parallel_setup() + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the verbose + if verbose is not None: + self.verbose = verbose + # Set the optimizable + if optimizable is not None: + self.setup_optimizable(optimizable) + return self + + def parallel_setup(self, comm, **kwargs): + "Setup the parallelization." + if comm is None: + self.comm = world + else: + self.comm = comm + self.rank = self.comm.rank + self.size = self.comm.size + return self + + def remove_parallel_setup(self): + "Remove the parallelization by removing the communicator." + self.comm = None + self.rank = 0 + self.size = 1 + return self + + def set_seed(self, seed=None, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + if seed is not None: + self.seed = seed + if isinstance(seed, int): + self.rng = default_rng(self.seed) + elif isinstance(seed, Generator) or isinstance(seed, RandomState): + self.rng = seed + else: + self.seed = None + self.rng = default_rng() + return self + + def copy_atoms( + self, + atoms, + properties=[], + allow_calculation=True, + **kwargs, + ): + "Copy an atoms instance." 
+ # Get the properties of the atoms instance + results = {} + if ( + allow_calculation + and atoms.calc is not None + and ( + atoms.calc is not StoredDataCalculator + or isinstance(atoms, Structure) + ) + ): + for name in properties: + self.get_atoms_property( + atoms=atoms, + name=name, + allow_calculation=allow_calculation, + **kwargs, + ) + results.update(atoms.calc.results) + # Copy the atoms instance with all the properties + return copy_atoms(atoms, results=results) + + def message(self, message, is_warning=False): + "Print a message." + if self.verbose and self.rank == 0: + if is_warning: + warnings.warn(message) + else: + print(message) + return self + + def check_parallel(self): + "Check if the parallelization is allowed." + if self.parallel_run and not self.is_parallel_allowed(): + self.message( + "Parallel run is not supported for this method!", + is_warning=True, + ) + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + optimizable=self.optimizable, + parallel_run=self.parallel_run, + comm=self.comm, + verbose=self.verbose, + seed=self.seed, + ) + # Get the constants made within the class + constant_kwargs = dict(steps=self.steps, _converged=self._converged) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + def copy(self): + "Copy the object." 
+ # Get all arguments + arg_kwargs, constant_kwargs, object_kwargs = self.get_arguments() + # Make a clone + clone = self.__class__(**arg_kwargs) + # Check if constants have to be saved + if len(constant_kwargs.keys()): + for key, value in constant_kwargs.items(): + clone.__dict__[key] = value + # Check if objects have to be saved + if len(object_kwargs.keys()): + for key, value in object_kwargs.items(): + clone.__dict__[key] = value.copy() + return clone + + def __repr__(self): + arg_kwargs = self.get_arguments()[0] + str_kwargs = ",".join( + [f"{key}={value}" for key, value in arg_kwargs.items()] + ) + return "{}({})".format(self.__class__.__name__, str_kwargs) diff --git a/catlearn/optimizer/parallelopt.py b/catlearn/optimizer/parallelopt.py new file mode 100644 index 00000000..a6908c0b --- /dev/null +++ b/catlearn/optimizer/parallelopt.py @@ -0,0 +1,340 @@ +from .method import OptimizerMethod +from ase.parallel import world, broadcast +from numpy import argmin, inf + + +class ParallelOptimizer(OptimizerMethod): + """ + The ParallelOptimizer is used to run an optimization in parallel. + The ParallelOptimizer is applicable to be used with + active learning. + """ + + def __init__( + self, + method, + chains=None, + parallel_run=True, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + """ + Initialize the OptimizerMethod instance. + + Parameters: + method: OptimizerMethod instance + The optimization method to be used. + chains: int (optional) + The number of optimization that will be run in parallel. + If not given, the number of chains is set to the number of + processors if parallel_run is True, otherwise it is set to 1. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator instance for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. 
+ The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + A different seed is used for each chain if the seed is an + integer. + """ + # Set the parameters + self.update_arguments( + method=method, + chains=chains, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + + def update_optimizable(self, structures, **kwargs): + if isinstance(structures, list) and len(structures) == self.chains: + self.method.update_optimizable(structures[0]) + for method, structure in zip(self.methods, structures): + method.update_optimizable(structure, **kwargs) + else: + self.method.update_optimizable(structures, **kwargs) + for method in self.methods: + method.update_optimizable(structures, **kwargs) + # Reset the optimization + self.setup_optimizable() + return self + + def get_optimizable(self, **kwargs): + return self.method.get_optimizable(**kwargs) + + def get_structures( + self, + get_all=True, + properties=[], + allow_calculation=True, + **kwargs, + ): + if not get_all: + return self.method.get_structures( + get_all=get_all, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + structures = [] + for chain, method in enumerate(self.methods): + root = chain % self.size + if self.rank == root: + # Get the structure + structure = method.get_structures( + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + else: + structure = None + # Broadcast the structure + structures.append( + broadcast( + structure, + root=root, + comm=self.comm, + ) + ) + return structures + + def get_candidates(self, **kwargs): + candidates = [] + for chain, method in enumerate(self.methods): + root = chain % self.size + if self.rank == root: + # Get the candidate(s) + candidates_tmp = [ + candidate for candidate in method.get_candidates(**kwargs) + ] + else: + candidates_tmp = [] + # Broadcast the candidates + for candidate in broadcast( + 
candidates_tmp, + root=root, + comm=self.comm, + ): + candidates.append(candidate) + return candidates + + def run( + self, + fmax=0.05, + steps=1000000, + max_unc=None, + dtrust=None, + unc_convergence=None, + **kwargs, + ): + # Check if the optimization can take any steps + if steps <= 0: + return self._converged + # Run the optimizations + converged_list = [ + ( + method.run( + fmax=fmax, + steps=steps, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + if self.rank == chain % self.size + else False + ) + for chain, method in enumerate(self.methods) + ] + # Save the structures, values, and used steps + structures = [] + values = [] + for chain, method in enumerate(self.methods): + root = chain % self.size + if self.rank == root: + # Get the structure + structure = method.get_structures() + # Get the value + if self.method.is_energy_minimized(): + value = method.get_potential_energy() + else: + value = method.get_fmax() + else: + structure = None + value = inf + # Broadcast the structure + structures.append( + broadcast( + structure, + root=root, + comm=self.comm, + ) + ) + # Broadcast the values + values.append( + broadcast( + value, + root=root, + comm=self.comm, + ) + ) + # Get the number of steps + self.steps += max( + [ + broadcast( + method.get_number_of_steps(), + root=chain % self.size, + comm=self.comm, + ) + for chain, method in enumerate(self.methods) + ] + ) + # Find the best optimization + chain_min = argmin(values) + root = chain_min % self.size + # Broadcast whether the optimization is converged + converged = broadcast( + converged_list[chain_min], + root=root, + comm=self.comm, + ) + # Get the best structure and update the method + structure = structures[chain_min] + self.method = self.method.update_optimizable(structure) + self.optimizable = self.method.get_optimizable() + # Check if the optimization is converged + self._converged = self.check_convergence( + converged=converged, + max_unc=max_unc, + dtrust=dtrust, + 
unc_convergence=unc_convergence, + ) + return self._converged + + def set_calculator(self, calculator, copy_calc=False, **kwargs): + self.method.set_calculator(calculator, copy_calc=copy_calc, **kwargs) + for method in self.methods: + method.set_calculator(calculator, copy_calc=copy_calc, **kwargs) + return self + + def is_energy_minimized(self): + return self.method.is_energy_minimized() + + def is_parallel_allowed(self): + return True + + def update_arguments( + self, + method=None, + chains=None, + parallel_run=None, + comm=None, + verbose=None, + seed=None, + **kwargs, + ): + """ + Update the instance with its arguments. + The existing arguments are used if they are not given. + + Parameters: + method: OptimizerMethod instance + The optimization method to be used. + chains: int + The number of optimization that will be run in parallel. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator instance for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. 
+ """ + # Set the parameters in the parent class + super().update_arguments( + optimizable=None, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + # Set the chains + if chains is not None: + self.chains = chains + elif not hasattr(self, "chains"): + self.chains = self.size + # Set the method + if method is not None: + self.method = method.copy() + self.methods = [method.copy() for _ in range(self.chains)] + self.set_seed(seed=self.seed) + self.setup_optimizable() + # Check if the method is set correctly + if len(self.methods) != self.chains: + self.message( + "The number of chains should be equal to " + "the number of methods!", + is_warning=True, + ) + self.methods = [method.copy() for _ in range(self.chains)] + self.set_seed(seed=self.seed) + self.setup_optimizable() + # Check if the number of chains is optimal + if self.chains % self.size != 0: + self.message( + "The number of chains should be divisible by " + "the number of processors!", + is_warning=True, + ) + return self + + def set_seed(self, seed=None, **kwargs): + # Set the seed for the class + super().set_seed(seed=seed, **kwargs) + # Set the seed for the method + if hasattr(self, "method"): + self.method.set_seed(seed=seed, **kwargs) + # Set the seed for each method + if isinstance(seed, int): + for method in self.methods: + method.set_seed(seed=seed, **kwargs) + seed += 1 + else: + for chain, method in enumerate(self.methods): + method.set_seed(seed=seed, **kwargs) + method.rng.random(size=chain) + return self + + def setup_optimizable(self, **kwargs): + self.optimizable = self.method.get_optimizable() + self.reset_optimization() + return self + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + method=self.method, + chains=self.chains, + parallel_run=self.parallel_run, + comm=self.comm, + verbose=self.verbose, + seed=self.seed, + ) + # Get the constants made within the class + constant_kwargs = dict(steps=self.steps, _converged=self._converged) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/optimizer/randomadsorption.py b/catlearn/optimizer/randomadsorption.py new file mode 100644 index 00000000..57c430a2 --- /dev/null +++ b/catlearn/optimizer/randomadsorption.py @@ -0,0 +1,661 @@ +from .local import LocalOptimizer +from ase.parallel import world +from ase.optimize import FIRE +from numpy import array, asarray, concatenate, cos, inf, matmul, pi, sin +from ..regression.gp.baseline import BornRepulsionCalculator + + +class RandomAdsorptionOptimizer(LocalOptimizer): + """ + The RandomAdsorptionOptimizer is used to run a global optimization of + an adsorption on a surface. + A single structure will be created and optimized. + Random structures will be sampled and the most stable structure is local + optimized. + The RandomAdsorptionOptimizer is applicable to be used with + active learning. + """ + + def __init__( + self, + slab, + adsorbate, + adsorbate2=None, + bounds=None, + n_random_draws=50, + use_initial_struc=True, + use_initial_opt=False, + initial_fmax=0.2, + initial_steps=50, + use_repulsive_check=True, + repulsive_tol=0.1, + repulsive_calculator=BornRepulsionCalculator(), + local_opt=FIRE, + local_opt_kwargs={}, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + """ + Initialize the OptimizerMethod instance. + + Parameters: + slab: Atoms instance + The slab structure. + adsorbate: Atoms instance + The adsorbate structure. + adsorbate2: Atoms instance (optional) + The second adsorbate structure. + bounds: (6,2) or (12,2) ndarray (optional). 
+                The boundary conditions used for drawing the positions
+                for the adsorbate(s).
+                The boundary conditions are the x, y, and z coordinates of
+                the center of the adsorbate and 3 rotations.
+                Same boundary conditions can be set for the second adsorbate
+                if chosen.
+            n_random_draws: int
+                The number of random structures to be drawn.
+            use_initial_struc: bool
+                If True, the initial structure is used as one of the drawn
+                structures.
+            use_initial_opt: bool
+                If True, the initial structures, drawn from the random
+                sampling, will be locally optimized before the structure
+                with the lowest energy is locally optimized.
+            initial_fmax: float
+                The maximum force for the initial local optimizations.
+            initial_steps: int
+                The maximum number of steps for the initial local
+                optimizations.
+            use_repulsive_check: bool
+                If True, an energy will be calculated for each randomly
+                drawn structure to check if the energy is not too large.
+            repulsive_tol: float
+                The tolerance for the repulsive energy check.
+            repulsive_calculator: ASE calculator instance
+                The calculator used for the repulsive energy check.
+            local_opt: ASE optimizer object
+                The local optimizer object.
+            local_opt_kwargs: dict
+                The keyword arguments for the local optimizer.
+            parallel_run: bool
+                If True, the optimization will be run in parallel.
+            comm: ASE communicator instance
+                The communicator instance for parallelization.
+            verbose: bool
+                Whether to print the full output (True) or
+                not (False).
+            seed: int (optional)
+                The random seed for the optimization.
+                The seed can also be a RandomState or Generator instance.
+                If not given, the default random number generator is used.
+ """ + # Set the verbose + self.verbose = verbose + # Create the atoms object from the slab and adsorbate + self.create_slab_ads(slab, adsorbate, adsorbate2) + # Create the boundary conditions + self.setup_bounds(bounds) + # Set the parameters + self.update_arguments( + n_random_draws=n_random_draws, + use_initial_struc=use_initial_struc, + use_initial_opt=use_initial_opt, + initial_fmax=initial_fmax, + initial_steps=initial_steps, + use_repulsive_check=use_repulsive_check, + repulsive_tol=repulsive_tol, + repulsive_calculator=repulsive_calculator, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + # Make initial optimizable structure + self.make_initial_structure() + + def create_slab_ads( + self, + slab, + adsorbate, + adsorbate2=None, + **kwargs, + ): + """ + Create the structure for the adsorption optimization. + + Parameters: + slab: Atoms object + The slab structure. + adsorbate: Atoms object + The adsorbate structure. + adsorbate2: Atoms object (optional) + The second adsorbate structure. + + Returns: + self: object + The object itself. 
+ """ + # Check the slab and adsorbate are given + if slab is None or adsorbate is None: + raise ValueError("The slab and adsorbate must be given!") + # Setup the slab + self.n_slab = len(slab) + self.slab = slab.copy() + self.slab.set_tags(0) + optimizable = self.slab.copy() + # Setup the adsorbate + self.n_ads = len(adsorbate) + self.adsorbate = adsorbate.copy() + self.adsorbate.set_tags(1) + self.adsorbate.cell = optimizable.cell.copy() + self.adsorbate.pbc = optimizable.pbc.copy() + pos_ads = self.adsorbate.get_positions() + pos_ads -= pos_ads.mean(axis=0) + self.adsorbate.set_positions(pos_ads) + optimizable.extend(self.adsorbate.copy()) + # Setup the adsorbate2 + if adsorbate2 is not None: + self.n_ads2 = len(adsorbate2) + self.adsorbate2 = adsorbate2.copy() + self.adsorbate2.set_tags(2) + self.adsorbate2.cell = optimizable.cell.copy() + self.adsorbate2.pbc = optimizable.pbc.copy() + pos_ads2 = self.adsorbate2.get_positions() + pos_ads2 -= pos_ads2.mean(axis=0) + self.adsorbate2.set_positions(pos_ads2) + optimizable.extend(self.adsorbate2.copy()) + else: + self.n_ads2 = 0 + self.adsorbate2 = None + # Get the full number of atoms + self.natoms = len(optimizable) + # Store the positions and cell + self.positions0 = optimizable.get_positions().copy() + self.cell = array(optimizable.get_cell()) + # Setup the optimizable structure + self.setup_optimizable(optimizable) + return self + + def setup_bounds(self, bounds=None): + """ + Setup the boundary conditions for the global optimization. + + Parameters: + bounds: (6,2) or (12,2) ndarray (optional). + The boundary conditions used for drawing the positions + for the adsorbate(s). + The boundary conditions are the x, y, and z coordinates of + the center of the adsorbate and 3 rotations. + Same boundary conditions can be set for the second adsorbate + if chosen. + + Returns: + self: object + The object itself. 
+ """ + # Check the bounds are given + if bounds is None: + # Make default bounds + self.bounds = asarray( + [ + [0.0, 1.0], + [0.0, 1.0], + [0.0, 1.0], + [0.0, 2.0 * pi], + [0.0, 2.0 * pi], + [0.0, 2.0 * pi], + ] + ) + else: + self.bounds = bounds.copy() + # Check the bounds have the correct shape + if self.n_ads2 == 0 and self.bounds.shape != (6, 2): + raise ValueError("The bounds must have shape (6,2)!") + elif self.n_ads2 > 0 and not ( + self.bounds.shape == (6, 2) or self.bounds.shape == (12, 2) + ): + raise ValueError("The bounds must have shape (6,2) or (12,2)!") + # Check if the bounds are for two adsorbates + if self.n_ads2 > 0 and self.bounds.shape[0] == 6: + self.bounds = concatenate([self.bounds, self.bounds], axis=0) + return self + + def run( + self, + fmax=0.05, + steps=1000000, + max_unc=None, + dtrust=None, + unc_convergence=None, + **kwargs, + ): + # Check if the optimization can take any steps + if steps <= 0: + return self._converged + # Take initial structure into account + n_random_draws = self.n_random_draws + if self.use_initial_struc: + n_random_draws -= 1 + # Draw random structures + x_drawn = self.draw_random_structures( + n_random_draws=n_random_draws, + **kwargs, + ) + # Get the best drawn structure + best_pos, steps = self.get_best_drawn_structure( + x_drawn, + steps=steps, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + # Set the positions + self.optimizable.set_positions(best_pos) + # Check if the optimization can take any steps + if steps <= 0: + self.message( + "No steps left after drawing random structures.", + is_warning=True, + ) + return self._converged + # Run the local optimization + converged, _ = self.local_optimize( + atoms=self.optimizable, + fmax=fmax, + steps=steps, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + # Check if the optimization is converged + self._converged = self.check_convergence( + converged=converged, + max_unc=max_unc, + dtrust=dtrust, + unc_convergence=unc_convergence, + ) + # Return 
whether the optimization is converged + return self._converged + + def is_parallel_allowed(self): + return False + + def update_arguments( + self, + slab=None, + adsorbate=None, + adsorbate2=None, + bounds=None, + n_random_draws=None, + use_initial_struc=None, + use_initial_opt=None, + initial_fmax=None, + initial_steps=None, + use_repulsive_check=None, + repulsive_tol=None, + repulsive_calculator=None, + local_opt=None, + local_opt_kwargs=None, + parallel_run=None, + comm=None, + verbose=None, + seed=None, + **kwargs, + ): + """ + Update the instance with its arguments. + The existing arguments are used if they are not given. + + Parameters: + slab: Atoms instance + The slab structure. + adsorbate: Atoms instance + The adsorbate structure. + adsorbate2: Atoms instance (optional) + The second adsorbate structure. + bounds: (6,2) or (12,2) ndarray (optional). + The boundary conditions used for drawing the positions + for the adsorbate(s). + The boundary conditions are the x, y, and z coordinates of + the center of the adsorbate and 3 rotations. + Same boundary conditions can be set for the second adsorbate + if chosen. + n_random_draws: int + The number of random structures to be drawn. + use_initial_struc: bool + If True, the initial structure is used as one of the drawn + structures. + use_initial_opt: bool + If True, the initial structures, drawn from the random + sampling, will be local optimized before the structure + with lowest energy are local optimized. + initial_fmax: float + The maximum force for the initial local optimizations. + initial_steps: int + The maximum number of steps for the initial local + optimizations. + use_repulsive_check: bool + If True, a energy will be calculated for each randomly + drawn structure to check if the energy is not too large. + repulsive_tol: float + The tolerance for the repulsive energy check. + repulsive_calculator: ASE calculator instance + The calculator used for the repulsive energy check. 
+ local_opt: ASE optimizer object + The local optimizer object. + local_opt_kwargs: dict + The keyword arguments for the local optimizer. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator instance for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the parameters in the parent class + super().update_arguments( + optimizable=None, + local_opt=local_opt, + local_opt_kwargs=local_opt_kwargs, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + # Create the atoms object from the slab and adsorbate + if slab is not None or adsorbate is not None or adsorbate2 is not None: + if slab is None: + slab = self.slab.copy() + if adsorbate is None: + adsorbate = self.adsorbate.copy() + if adsorbate2 is None and self.adsorbate2 is not None: + adsorbate2 = self.adsorbate2.copy() + self.create_slab_ads( + slab, + adsorbate, + adsorbate2, + ) + # Create the boundary conditions + if bounds is not None: + self.setup_bounds(bounds) + # Set the rest of the parameters + if n_random_draws is not None: + self.n_random_draws = int(n_random_draws) + if use_initial_struc is not None: + self.use_initial_struc = use_initial_struc + if use_initial_opt is not None: + self.use_initial_opt = use_initial_opt + if initial_fmax is not None: + self.initial_fmax = float(initial_fmax) + if initial_steps is not None: + self.initial_steps = int(initial_steps) + if use_repulsive_check is not None: + self.use_repulsive_check = use_repulsive_check + if repulsive_tol is not None: + self.repulsive_tol = float(repulsive_tol) + if repulsive_calculator is not None or not hasattr( + self, "repulsive_calculator" + ): + self.repulsive_calculator = repulsive_calculator + return 
self + + def draw_random_structures(self, n_random_draws=50, **kwargs): + "Draw random structures for the adsorption optimization." + # Get reference energy + self.e_ref = self.get_reference_energy() + # Initialize the drawn structures + failed_steps = 0 + n_drawn = 0 + x_drawn = [] + # Set a dummy structure for the repulsive check + if self.use_repulsive_check: + dummy_optimizable = self.optimizable.copy() + dummy_optimizable.calc = self.repulsive_calculator + # Draw random structures + while n_drawn < n_random_draws: + # Draw a random structure + x = self.rng.uniform(low=self.bounds[:, 0], high=self.bounds[:, 1]) + # Evaluate the value of the structure + if self.use_repulsive_check: + e = self.evaluate_value(x, atoms=dummy_optimizable) + # Check if the value is not too large + if e - self.e_ref > self.repulsive_tol: + failed_steps += 1 + if failed_steps > 100.0 * n_random_draws: + self.message( + f"{failed_steps} failed drawns. " + "Stopping is recommended!", + is_warning=True, + ) + continue + # Add the structure to the list of drawn structures + x_drawn.append(x) + n_drawn += 1 + return x_drawn + + def get_best_drawn_structure( + self, + x_drawn, + steps=1000, + max_unc=None, + dtrust=None, + **kwargs, + ): + "Get the best drawn structure from the random sampling." 
+ # Initialize the best energy and position + self.best_energy = inf + self.best_pos = None + self.best_energy_no_crit = inf + self.best_pos_no_crit = None + # Calculate the energy of the initial structure if used + if self.use_initial_struc: + # Get the energy of the structure + e = self.optimizable.get_potential_energy() + # Check if the energy is lower than the best energy + self.check_best_structure( + e=e, + pos=self.optimizable.get_positions(), + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + # Check each drawn structure + for x in x_drawn: + # Get the new positions of the adsorbate + pos = self.get_new_positions(x, **kwargs) + # Set the positions + self.optimizable.set_positions(pos) + # Check if the initial optimization is used + if self.use_initial_opt: + # Run the local optimization + _, used_steps = self.local_optimize( + atoms=self.optimizable, + fmax=self.initial_fmax, + steps=self.initial_steps, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + steps -= used_steps + self.steps += used_steps + pos = self.optimizable.get_positions() + else: + steps -= 1 + self.steps += 1 + # Get the energy of the structure + e = self.optimizable.get_potential_energy() + # Check if the energy is lower than the best energy + self.check_best_structure( + e=e, + pos=pos, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + # Return the best position and number of steps + if self.best_energy == inf: + self.message( + "Uncertainty or trust distance is above the maximum allowed." + ) + return self.best_pos_no_crit, steps + return self.best_pos, steps + + def check_best_structure( + self, + e, + pos, + max_unc=None, + dtrust=None, + **kwargs, + ): + "Check if the structure is the best one." 
+ # Check if the energy is lower than the best energy + if e < self.best_energy: + # Update the best energy and position without criteria + if e < self.best_energy_no_crit: + self.best_energy_no_crit = e + self.best_pos_no_crit = pos.copy() + # Check if criteria are met + is_within_crit = True + # Check if the uncertainty is above the maximum allowed + if max_unc is not None: + unc = self.get_uncertainty() + if unc > max_unc: + is_within_crit = False + # Check if the structures are within the trust distance + if dtrust is not None: + within_dtrust = self.is_within_dtrust(dtrust=dtrust) + if not within_dtrust: + is_within_crit = False + # Update the best energy and position + if is_within_crit: + self.best_energy = e + self.best_pos = pos.copy() + return self.best_energy, self.best_pos + + def rotation_matrix(self, angles, positions): + "Rotate the adsorbate" + # Get the angles + theta1, theta2, theta3 = angles + # Calculate the trigonometric functions + cos1 = cos(theta1) + sin1 = sin(theta1) + cos2 = cos(theta2) + sin2 = sin(theta2) + cos3 = cos(theta3) + sin3 = sin(theta3) + # Calculate the full rotation matrix + R = asarray( + [ + [cos2 * cos3, cos2 * sin3, -sin2], + [ + sin1 * sin2 * cos3 - cos1 * sin3, + sin1 * sin2 * sin3 + cos1 * cos3, + sin1 * cos2, + ], + [ + cos1 * sin2 * cos3 + sin1 * sin3, + cos1 * sin2 * sin3 - sin1 * cos3, + cos1 * cos2, + ], + ] + ) + # Calculate the rotation of the positions + positions = matmul(positions, R) + return positions + + def get_new_positions(self, x, **kwargs): + "Get the new positions of the adsorbate." 
+ # Get the positions + pos = self.positions0.copy() + # Calculate the positions of the adsorbate + n_slab = self.n_slab + n_all = self.n_slab + self.n_ads + pos_ads = pos[n_slab:n_all] + pos_ads = self.rotation_matrix(x[3:6], pos_ads) + pos_ads += (self.cell * x[:3].reshape(-1, 1)).sum(axis=0) + pos[n_slab:n_all] = pos_ads + # Calculate the positions of the second adsorbate + if self.n_ads2 > 0: + pos_ads2 = pos[n_all:] + pos_ads2 = self.rotation_matrix(x[9:12], pos_ads2) + pos_ads2 += (self.cell * x[6:9].reshape(-1, 1)).sum(axis=0) + pos[n_all:] = pos_ads2 + return pos + + def evaluate_value(self, x, atoms, **kwargs): + "Evaluate the value of the adsorption." + # Get the new positions of the adsorption + pos = self.get_new_positions(x, **kwargs) + # Set the positions + atoms.set_positions(pos) + # Get the potential energy + return atoms.get_potential_energy() + + def get_reference_energy(self, **kwargs): + "Get the reference energy of the structure." + # If the repulsive check is not used, return 0.0 + if not self.use_repulsive_check: + return 0.0 + # Calculate the energy of the isolated slab + atoms = self.slab.copy() + atoms.calc = self.repulsive_calculator + e_ref = atoms.get_potential_energy() + # Calculate the energy of the isolated adsorbate + atoms = self.adsorbate.copy() + atoms.calc = self.repulsive_calculator + e_ref += atoms.get_potential_energy() + # Calculate the energy of the isolated second adsorbate + if self.adsorbate2 is not None: + atoms = self.adsorbate2.copy() + atoms.calc = self.repulsive_calculator + e_ref += atoms.get_potential_energy() + return e_ref + + def make_initial_structure(self, **kwargs): + "Get the initial structure for the optimization." + x_drawn = self.draw_random_structures(n_random_draws=1, **kwargs) + x_drawn = x_drawn[0] + pos = self.get_new_positions(x_drawn, **kwargs) + self.optimizable.set_positions(pos) + return self + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + slab=self.slab, + adsorbate=self.adsorbate, + adsorbate2=self.adsorbate2, + bounds=self.bounds, + n_random_draws=self.n_random_draws, + use_initial_struc=self.use_initial_struc, + use_initial_opt=self.use_initial_opt, + initial_fmax=self.initial_fmax, + initial_steps=self.initial_steps, + use_repulsive_check=self.use_repulsive_check, + repulsive_tol=self.repulsive_tol, + repulsive_calculator=self.repulsive_calculator, + local_opt=self.local_opt, + local_opt_kwargs=self.local_opt_kwargs, + parallel_run=self.parallel_run, + comm=self.comm, + verbose=self.verbose, + seed=self.seed, + ) + # Get the constants made within the class + constant_kwargs = dict(steps=self.steps, _converged=self._converged) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/optimizer/sequential.py b/catlearn/optimizer/sequential.py new file mode 100644 index 00000000..91a15635 --- /dev/null +++ b/catlearn/optimizer/sequential.py @@ -0,0 +1,224 @@ +from .method import OptimizerMethod +from ase.parallel import world + + +class SequentialOptimizer(OptimizerMethod): + """ + The SequentialOptimizer is used to run multiple optimizations in + sequence for a given structure. + The SequentialOptimizer is applicable to be used with + active learning. + """ + + def __init__( + self, + methods, + remove_methods=False, + parallel_run=False, + comm=world, + verbose=False, + seed=None, + **kwargs, + ): + """ + Initialize the OptimizerMethod instance. + + Parameters: + methods: List of OptimizerMethod objects + The list of optimization methods to be used. + remove_methods: bool + Whether to remove the methods that have converged. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator object for parallelization. 
+ verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + """ + # Set the parameters + self.update_arguments( + methods=methods, + remove_methods=remove_methods, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + **kwargs, + ) + + def update_optimizable(self, structures, **kwargs): + # Update optimizable for the first method + self.methods[0].update_optimizable(structures, **kwargs) + # Reset the optimization and update the optimizable + self.setup_optimizable() + return self + + def get_optimizable(self): + return self.method.get_optimizable() + + def get_structures( + self, + get_all=True, + properties=[], + allow_calculation=True, + **kwargs, + ): + return self.method.get_structures( + get_all=get_all, + properties=properties, + allow_calculation=allow_calculation, + **kwargs, + ) + + def get_candidates(self, **kwargs): + return self.method.get_candidates(**kwargs) + + def run( + self, + fmax=0.05, + steps=1000000, + max_unc=None, + dtrust=None, + unc_convergence=None, + **kwargs, + ): + # Check if the optimization can take any steps + if steps <= 0: + return self._converged + # Get number of methods + n_methods = len(self.methods) + structures = None + # Run the optimizations + for i, self.method in enumerate(self.methods): + # Update the structures if not the first method + if i > 0: + self.method.update_optimizable(structures) + # Run the optimization + converged = self.method.run( + fmax=fmax, + steps=steps, + max_unc=max_unc, + dtrust=dtrust, + **kwargs, + ) + # Get the structures + structures = self.method.get_structures(allow_calculation=False) + self.optimizable = self.method.get_optimizable() + # Update the number of steps + self.steps += self.method.get_number_of_steps() + steps -= self.method.get_number_of_steps() + # Check if 
the optimization is converged + converged = self.check_convergence( + converged=converged, + max_unc=max_unc, + dtrust=dtrust, + unc_convergence=unc_convergence, + ) + if not converged: + break + # Check if the complete optimization is converged + if i + 1 == n_methods: + self._converged = True + break + # Check if any steps are left + if steps <= 0: + break + # Check if the method should be removed + if self.remove_methods and i + 1 < n_methods: + self.methods = self.methods[1:] + return self._converged + + def set_calculator(self, calculator, copy_calc=False, **kwargs): + self.method.set_calculator(calculator, copy_calc=copy_calc, **kwargs) + for method in self.methods: + method.set_calculator(calculator, copy_calc=copy_calc, **kwargs) + return self + + def setup_optimizable(self, **kwargs): + self.method = self.methods[0] + self.optimizable = self.method.get_optimizable() + self.reset_optimization() + return self + + def is_energy_minimized(self): + return self.methods[-1].is_energy_minimized() + + def is_parallel_allowed(self): + return False + + def update_arguments( + self, + methods=None, + remove_methods=None, + parallel_run=None, + comm=None, + verbose=None, + seed=None, + **kwargs, + ): + """ + Update the instance with its arguments. + The existing arguments are used if they are not given. + + Parameters: + methods: List of OptimizerMethod objects + The list of optimization methods to be used. + remove_methods: bool + Whether to remove the methods that have converged. + parallel_run: bool + If True, the optimization will be run in parallel. + comm: ASE communicator instance + The communicator object for parallelization. + verbose: bool + Whether to print the full output (True) or + not (False). + seed: int (optional) + The random seed for the optimization. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. 
+ """ + # Set the methods + if methods is not None: + self.methods = methods + self.setup_optimizable() + # Set the remove methods + if remove_methods is not None: + self.remove_methods = remove_methods + # Set the parameters in the parent class + super().update_arguments( + optimizable=None, + parallel_run=parallel_run, + comm=comm, + verbose=verbose, + seed=seed, + ) + return self + + def set_seed(self, seed=None, **kwargs): + # Set the seed for the class + super().set_seed(seed=seed, **kwargs) + # Set the seed for each method + for method in self.methods: + method.set_seed(seed=seed, **kwargs) + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + methods=self.methods, + remove_methods=self.remove_methods, + parallel_run=self.parallel_run, + comm=self.comm, + verbose=self.verbose, + ) + # Get the constants made within the class + constant_kwargs = dict(steps=self.steps, _converged=self._converged) + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/regression/gp/README.md b/catlearn/regression/gp/README.md index 26d4536b..c6be059b 100644 --- a/catlearn/regression/gp/README.md +++ b/catlearn/regression/gp/README.md @@ -1,95 +1,95 @@ # Gaussian Process Source Code -The Gaussian process class and the Student T process are implemented to use different classes for prior means, kernels, fingerprints, and hyperparameter fitter. The Gaussian process class itself that can be trained and predict with uncertainties. The derivatives of the targets can be used by using the True bool for the use_derivatives argument in the initialization of the Gaussian process. Furthermore, a noise correction can be added to the covariance matrix to always make it invertible by a True bool for the correction argument. The hyperparameters are in ln values due to robustness. 
The noise hyperparameter is the relative-noise hyperparameter (or noise-to-signal), which corresponds to a replacement of the noise hyperparameter divided with the prefactor hyperparameter defined as a new free hyperparameter. +The Gaussian process class and the Student T process are implemented to use different classes for prior means, kernels, fingerprints, and hyperparameter fitter. The Gaussian process class itself can be trained and used to predict with uncertainties. The derivatives of the targets can be used by using the True bool for the `use_derivatives` argument in the initialization of the Gaussian process. Furthermore, a noise correction can be added to the covariance matrix to always make it invertible by a True bool for the `use_correction` argument. The hyperparameters are in natural log-scale to enforce robustness. The noise hyperparameter is the relative-noise hyperparameter (or noise-to-signal), which corresponds to a replacement of the noise hyperparameter divided by the prefactor hyperparameter, defined as a new free hyperparameter. The Gaussian process class and the Student T process are imported from the models module. ## Baseline -The baseline class used for the Gaussian process is implemeted in the Baseline module. -The repulisive part of the Lennard-Jones potential as a baseline is implemeted as a baseline class. +The baseline class used for the Gaussian process is implemented in the `baseline` module. +The repulsive part of the Lennard-Jones potential is implemented as the `RepulsionCalculator` class. +The Born repulsion with a cutoff at a scaled sum of covalent radii is implemented as the `BornRepulsionCalculator` class. +A Mie potential is also implemented as the `MieCalculator` class. ## HPBoundary ### Boundary conditions -The boundary classes used for constructing boundary conditions for the hyperparameters. +The boundary classes are used for constructing boundary conditions for the hyperparameters in the `hpboundary` module. 
### Hptrans -A variable transformation of the hyperparameters is performed with Variable_Transformation class. The region of interest in hyperparameter space is enlarged without restricting any value. +A variable transformation of the hyperparameters is performed with the `VariableTransformation` class. The region of interest in hyperparameter space is enlarged without restricting any value. ## Calculator -The calculator module include the scripts needed for converging the Gaussian process to an ASE calculator. -The scripts are: -- mlmodel: MLModel is a class that calculate energies, forces, and uncertainties for ASE Atoms. -- mlcalc: MLCalculator is an ASE calculator class that uses the MLModel as calculator. -- database: Database is class that collects the atomic structures with their energies, forces, and fingerprints. -- database_reduction: Database_Reduction is a Database class that reduces the number of training structures. - -## Educated -The Educated_guess class make educated guesses of the MLE and boundary conditions of the hyperparameters. +The calculator module includes the scripts needed for converting the Gaussian process to an ASE calculator. +The modules are: +- `MLModel` is a class that calculates energies, forces, and uncertainties for ASE Atoms. +- `MLCalculator` is an ASE calculator class that uses the MLModel as a calculator. +- `BOCalculator` is like the `MLCalculator`, but the calculated potential energy is added together with the uncertainty times kappa. +- `Database` is a class that collects the atomic structures with their energies, forces, and fingerprints. +- `DatabaseReduction` is a `Database` class that reduces the number of training structures. ## Ensemble -The ensemble model uses multiple models that is trained and make an ensemble of their predictions in different ways. One way is the mean of the prediction another is the variance weighted predictions. 
The EnsembleClustering class uses one of the clustering algorithm from the clustering module to split the training data for the different machine learning models. +The ensemble model uses multiple models that are trained and make an ensemble of their predictions in different ways. One way is the mean of the prediction, and another is the variance-weighted prediction. The EnsembleClustering class uses one of the clustering algorithms from the clustering module to split the training data for the different machine learning models. The clustering algorithms are: -- K_means: The K-means++ clustering method that use the distances to the defined centroids. Each datapoints is assigned to each cluster. A training point can only be included in one cluster. -- K_means_number: This method uses distances similar to the K-means++ clustering method. However, the cluster are of the same size and the number of clusters are defined from the number of training points. A training point can be included in multiple clusters. -- K_means_auto: It is similar to K_means_number, but it uses a range of number of training points that the clusters include. A training point can be included in multiple clusters. -- DistanceClustering: It use predefined centroids where the data points are assigned to each cluster. A training point can only be included in one cluster. +- `K_means` is the K-means++ clustering method that uses the distances to the defined centroids. Each datapoint is assigned to each cluster. A training point can only be included in one cluster. +- `K_means_number`: This method uses distances similar to the K-means++ clustering method. However, the clusters are of the same size, and the number of clusters is defined by the number of training points. A training point can be included in multiple clusters if the data points can not be split equally. +- `K_means_auto` is similar to `K_means_number`, but it uses a range of the number of training points that the clusters include. 
A training point can be included in multiple clusters. +- `K_means_enumeration` uses the training point in the order it is given. +- `FixedClustering` uses predefined centroids where the data points are assigned to each cluster. A training point can only be included in one cluster. +- `RandomClustering` randomly places training points in the given number of clusters. +- `RandomClustering_number` randomly places training points in clusters so that they match the number of points in each cluster as requested. ## Fingerprint -The Fingerprint class convert ASE Atoms into a FingerprintObject class. The FingerprintObject contain a fingerprint vector and derivatives with respect to the Cartesian coordinates. The Fingerprint class has children of different fingerprints: -- cartesian: Cartesian coordinates of the ASE Atoms in the order of the Atoms index. -- coulomb: The Coulomb matrix fingerprint. -- fpwrapper: A wrapper of fingerprints from ASE-GPATOM to the FingerprintObject class. -- invdistances: The inverse distance of all element combinations, where the blocks of each combination is sorted with magnitude. The inverse distances is scaled with the sum of the elements covalent radii. -- sumdistances: The summed inverse distances for each element combinations scaled with elements covalent radii. -- sumdistancespower: The summed inverse distances for each element combinations scaled with elements covalent radii to different orders. +The Fingerprint class converts ASE Atoms into a `FingerprintObject` instance. The `FingerprintObject` contains a fingerprint vector and derivatives with respect to the Cartesian coordinates. The Fingerprint class has child classes with different fingerprints, which are: +- `Cartesian` is the Cartesian coordinates of the ASE Atoms in the order of the atom index. +- `InvDistances` is the inverse distance of all atom combinations. The inverse distances are scaled with the sum of the elements' covalent radii. 
+- `SortedInvDistances` is the inverse distances of all element combinations where the blocks of each combination are sorted by magnitude. +- `SumDistances` is the summed inverse distances for each element combination, scaled with the sum of the elements' covalent radii. +- `SumDistancesPower` is the summed inverse distances for each element combination, scaled with the sum of the elements' covalent radii to different orders. +- `MeanDistances` is the mean inverse distances for each element combination, scaled with the sum of the elements' covalent radii. +- `MeanDistancesPower` is the mean inverse distances for each element combination, scaled with the sum of the elements' covalent radii to different orders. +- `FingerprintWrapperGPAtom` is a wrapper of fingerprints from ASE-GPATOM to the `FingerprintObject` instance. +- `FingerprintWrapperDScribe` is a wrapper of fingerprints from DScribe to the `FingerprintObject` instance. ## Hpfitter -The hyperparameter fitter class that optimize the hyperparameters of the Gaussian process. The hyperparameter fitter needs an objective function and a optimization method as arguments. -A fully-Bayesian mimicking Gaussian process can be achived by the fbpmgp class. +The hyperparameter fitter class that optimizes the hyperparameters of the Gaussian process. The hyperparameter fitter needs an objective function and an optimization method as arguments. +A fully Bayesian mimicking Gaussian process can be achieved by the `FBPMGP` class. ## Kernel -The kernel function is a fundamental part of the Gaussian process. The kernel function uses a distance meassure. -The Distance_matrix class construct a distance matrix of the features that can be used by the kernel function. The Distance_matrix_per_dimension class is used when derivatives of the targets are used, since the distances in each feature dimension needs to be saved. -A parent Kernel class is defined when only targets are used. 
The Kernel_Derivative class is used when derivatives of the targets are needed. +The kernel function is a fundamental part of the Gaussian process. Derivatives of the kernel function can be used by setting the `use_derivatives` argument. An implemented kernel function is the squared exponential kernel (SE) class. ## Means -In the means module different prior mean classes is defined. The prior mean is a key part of the Gaussian process. Constant value prior means classes is implemented as the parent Prior_constant class in constant submodule. The implemented children prior means classes are: -- first: Use the value of the first target. -- max: Use the value of the target with the largest value. -- mean: Use the mean value of the targets. -- median: Use the median value of the targets. -- min: Use the value of the target with the smallest value. +In the means module, different prior mean classes are defined. The prior mean is a key part of the Gaussian process. Constant value prior means classes are implemented as the parent `Prior_constant` class in the constant submodule. The implemented child prior means classes are: +- `Prior_first` uses the value of the first target. +- `Prior_max` uses the value of the target with the largest value. +- `Prior_mean` uses the mean value of the targets. +- `Prior_median` uses the median value of the targets. +- `Prior_min` uses the value of the target with the smallest value. ## Models -The Gaussian process and the Student t process is imported from this module. +The Gaussian process and the Student t process are imported from this module. ## Objectivefunctions -The parent Object_functions class give the form of the objective functions used for optimizing the hyperparameters. The implemented children objective function classes are split into Gaussian process and Student t process objective functions. +The parent `ObjectiveFuction` class gives the form of the objective functions used for optimizing the hyperparameters. 
The implemented child objective function classes are split into Gaussian process and Student t process objective functions. ### GP The Gaussian process objective functions are: -- factorized_gpp: Calculate the minimum of the GPP objective function value over all relative-noise hyperparameter values for each length-scale hyperparameter. The prefactor hyperparameter is determined analytically. -- factorized_likelihood_svd: Calculate the minimum of the negative log-likelihood objective function value over all relative-noise hyperparameter values for each length-scale hyperparameter. The prefactor hyperparameter is determined analytically by maximum-likelihood-estimation. SVD is used for finding the singular values and therefore a noise correction is not needed and the inversion is robust. -- factorized_likelihood: Calculate the minimum of the negative log-likelihood objective function value over all relative-noise hyperparameter values for each length-scale hyperparameter. The prefactor hyperparameter is determined analytically by maximum-likelihood-estimation. -- gpe: The GPE objective function is calculated. -- gpp: The GPP objective function is calculated. -- likelihood: The negative log-likelihood is calculated. -- loo: The leave-one-out cross-validation from a single covariance matrix inversion is calculated. A modification can be used to also get good values for the prefactor hyperparameter. -- mle: The negative maximum log-likelihood is calculated by using an analutically expression of the prefactor hyperparameter. +- `LogLikelihood` is the negative log-likelihood. +- `MaximumLogLikelihood` is the negative maximum log-likelihood calculated by using an analytical expression of the prefactor hyperparameter. +- `FactorizedGPP` calculates the minimum of the GPP objective function value over all relative-noise hyperparameter values for each length-scale hyperparameter. The prefactor hyperparameter is determined analytically. 
+ - `FactorizedLogLikelihood` calculates the minimum of the negative log-likelihood objective function value over all relative-noise hyperparameter values for each length-scale hyperparameter. The prefactor hyperparameter is determined analytically by maximum-likelihood estimation. + - `GPE` is Geisser's predictive mean square error objective function. + - `GPP` is Geisser's surrogate predictive probability objective function. + - `LOO` is the leave-one-out cross-validation calculated from a single covariance matrix inversion. A modification can also be used to get good values for the prefactor hyperparameter. ### TP -- factorized_likelihood_svd: Calculate the minimum of the negative log-likelihood objective function value over all relative-noise hyperparameter values for each length-scale hyperparameter. SVD is used for finding the singular values and therefore a noise correction is not needed and the inversion is robust. -- factorized_likelihood: Calculate the minimum of the negative log-likelihood objective function value over all relative-noise hyperparameter values for each length-scale hyperparameter. -- likelihood: The negative log-likelihood is calculated. +- `LogLikelihood` is the negative log-likelihood. +- `FactorizedLogLikelihood` calculates the minimum of the negative log-likelihood objective function value over all relative-noise hyperparameter values for each length-scale hyperparameter. ## Optimizers -Different optimizers can be used for optimizing the hyperparameters of the Gaussian process. The optimizers are split into local and global optimizers. +Different optimizers can be used for optimizing the hyperparameters of the Gaussian process. The optimizers are split into local, global, line search, and noise line search optimizers. ## Pdistributions -Prior distributions for the hyperparameters can be applied to the objective function. Thereby, the log-posterior is maximized instead of the log-likelihood. 
The prior distributions are important for the optimization of hyperparameters, since it gives prior knowledge about decent hyperparameters. The hyperparameter values are in log-scale. -The parent prior distribution class is Prior_distribution in pdistributions. The children classes are: -- gamma: The gamma distribution. -- gen_normal: The generalized normal distribution. -- invgamma: The inverse-gamma distribution. -- normal: The normal distribution. -- uniform: The uniform prior distribution within an interval. - - +Prior distributions for the hyperparameters can be applied to the objective function. Thereby, the log-posterior is maximized instead of the log-likelihood. The prior distributions are important for the optimization of hyperparameters, since they give prior knowledge about decent hyperparameters. The hyperparameter values are on a natural log-scale. +The parent prior distribution class is the `Prior_distribution` in the `pdistributions` module. The child classes are: +- `Gamma_prior` is the gamma distribution. +- `Gen_normal_prior` is the generalized normal distribution. +- `Invgamma_prior` is the inverse-gamma distribution. +- `Normal_prior` is the normal distribution. +- `Uniform_prior` is the uniform prior distribution within an interval. 
diff --git a/catlearn/regression/gp/baseline/__init__.py b/catlearn/regression/gp/baseline/__init__.py index 162a6538..1e04836a 100644 --- a/catlearn/regression/gp/baseline/__init__.py +++ b/catlearn/regression/gp/baseline/__init__.py @@ -1,10 +1,13 @@ from .baseline import BaselineCalculator +from .bornrepulsive import BornRepulsionCalculator from .idpp import IDPP from .mie import MieCalculator from .repulsive import RepulsionCalculator + __all__ = [ "BaselineCalculator", + "BornRepulsionCalculator", "IDPP", "MieCalculator", "RepulsionCalculator", diff --git a/catlearn/regression/gp/baseline/baseline.py b/catlearn/regression/gp/baseline/baseline.py index 1cf3e988..facce1a3 100644 --- a/catlearn/regression/gp/baseline/baseline.py +++ b/catlearn/regression/gp/baseline/baseline.py @@ -1,28 +1,43 @@ -import numpy as np +from numpy import finfo, zeros from ase.calculators.calculator import Calculator, all_changes -from ase.constraints import FixAtoms class BaselineCalculator(Calculator): + """ + A baseline calculator for ASE Atoms instance. + It uses a flat baseline with zero energy and forces. + """ + implemented_properties = ["energy", "forces"] nolabel = True def __init__( self, reduce_dimensions=True, + use_forces=True, + dtype=float, **kwargs, ): """ - A baseline calculator for ASE atoms object. - It uses a flat baseline with zero energy and forces. + Initialize the baseline calculator. Parameters: reduce_dimensions: bool Whether to reduce the dimensions to only moving atoms if constrains are used. + use_forces: bool + Calculate and store the forces. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ super().__init__() - self.update_arguments(reduce_dimensions=reduce_dimensions, **kwargs) + self.update_arguments( + reduce_dimensions=reduce_dimensions, + use_forces=use_forces, + dtype=dtype, + **kwargs, + ) def calculate( self, @@ -39,75 +54,111 @@ def calculate( # Atoms object. 
Calculator.calculate(self, atoms, properties, system_changes) # Obtain energy and forces for the given structure: - if "forces" in properties: + if "forces" in properties or self.use_forces: energy, forces = self.get_energy_forces( atoms, - get_derivatives=True, + use_forces=True, ) self.results["forces"] = forces else: - energy = self.get_energy_forces(atoms, get_derivatives=False) + energy, _ = self.get_energy_forces(atoms, use_forces=False) self.results["energy"] = energy pass - def update_arguments(self, reduce_dimensions=None, **kwargs): + def set_use_forces(self, use_forces, **kwargs): """ - Update the class with its arguments. - The existing arguments are used if they are not given. + Set whether to use forces or not. + + Parameters: + use_forces: bool + Whether to use forces or not. + + Returns: + self: The updated object itself. + """ + # Set the use_forces + self.use_forces = use_forces + return self + + def set_reduce_dimensions(self, reduce_dimensions, **kwargs): + """ + Set whether to reduce the dimensions or not. Parameters: reduce_dimensions: bool - Whether to reduce the dimensions to only moving atoms - if constrains are used. + Whether to reduce the dimensions or not. + Returns: self: The updated object itself. """ - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions + # Set the reduce_dimensions + self.reduce_dimensions = reduce_dimensions return self - def get_energy_forces(self, atoms, get_derivatives=True, **kwargs): - "Get the energy and forces." - if get_derivatives: - return 0.0, np.zeros((len(atoms), 3)) - return 0.0 + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + If None, the default data type is used. - def get_constraints(self, atoms, **kwargs): + Returns: + self: The updated object itself. 
+ """ + # Set the dtype + self.dtype = dtype + # Set a small number to avoid division by zero + self.eps = finfo(self.dtype).eps + return self + + def update_arguments( + self, + reduce_dimensions=None, + use_forces=None, + dtype=None, + **kwargs, + ): """ - Get the indicies of the atoms that does not have fixed constraints. + Update the class with its arguments. + The existing arguments are used if they are not given. Parameters: - atoms : ASE Atoms - The ASE Atoms object with a calculator. + reduce_dimensions: bool + Whether to reduce the dimensions to only moving atoms + if constrains are used. + use_forces: bool + Calculate and store the forces. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: - not_masked : list - A list of indicies for the moving atoms - if constraints are used. - masked : list - A list of indicies for the fixed atoms - if constraints are used. + self: The updated object itself. """ - not_masked = list(range(len(atoms))) - if not self.reduce_dimensions: - return not_masked, [] - constraints = atoms.constraints - if len(constraints) > 0: - masked = np.concatenate( - [ - c.get_indices() - for c in constraints - if isinstance(c, FixAtoms) - ] - ) - masked = set(masked) - return list(set(not_masked).difference(masked)), list(masked) - return not_masked, [] + if reduce_dimensions is not None: + self.set_reduce_dimensions(reduce_dimensions=reduce_dimensions) + if use_forces is not None: + self.set_use_forces(use_forces=use_forces) + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype) + return self + + def get_energy_forces(self, atoms, use_forces=True, **kwargs): + "Get the energy and forces." + if use_forces: + return 0.0, zeros((len(atoms), 3), dtype=self.dtype) + return 0.0, None def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(reduce_dimensions=self.reduce_dimensions) + arg_kwargs = dict( + reduce_dimensions=self.reduce_dimensions, + use_forces=self.use_forces, + dtype=self.dtype, + ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/baseline/bornrepulsive.py b/catlearn/regression/gp/baseline/bornrepulsive.py new file mode 100644 index 00000000..370a2b6b --- /dev/null +++ b/catlearn/regression/gp/baseline/bornrepulsive.py @@ -0,0 +1,229 @@ +from numpy import where +from .repulsive import RepulsionCalculator + + +class BornRepulsionCalculator(RepulsionCalculator): + """ + A baseline calculator for ASE Atoms instance. + It uses the Born repulsion potential. + A cutoff distance is used to remove the repulsion + at larger distances. + """ + + implemented_properties = ["energy", "forces"] + nolabel = True + + def __init__( + self, + reduce_dimensions=True, + use_forces=True, + wrap=True, + include_ncells=True, + mic=False, + all_ncells=True, + cell_cutoff=2.0, + r_scale=0.57, + power=2, + rs1_cross=0.97, + k_scale=1.0, + dtype=float, + **kwargs, + ): + """ + Initialize the baseline calculator. + + Parameters: + reduce_dimensions: bool + Whether to reduce the fingerprint space if constrains are used. + use_forces: bool + Calculate and store the forces. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The distances will include the neighboring cells. + include_ncells will replace mic. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic or include_ncells. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. 
+ cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + r_scale: float + The scaling of the covalent radii. + A smaller value will move the repulsion to a lower distances. + All distances larger than r_scale is cutoff. + power: int + The power of the repulsion. + rs1_cross: float + The scaled value of the inverse distance with scaling (r_scale) + that crosses the energy of 1 eV. + k_scale: float + The scaling of the repulsion energy after a default scaling + of the energy is calculated. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + """ + super().__init__( + reduce_dimensions=reduce_dimensions, + use_forces=use_forces, + wrap=wrap, + include_ncells=include_ncells, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + r_scale=r_scale, + power=power, + rs1_cross=rs1_cross, + k_scale=k_scale, + dtype=dtype, + **kwargs, + ) + + def update_arguments( + self, + reduce_dimensions=None, + use_forces=None, + wrap=None, + include_ncells=None, + mic=None, + all_ncells=None, + cell_cutoff=None, + r_scale=None, + power=None, + rs1_cross=None, + k_scale=None, + dtype=None, + **kwargs, + ): + """ + Update the class with its arguments. + The existing arguments are used if they are not given. + + Parameters: + reduce_dimensions: bool + Whether to reduce the fingerprint space if constrains are used. + use_forces: bool + Calculate and store the forces. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The distances will include the neighboring cells. + include_ncells will replace mic. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic or include_ncells. + all_ncells: bool + Use all neighboring cells when calculating the distances. 
+ cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + r_scale: float + The scaling of the covalent radii. + A smaller value will move the repulsion to a lower distances. + All distances larger than r_scale is cutoff. + power: int + The power of the repulsion. + rs1_cross: float + The scaled value of the inverse distance with scaling (r_scale) + that crosses the energy of 1 eV. + k_scale: float + The scaling of the repulsion energy after a default scaling + of the energy is calculated. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + + Returns: + self: The updated object itself. + """ + # Set the arguments + if rs1_cross is not None: + self.rs1_cross = abs(float(rs1_cross)) + if k_scale is not None: + self.k_scale = abs(float(k_scale)) + # Update the arguments of the parent class + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_forces=use_forces, + wrap=wrap, + include_ncells=include_ncells, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=False, + rs_cutoff=None, + re_cutoff=None, + r_scale=r_scale, + power=power, + dtype=dtype, + ) + return self + + def set_normalization_constant(self, **kwargs): + # Calculate the normalization + self.c0 = self.k_scale / ((1.0 / self.rs1_cross - 1.0) ** self.power) + self.c0p = -self.c0 * self.power * self.r_scale + return self + + def get_inv_dis( + self, + atoms, + not_masked, + i_nm, + use_forces, + use_vector, + use_include_ncells, + use_mic, + **kwargs, + ): + # Calculate the inverse distances + inv_dist, deriv = super().get_inv_dis( + atoms=atoms, + not_masked=not_masked, + i_nm=i_nm, + use_forces=use_forces, + use_vector=use_vector, + use_include_ncells=use_include_ncells, + use_mic=use_mic, + **kwargs, + ) + # Calculate the scaled inverse distances + inv_dist = self.r_scale * 
inv_dist - 1.0 + # Use only the repulsive part + inv_dist = where(inv_dist < 0.0, 0.0, inv_dist) + return inv_dist, deriv + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + reduce_dimensions=self.reduce_dimensions, + use_forces=self.use_forces, + wrap=self.wrap, + include_ncells=self.include_ncells, + mic=self.mic, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + use_cutoff=self.use_cutoff, + r_scale=self.r_scale, + power=self.power, + dtype=self.dtype, + rs1_cross=self.rs1_cross, + k_scale=self.k_scale, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/regression/gp/baseline/idpp.py b/catlearn/regression/gp/baseline/idpp.py index e9eb51d5..258f8d32 100644 --- a/catlearn/regression/gp/baseline/idpp.py +++ b/catlearn/regression/gp/baseline/idpp.py @@ -1,43 +1,56 @@ -import numpy as np from .baseline import BaselineCalculator from ..fingerprint.geometry import get_full_distance_matrix class IDPP(BaselineCalculator): + """ + A baseline calculator for ASE Atoms instance. + It uses image dependent pair potential. + (https://doi.org/10.1063/1.4878664) + """ def __init__( self, target=[], + wrap=False, mic=False, + use_forces=True, + dtype=float, **kwargs, ): """ - A baseline calculator for ASE atoms object. - It uses image dependent pair potential. + Initialize the baseline calculator. Parameters: target: array The target distances for the IDPP. - mic : bool + wrap: bool + Whether to wrap the atoms to the unit cell or not. + mic: bool Minimum Image Convention (Shortest distances when periodic boundary conditions are used). - - See: - Improved initial guess for minimum energy path calculations. - Søren Smidstrup, Andreas Pedersen, Kurt Stokbro and Hannes Jónsson - Chem. Phys. 
140, 214106 (2014) + use_forces: bool + Calculate and store the forces. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ - super().__init__() - self.update_arguments( + super().__init__( + reduce_dimensions=False, target=target, + wrap=wrap, mic=mic, - **kwargs, + use_forces=use_forces, + dtype=dtype, ) def update_arguments( self, target=None, + wrap=None, mic=None, + use_forces=None, + dtype=None, **kwargs, ): """ @@ -47,28 +60,39 @@ def update_arguments( Parameters: target: array The target distances for the IDPP. - mic : bool + wrap: bool + Whether to wrap the atoms to the unit cell or not. + mic: bool Minimum Image Convention (Shortest distances when periodic boundary conditions are used). + use_forces: bool + Calculate and store the forces. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ + super().update_arguments( + reduce_dimensions=False, + use_forces=use_forces, + dtype=dtype, + ) if target is not None: self.target = target.copy() + if wrap is not None: + self.wrap = wrap if mic is not None: self.mic = mic return self - def get_energy_forces(self, atoms, get_derivatives=True, **kwargs): + def get_energy_forces(self, atoms, use_forces=True, **kwargs): "Get the energy and forces." 
# Get all distances - dis, dis_vec = get_full_distance_matrix( - atoms, - not_masked=None, - mic=self.mic, - vector=get_derivatives, - wrap=False, + dis, dis_vec = self.get_distances( + atoms=atoms, + use_vector=use_forces, ) # Get the number of atoms n_atoms = len(atoms) @@ -79,22 +103,45 @@ def get_energy_forces(self, atoms, get_derivatives=True, **kwargs): # Calculate the energy dis_t = dis - self.target dis_t2 = dis_t**2 - e = 0.5 * np.sum(weights * dis_t2) - if get_derivatives: + e = 0.5 * (weights * dis_t2).sum() + if use_forces: # Calculate the forces finner = 2.0 * (weights / dis_non) * dis_t2 finner -= weights * dis_t finner = finner / dis_non - f = np.sum(dis_vec * finner[:, :, None], axis=0) + f = (dis_vec * finner[:, :, None]).sum(axis=0) return e, f - return e + return e, None + + def get_distances( + self, + atoms, + use_vector, + **kwargs, + ): + "Calculate the distances." + dist, dist_vec = get_full_distance_matrix( + atoms=atoms, + not_masked=None, + use_vector=use_vector, + wrap=self.wrap, + include_ncells=False, + all_ncells=False, + mic=self.mic, + dtype=self.dtype, + **kwargs, + ) + return dist, dist_vec def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization arg_kwargs = dict( target=self.target, + wrap=self.wrap, mic=self.mic, + use_forces=self.use_forces, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/baseline/mie.py b/catlearn/regression/gp/baseline/mie.py index b8e07c32..fb5b4df9 100644 --- a/catlearn/regression/gp/baseline/mie.py +++ b/catlearn/regression/gp/baseline/mie.py @@ -1,80 +1,119 @@ -import numpy as np +from numpy import einsum from .repulsive import RepulsionCalculator class MieCalculator(RepulsionCalculator): + """ + A baseline calculator for ASE Atoms instance. + It uses the Mie potential baseline. + The power and the scaling of the Mie potential can be selected. 
+ """ + implemented_properties = ["energy", "forces"] nolabel = True def __init__( self, reduce_dimensions=True, - r_scale=1.0, + use_forces=True, + wrap=True, + include_ncells=True, + mic=False, + all_ncells=True, + cell_cutoff=4.0, + use_cutoff=False, + rs_cutoff=3.0, + re_cutoff=4.0, + r_scale=0.7, denergy=0.1, power_r=8, power_a=6, - periodic_softmax=True, - mic=False, - wrap=True, - eps=1e-16, + dtype=float, **kwargs, ): """ - A baseline calculator for ASE atoms object. - It uses the Mie potential baseline. - The power and the scaling of the Mie potential can be selected. + Initialize the baseline calculator. Parameters: reduce_dimensions: bool - Whether to reduce the dimensions to only moving atoms - if constrains are used. - r_scale : float + Whether to reduce the fingerprint space if constrains are used. + use_forces: bool + Calculate and store the forces. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The distances will include the neighboring cells. + include_ncells will replace mic. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic or include_ncells. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + r_scale: float The scaling of the covalent radii. - A smaller value will move the potential to a lower distances. 
+ A smaller value will move the repulsion to a lower distances. denergy : float The dispersion energy of the potential. power_r : int - The power of the potential part. + The power of the repulsive part. power_a : int - The power of the attraction part. - periodic_softmax : bool - Use a softmax weighting of the squared distances - when periodic boundary conditions are used. - mic : bool - Minimum Image Convention (Shortest distances - when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. - mic is faster than periodic_softmax, - but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + The power of the attractive part. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ super().__init__( reduce_dimensions=reduce_dimensions, + use_forces=use_forces, + wrap=wrap, + include_ncells=include_ncells, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, r_scale=r_scale, denergy=denergy, power_a=power_a, power_r=power_r, - periodic_softmax=periodic_softmax, - mic=mic, - wrap=wrap, - eps=eps, + dtype=dtype, **kwargs, ) def update_arguments( self, reduce_dimensions=None, + use_forces=None, + wrap=None, + include_ncells=None, + mic=None, + all_ncells=None, + cell_cutoff=None, + use_cutoff=None, + rs_cutoff=None, + re_cutoff=None, r_scale=None, denergy=None, power_r=None, power_a=None, - periodic_softmax=None, - mic=None, - wrap=None, - eps=None, + dtype=None, **kwargs, ): """ @@ -83,106 +122,163 @@ def update_arguments( Parameters: reduce_dimensions: bool - Whether to reduce the dimensions to only moving atoms - if constrains are used. - r_scale : float + Whether to reduce the fingerprint space if constrains are used. + use_forces: bool + Calculate and store the forces. 
+ wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The distances will include the neighboring cells. + include_ncells will replace mic. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic or include_ncells. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + r_scale: float The scaling of the covalent radii. - A smaller value will move the potential to a lower distances. + A smaller value will move the repulsion to a lower distances. denergy : float The dispersion energy of the potential. power_r : int - The power of the potential part. + The power of the repulsive part. power_a : int - The power of the attraction part. - periodic_softmax : bool - Use a softmax weighting of the squared distances - when periodic boundary conditions are used. - mic : bool - Minimum Image Convention (Shortest distances - when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. - mic is faster than periodic_softmax, - but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + The power of the attractive part. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
Returns: self: The updated object itself. """ - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - if r_scale is not None: - self.r_scale = float(r_scale) + # Update the arguments of the class if denergy is not None: self.denergy = float(denergy) if power_r is not None: self.power_r = int(power_r) if power_a is not None: self.power_a = int(power_a) - if periodic_softmax is not None: - self.periodic_softmax = periodic_softmax - if mic is not None: - self.mic = mic - if wrap is not None: - self.wrap = wrap - if eps is not None: - self.eps = abs(float(eps)) + # Update the arguments of the parent class + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_forces=use_forces, + wrap=wrap, + include_ncells=include_ncells, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, + r_scale=r_scale, + power=None, + dtype=dtype, + ) + return self + + def set_normalization_constant(self, **kwargs): # Calculate the normalization power_ar = self.power_a / (self.power_r - self.power_a) - c0 = self.denergy * ( - ((self.power_r / self.power_a) ** power_ar) - * (self.power_r / (self.power_r - self.power_a)) - ) + c0 = self.denergy * ((self.power_r / self.power_a) ** power_ar) + c0 = c0 * (self.power_r / (self.power_r - self.power_a)) # Calculate the r_scale powers self.r_scale_r = c0 * (self.r_scale**self.power_r) self.r_scale_a = c0 * (self.r_scale**self.power_a) + self.power_ar = -self.power_a * self.r_scale_a + self.power_rr = -self.power_r * self.r_scale_r return self - def get_energy_forces(self, atoms, get_derivatives=True, **kwargs): - "Get the energy and forces." 
- # Get the not fixed (not masked) atom indicies - not_masked, masked = self.get_constraints(atoms) - not_masked = np.array(not_masked, dtype=int) - masked = np.array(masked, dtype=int) - # Get the inverse distances - f, g = self.get_inv_distances( - atoms, - not_masked, - masked, - get_derivatives, - **kwargs, - ) - # Calculate energy - energy = (self.r_scale_r * np.sum(f**self.power_r)) - ( - self.r_scale_a * np.sum(f**self.power_a) - ) - if get_derivatives: - forces = np.zeros((len(atoms), 3)) - power_ar = self.power_a * self.r_scale_a - power_rr = self.power_r * self.r_scale_r - inner = (power_ar * (f ** (self.power_a - 1))) - ( - power_rr * (f ** (self.power_r - 1)) - ) - derivs = np.sum(inner.reshape(-1, 1) * g, axis=0) - forces[not_masked] = derivs.reshape(-1, 3) - return energy, forces + def calc_energy( + self, + inv_dist, + not_masked, + i_nm, + use_include_ncells, + **kwargs, + ): + "Calculate the energy." + # Get the repulsive part + if use_include_ncells: + inv_dist_p = (inv_dist**self.power_r).sum(axis=0) + else: + inv_dist_p = inv_dist**self.power_r + # Take double countings into account + inv_dist_p[i_nm, not_masked] *= 2.0 + inv_dist_p[:, not_masked] *= 0.5 + energy = self.r_scale_r * inv_dist_p.sum() + # Get the attractive part + if use_include_ncells: + inv_dist_p = (inv_dist**self.power_a).sum(axis=0) + else: + inv_dist_p = inv_dist**self.power_a + # Take double countings into account + inv_dist_p[i_nm, not_masked] *= 2.0 + inv_dist_p[:, not_masked] *= 0.5 + energy -= self.r_scale_a * inv_dist_p.sum() return energy + def calc_forces( + self, + inv_dist, + deriv, + not_masked, + i_nm, + use_include_ncells=False, + **kwargs, + ): + "Calculate the forces." 
+ # Calculate the derivative of the repulsive energy + inv_dist_p = inv_dist ** (self.power_r - 1) + # Calculate the forces + if use_include_ncells: + forces = einsum("dijc,dij->ic", deriv, inv_dist_p) + else: + forces = einsum("ijc,ij->ic", deriv, inv_dist_p) + forces *= self.power_rr + # Calculate the derivative of the attractive energy + inv_dist_p = inv_dist ** (self.power_a - 1) + # Calculate the forces + if use_include_ncells: + forces -= einsum("dijc,dij->ic", deriv, inv_dist_p) + else: + forces -= self.power_ar * einsum("ijc,ij->ic", deriv, inv_dist_p) + return forces + def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization arg_kwargs = dict( reduce_dimensions=self.reduce_dimensions, + use_forces=self.use_forces, + wrap=self.wrap, + include_ncells=self.include_ncells, + mic=self.mic, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + use_cutoff=self.use_cutoff, + rs_cutoff=self.rs_cutoff, + re_cutoff=self.re_cutoff, r_scale=self.r_scale, denergy=self.denergy, power_a=self.power_a, power_r=self.power_r, - periodic_softmax=self.periodic_softmax, - mic=self.mic, - wrap=self.wrap, - eps=self.eps, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/baseline/repulsive.py b/catlearn/regression/gp/baseline/repulsive.py index 0ac9cf4c..acc803f9 100644 --- a/catlearn/regression/gp/baseline/repulsive.py +++ b/catlearn/regression/gp/baseline/repulsive.py @@ -1,72 +1,116 @@ -import numpy as np +from numpy import arange, asarray, einsum, zeros +from ase.data import covalent_radii from .baseline import BaselineCalculator -from ..fingerprint.geometry import get_inverse_distances +from ..fingerprint.geometry import ( + get_constraints, + get_full_distance_matrix, + fp_cosine_cutoff, +) class RepulsionCalculator(BaselineCalculator): + """ + A baseline calculator for ASE Atoms instance. 
+ It uses a repulsive Lennard-Jones potential baseline. + The power and the scaling of the repulsive Lennard-Jones potential + can be selected. + """ + implemented_properties = ["energy", "forces"] nolabel = True def __init__( self, reduce_dimensions=True, - r_scale=0.7, - power=12, - periodic_softmax=True, - mic=False, + use_forces=True, wrap=True, - eps=1e-16, + include_ncells=True, + mic=False, + all_ncells=True, + cell_cutoff=4.0, + use_cutoff=True, + rs_cutoff=3.0, + re_cutoff=4.0, + r_scale=0.7, + power=10, + dtype=float, **kwargs, ): """ - A baseline calculator for ASE atoms object. - It uses a repulsive Lennard-Jones potential baseline. - The power and the scaling of the repulsive Lennard-Jones potential - can be selected. + Initialize the baseline calculator. Parameters: reduce_dimensions: bool - Whether to reduce the dimensions to only moving atoms - if constrains are used. - r_scale : float + Whether to reduce the fingerprint space if constrains are used. + use_forces: bool + Calculate and store the forces. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The distances will include the neighboring cells. + include_ncells will replace mic. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic or include_ncells. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. 
+ re_cutoff must be larger than rs_cutoff. + r_scale: float The scaling of the covalent radii. A smaller value will move the repulsion to a lower distances. - power : int + power: int The power of the repulsion. - periodic_softmax : bool - Use a softmax weighting of the squared distances - when periodic boundary conditions are used. - mic : bool - Minimum Image Convention (Shortest distances - when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. - mic is faster than periodic_softmax, - but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ super().__init__( reduce_dimensions=reduce_dimensions, + use_forces=use_forces, + wrap=wrap, + include_ncells=include_ncells, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, r_scale=r_scale, power=power, - periodic_softmax=periodic_softmax, - mic=mic, - wrap=wrap, - eps=eps, + dtype=dtype, **kwargs, ) def update_arguments( self, reduce_dimensions=None, + use_forces=None, + wrap=None, + include_ncells=None, + mic=None, + all_ncells=None, + cell_cutoff=None, + use_cutoff=None, + rs_cutoff=None, + re_cutoff=None, r_scale=None, power=None, - periodic_softmax=None, - mic=None, - wrap=None, - eps=None, + dtype=None, **kwargs, ): """ @@ -75,115 +119,326 @@ def update_arguments( Parameters: reduce_dimensions: bool - Whether to reduce the dimensions to only moving atoms - if constrains are used. - r_scale : float + Whether to reduce the fingerprint space if constrains are used. + use_forces: bool + Calculate and store the forces. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. 
+ The distances will include the neighboring cells. + include_ncells will replace mic. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic or include_ncells. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + r_scale: float The scaling of the covalent radii. A smaller value will move the repulsion to a lower distances. - power : int + power: int The power of the repulsion. - periodic_softmax : bool - Use a softmax weighting of the squared distances - when periodic boundary conditions are used. - mic : bool - Minimum Image Convention (Shortest distances - when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. - mic is faster than periodic_softmax, - but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
""" - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_forces=use_forces, + dtype=dtype, + ) + if wrap is not None: + self.wrap = wrap + if include_ncells is not None: + self.include_ncells = include_ncells + if mic is not None: + self.mic = mic + if all_ncells is not None: + self.all_ncells = all_ncells + if cell_cutoff is not None: + self.cell_cutoff = abs(float(cell_cutoff)) + if use_cutoff is not None: + self.use_cutoff = use_cutoff + if rs_cutoff is not None: + self.rs_cutoff = abs(float(rs_cutoff)) + if re_cutoff is not None: + self.re_cutoff = abs(float(re_cutoff)) if r_scale is not None: - self.r_scale = r_scale + self.r_scale = abs(float(r_scale)) if power is not None: self.power = int(power) - if periodic_softmax is not None: - self.periodic_softmax = periodic_softmax - if mic is not None: - self.mic = mic - if wrap is not None: - self.wrap = wrap - if eps is not None: - self.eps = abs(float(eps)) + # Calculate the normalization + self.set_normalization_constant() + return self + + def set_normalization_constant(self, **kwargs): + "Set the normalization constant." # Calculate the normalization self.c0 = self.r_scale**self.power + self.c0p = -self.c0 * self.power return self - def get_energy_forces(self, atoms, get_derivatives=True, **kwargs): + def get_energy_forces(self, atoms, use_forces=True, **kwargs): "Get the energy and forces." 
- # Get the not fixed (not masked) atom indicies - not_masked, masked = self.get_constraints(atoms) - not_masked = np.array(not_masked, dtype=int) - masked = np.array(masked, dtype=int) - # Get the inverse distances - f, g = self.get_inv_distances( + # Get the not fixed (not masked) atom indices + not_masked, _ = get_constraints( atoms, - not_masked, - masked, - get_derivatives, + reduce_dimensions=self.reduce_dimensions, + ) + i_nm = arange(len(not_masked)) + # Check if there are any not masked atoms + if len(not_masked) == 0: + if use_forces: + return 0.0, zeros((len(atoms), 3), dtype=self.dtype) + return 0.0, None + # Check what distance method should be used + ( + use_vector, + use_include_ncells, + use_mic, + ) = self.use_dis_method(pbc=atoms.pbc, use_forces=use_forces, **kwargs) + # Calculate the inverse distances and their derivatives + inv_dist, deriv = self.get_inv_dis( + atoms=atoms, + not_masked=not_masked, + i_nm=i_nm, + use_forces=use_forces, + use_vector=use_vector, + use_include_ncells=use_include_ncells, + use_mic=use_mic, **kwargs, ) # Calculate energy - energy = self.c0 * np.sum(f**self.power) - if get_derivatives: - forces = np.zeros((len(atoms), 3), dtype=float) - c0p = -self.c0 * self.power - derivs = np.sum( - c0p * (f ** (self.power - 1)).reshape(-1, 1) * g, - axis=0, + energy = self.calc_energy( + inv_dist=inv_dist, + i_nm=i_nm, + not_masked=not_masked, + use_include_ncells=use_include_ncells, + ) + # Calculate forces + if use_forces: + forces = zeros((len(atoms), 3), dtype=self.dtype) + forces[not_masked] = self.calc_forces( + inv_dist=inv_dist, + deriv=deriv, + i_nm=i_nm, + not_masked=not_masked, + use_include_ncells=use_include_ncells, + **kwargs, ) - forces[not_masked] = derivs.reshape(-1, 3) return energy, forces + return energy, None + + def calc_energy( + self, + inv_dist, + not_masked, + i_nm, + use_include_ncells, + **kwargs, + ): + "Calculate the energy." 
+ if use_include_ncells: + inv_dist_p = (inv_dist**self.power).sum(axis=0) + else: + inv_dist_p = inv_dist**self.power + # Take double countings into account + inv_dist_p[i_nm, not_masked] *= 2.0 + inv_dist_p[:, not_masked] *= 0.5 + energy = self.c0 * inv_dist_p.sum() return energy - def get_inv_distances( - self, atoms, not_masked, masked, get_derivatives, **kwargs + def calc_forces( + self, + inv_dist, + deriv, + not_masked, + i_nm, + use_include_ncells=False, + **kwargs, + ): + "Calculate the forces." + # Calculate the derivative of the energy + inv_dist_p = inv_dist ** (self.power - 1) + # Calculate the forces + if use_include_ncells: + forces = einsum("dijc,dij->ic", deriv, inv_dist_p) + else: + forces = einsum("ijc,ij->ic", deriv, inv_dist_p) + forces *= self.c0p + return forces + + def get_inv_dis( + self, + atoms, + not_masked, + i_nm, + use_forces, + use_vector, + use_include_ncells, + use_mic, + **kwargs, ): """ - Get the unique inverse distances scaled with the covalent radii - and its derivatives. + Get the inverse distances and their derivatives. + + Parameters: + atoms: ase.Atoms + The atoms object. + not_masked: list + The indices of the atoms that are not masked. + i_nm: list + The indices of the atoms that are not masked. + use_forces: bool + Whether to calculate the forces. + use_vector: bool + Whether to use the vector of the distances. + use_include_ncells: bool + Whether to include the neighboring cells when calculating + the distances. + use_mic: bool + Whether to use the minimum image convention. + + Returns: + inv_dist: array + The inverse distances. + deriv: array + The derivatives of the inverse distances. 
""" - # Get the indicies for not fixed and not fixed atoms interactions - nmi, nmj = np.triu_indices(len(not_masked), k=1, m=None) - nmi_ind = not_masked[nmi] - nmj_ind = not_masked[nmj] - f, g = get_inverse_distances( - atoms, + # Calculate the distances + dist, dist_vec = self.get_distances( + atoms=atoms, not_masked=not_masked, - masked=masked, - nmi=nmi, - nmj=nmj, - nmi_ind=nmi_ind, - nmj_ind=nmj_ind, - use_derivatives=get_derivatives, - use_covrad=True, - periodic_softmax=self.periodic_softmax, - mic=self.mic, + use_vector=use_vector, + use_include_ncells=use_include_ncells, + use_mic=use_mic, + **kwargs, + ) + # Get the covalent radii + cov_dis = self.get_covalent_distances( + atoms.get_atomic_numbers(), + not_masked, + ) + # Add a small number to avoid division by zero + dist += self.eps + # Check if the distances should be included in the neighboring cells + if use_include_ncells: + # Calculate the inverse distances + inv_dist = cov_dis[None, ...] / dist + # Remove self interaction + inv_dist[0, i_nm, not_masked] = 0.0 + else: + # Calculate the inverse distances + inv_dist = cov_dis / dist + # Remove self interaction + inv_dist[i_nm, not_masked] = 0.0 + # Calculate the derivatives + if use_forces: + deriv = dist_vec * (inv_dist / (dist**2))[..., None] + else: + deriv = None + # Calculate the cutoff function + if self.use_cutoff: + inv_dist, deriv = fp_cosine_cutoff( + inv_dist, + deriv, + rs_cutoff=self.rs_cutoff, + re_cutoff=self.re_cutoff, + eps=self.eps, + ) + return inv_dist, deriv + + def get_distances( + self, + atoms, + not_masked, + use_vector, + use_include_ncells, + use_mic, + **kwargs, + ): + "Calculate the distances." 
+ dist, dist_vec = get_full_distance_matrix( + atoms=atoms, + not_masked=not_masked, + use_vector=use_vector, wrap=self.wrap, - eps=self.eps, + include_ncells=use_include_ncells, + all_ncells=self.all_ncells, + mic=use_mic, + cell_cutoff=self.cell_cutoff, + dtype=self.dtype, **kwargs, ) - return f, g + return dist, dist_vec + + def get_covalent_distances(self, atomic_numbers, not_masked): + "Get the covalent distances of the atoms." + cov_dis = covalent_radii[atomic_numbers] + return asarray(cov_dis + cov_dis[not_masked, None], dtype=self.dtype) + + def use_dis_method(self, pbc, use_forces, **kwargs): + """ + Check what distance method should be used. + + Parameters: + pbc: bool + The periodic boundary conditions. + use_forces: bool + Whether to calculate the forces. + + Returns: + use_vector: bool + Whether to use the vector of the distances. + use_include_ncells: bool + Whether to include the neighboring cells when calculating + the distances. + use_mic: bool + Whether to use the minimum image convention. + """ + if not pbc.any(): + return use_forces, False, False + if self.include_ncells: + return True, True, False + if self.mic: + return True, False, True + return use_forces, False, False def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization arg_kwargs = dict( reduce_dimensions=self.reduce_dimensions, + use_forces=self.use_forces, + wrap=self.wrap, + include_ncells=self.include_ncells, + mic=self.mic, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + use_cutoff=self.use_cutoff, + rs_cutoff=self.rs_cutoff, + re_cutoff=self.re_cutoff, r_scale=self.r_scale, power=self.power, - periodic_softmax=self.periodic_softmax, - mic=self.mic, - wrap=self.wrap, - eps=self.eps, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/calculator/__init__.py b/catlearn/regression/gp/calculator/__init__.py index b7c5cda6..e856e813 100644 --- a/catlearn/regression/gp/calculator/__init__.py +++ b/catlearn/regression/gp/calculator/__init__.py @@ -1,5 +1,5 @@ from .database import Database -from .copy_atoms import copy_atoms +from .copy_atoms import compare_atoms, copy_atoms from .database_reduction import ( DatabaseReduction, DatabaseDistance, @@ -11,10 +11,11 @@ DatabasePointsInterest, DatabasePointsInterestEach, ) -from .mlmodel import ( - MLModel, +from .mlmodel import MLModel +from .default_model import ( get_default_model, get_default_database, + get_default_ensemble, get_default_mlmodel, ) from .hiermodel import HierarchicalMLModel @@ -23,6 +24,7 @@ __all__ = [ "Database", + "compare_atoms", "copy_atoms", "DatabaseReduction", "DatabaseDistance", @@ -36,6 +38,7 @@ "MLModel", "get_default_model", "get_default_database", + "get_default_ensemble", "get_default_mlmodel", "HierarchicalMLModel", "MLCalculator", diff --git a/catlearn/regression/gp/calculator/bocalc.py b/catlearn/regression/gp/calculator/bocalc.py index 91c5ed03..52df343f 100644 --- a/catlearn/regression/gp/calculator/bocalc.py +++ b/catlearn/regression/gp/calculator/bocalc.py @@ -1,8 +1,16 @@ from .mlcalc import MLCalculator +from ..fingerprint.geometry import sine_activation from ase.calculators.calculator import 
Calculator, all_changes class BOCalculator(MLCalculator): + """ + The machine learning calculator object applicable as an ASE calculator for + ASE Atoms instance. + This uses an acquisition function as the energy and forces. + E = E_pred + kappa * sigma + Therefore, it is Bayesian optimization calculator object. + """ # Define the properties available in this calculator implemented_properties = [ @@ -24,15 +32,17 @@ def __init__( calc_force_unc=False, calc_unc_deriv=True, calc_kwargs={}, + round_pred=None, kappa=2.0, + max_unc=None, + max_unc_scale=0.95, **kwargs, ): """ - Bayesian optimization calculator object - applicable as an ASE calculator. + Initialize the ML calculator. Parameters: - mlmodel : MLModel class object + mlmodel: MLModel class object Machine Learning model used for ASE Atoms and calculator. The object must have the functions: calculate, train_model, and add_training. @@ -47,12 +57,21 @@ def __init__( calc_unc_deriv: bool Whether to calculate the derivatives of the uncertainty of the energy. - calc_kwargs : dict + calc_kwargs: dict A dictionary with kwargs for the parent calculator class object. - kappa : float + round_pred: int (optional) + The number of decimals to round the preditions to. + If None, the predictions are not rounded. + kappa: float The weight of the uncertainty relative to the energy. If kappa>0, the uncertainty is added to the predicted energy. + max_unc: float (optional) + The maximum uncertainty value that can be added to the energy. + If the uncertainty is larger than the max_unc_scale times this + value, the cutoff is activated to limit the uncertainty. + max_unc_scale: float (optional) + The scale of the maximum uncertainty value to start the cutoff. 
""" super().__init__( mlmodel=mlmodel, @@ -61,7 +80,10 @@ def __init__( calc_force_unc=calc_force_unc, calc_unc_deriv=calc_unc_deriv, calc_kwargs=calc_kwargs, + round_pred=round_pred, kappa=kappa, + max_unc=max_unc, + max_unc_scale=max_unc_scale, **kwargs, ) @@ -70,7 +92,7 @@ def get_predicted_energy(self, atoms=None, **kwargs): Get the predicted energy without the uncertainty. Parameters: - atoms : ASE Atoms (optional) + atoms: ASE Atoms (optional) The ASE Atoms instance which is used if the uncertainty is not stored. @@ -84,7 +106,7 @@ def get_predicted_forces(self, atoms=None, **kwargs): Get the predicted forces without the derivatives of the uncertainty. Parameters: - atoms : ASE Atoms (optional) + atoms: ASE Atoms (optional) The ASE Atoms instance which is used if the uncertainty is not stored. @@ -114,7 +136,7 @@ def calculate( and predicted forces using *atoms.calc.get_predicted_forces(atoms)*. Returns: - self.results : dict + self.results: dict A dictionary with all the calculated properties. """ # Atoms object. @@ -135,22 +157,44 @@ def calculate( get_unc_derivatives=get_unc_derivatives, ) # Store the properties that are implemented - for key, value in results.items(): - if key in self.implemented_properties: - self.results[key] = value + self.store_properties(results) # Save the predicted properties - self.results["predicted energy"] = results["energy"] + self.modify_results_bo( + get_forces=get_forces, + ) + return self.results + + def modify_results_bo( + self, + get_forces, + **kwargs, + ): + """ + Modify the results of the Bayesian optimization calculator. 
+ """ + # Save the predicted properties + self.results["predicted energy"] = self.results["energy"] if get_forces: - self.results["predicted forces"] = results["forces"].copy() + self.results["predicted forces"] = self.results["forces"].copy() # Calculate the acquisition function and its derivative if self.kappa != 0.0: - self.results["energy"] = ( - results["energy"] + self.kappa * results["uncertainty"] - ) + # Get the uncertainty and its derivatives + unc = self.results["uncertainty"] if get_forces: - self.results["forces"] = results["forces"] - ( - self.kappa * results["uncertainty derivatives"] + unc_deriv = self.results["uncertainty derivatives"] + else: + unc_deriv = None + # Limit the uncertainty to the maximum uncertainty + if self.max_unc is not None and unc > self.max_unc_start: + unc, unc_deriv = self.max_unc_activation( + unc, + unc_deriv=unc_deriv, + use_derivatives=get_forces, ) + # Add the uncertainty to the energy and forces + self.results["energy"] += self.kappa * unc + if get_forces: + self.results["forces"] -= self.kappa * unc_deriv return self.results def update_arguments( @@ -161,7 +205,10 @@ def update_arguments( calc_force_unc=None, calc_unc_deriv=None, calc_kwargs=None, + round_pred=None, kappa=None, + max_unc=None, + max_unc_scale=None, **kwargs, ): """ @@ -169,7 +216,7 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - mlmodel : MLModel class object + mlmodel: MLModel class object Machine Learning model used for ASE Atoms and calculator. The object must have the functions: calculate, train_model, and add_training. @@ -184,30 +231,64 @@ def update_arguments( calc_unc_deriv: bool Whether to calculate the derivatives of the uncertainty of the energy. - calc_kwargs : dict + calc_kwargs: dict A dictionary with kwargs for the parent calculator class object. - kappa : float + round_pred: int (optional) + The number of decimals to round the preditions to. + If None, the predictions are not rounded. 
+ kappa: float The weight of the uncertainty relative to the energy. + max_unc: float (optional) + The maximum uncertainty value that can be added to the energy. + If the uncertainty is larger than the max_unc_scale times this + value, the cutoff is activated to limit the uncertainty. + max_unc_scale: float (optional) + The scale of the maximum uncertainty value to start the cutoff. Returns: self: The updated object itself. """ - if mlmodel is not None: - self.mlmodel = mlmodel.copy() - if calc_forces is not None: - self.calc_forces = calc_forces - if calc_unc is not None: - self.calc_unc = calc_unc - if calc_force_unc is not None: - self.calc_force_unc = calc_force_unc - if calc_unc_deriv is not None: - self.calc_unc_deriv = calc_unc_deriv - if calc_kwargs is not None: - self.calc_kwargs = calc_kwargs.copy() + # Set the parameters in the parent class + super().update_arguments( + mlmodel=mlmodel, + calc_forces=calc_forces, + calc_unc=calc_unc, + calc_force_unc=calc_force_unc, + calc_unc_deriv=calc_unc_deriv, + calc_kwargs=calc_kwargs, + round_pred=round_pred, + ) + # Set the kappa value if kappa is not None: - self.kappa = float(kappa) - # Empty the results + self.set_kappa(kappa) + elif not hasattr(self, "kappa"): + self.set_kappa(0.0) + # Set the maximum uncertainty value + if max_unc is not None: + self.max_unc = abs(float(max_unc)) + elif not hasattr(self, "max_unc"): + self.max_unc = None + if max_unc_scale is not None: + self.max_unc_scale = float(max_unc_scale) + if self.max_unc_scale > 1.0: + raise ValueError( + "max_unc_scale must be less than or equal to 1.0" + ) + if self.max_unc is not None: + self.max_unc_start = self.max_unc_scale * self.max_unc + return self + + def set_kappa(self, kappa, **kwargs): + """ + Set the kappa value. + The kappa value is used to calculate the acquisition function. + + Parameters: + kappa: float + The weight of the uncertainty relative to the energy. 
+ """ + self.kappa = float(kappa) self.reset() return self @@ -247,6 +328,22 @@ def get_property_arguments(self, properties=[], **kwargs): get_unc_derivatives, ) + def max_unc_activation(self, unc, unc_deriv=None, use_derivatives=False): + # Calculate the activation function + fc, gc = sine_activation( + unc, + use_derivatives=use_derivatives, + xs_activation=self.max_unc_start, + xe_activation=self.max_unc, + ) + # Calculate the derivative of the uncertainty + if use_derivatives: + unc_deriv = unc_deriv * (1.0 - fc) + unc_deriv += gc * (self.max_unc_start - unc) + # Apply the activation function to the uncertainty + unc = (unc * (1.0 - fc)) + (self.max_unc * fc) + return unc, unc_deriv + def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization @@ -257,7 +354,10 @@ def get_arguments(self): calc_force_unc=self.calc_force_unc, calc_unc_deriv=self.calc_unc_deriv, calc_kwargs=self.calc_kwargs, + round_pred=self.round_pred, kappa=self.kappa, + max_unc=self.max_unc, + max_unc_scale=self.max_unc_scale, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/calculator/copy_atoms.py b/catlearn/regression/gp/calculator/copy_atoms.py index 1bf0a6f3..493d4e8f 100644 --- a/catlearn/regression/gp/calculator/copy_atoms.py +++ b/catlearn/regression/gp/calculator/copy_atoms.py @@ -1,36 +1,115 @@ -import numpy as np +from numpy import array, asarray, isscalar, ndarray from ase.calculators.calculator import Calculator, PropertyNotImplementedError -def copy_atoms(atoms, **kwargs): +def copy_atoms(atoms, results={}, **kwargs): """ - Copy the atoms object together with the calculated properties. + Copy the atoms instance together with the calculated properties. Parameters: - atoms : ASE Atoms - The ASE Atoms object with a calculator that is copied. + atoms: ASE Atoms instance + The ASE Atoms instance with a calculator that is copied. 
+ results: dict (optional) + The properties to be saved in the calculator. + If not given, the properties are taken from the calculator. Returns: - atoms0 : ASE Atoms - The copy of the Atoms object with saved data in the calculator. + atoms0: ASE Atoms instance + The copy of the Atoms instance with saved data in the calculator. """ - # Save the properties calculated - if atoms.calc is not None: - results = atoms.calc.results.copy() - else: - results = {} - # Copy the ASE Atoms object + # Check if results are given + if not isinstance(results, dict) or len(results) == 0: + # Save the properties calculated + if atoms.calc is not None and atoms.calc.atoms is not None: + if compare_atoms(atoms, atoms.calc.atoms): + results = atoms.calc.results.copy() + # Copy the ASE Atoms instance atoms0 = atoms.copy() # Store the properties in a calculator atoms0.calc = StoredDataCalculator(atoms, **results) return atoms0 +def compare_atoms( + atoms0, + atoms1, + tol=1e-8, + properties_to_check=["atoms", "positions"], + **kwargs, +): + """ + Compare two atoms instances. + + Parameters: + atoms0: ASE Atoms instance + The first ASE Atoms instance. + atoms1: ASE Atoms + The second ASE Atoms instance. + tol: float (optional) + The tolerance for the comparison. + properties_to_check: list (optional) + The properties to be compared. + + Returns: + bool: True if the atoms instances are equal otherwise False. 
+ """ + # Check if the number of atoms is equal + if len(atoms0) != len(atoms1): + return False + # Check if the chemical symbols are equal + if "atoms" in properties_to_check: + if not ( + asarray(atoms0.get_chemical_symbols()) + == asarray(atoms1.get_chemical_symbols()) + ).all(): + return False + # Check if the positions are equal + if "positions" in properties_to_check: + if abs(atoms0.get_positions() - atoms1.get_positions()).max() > tol: + return False + # Check if the cell is equal + if "cell" in properties_to_check: + if abs(atoms0.get_cell() - atoms1.get_cell()).max() > tol: + return False + # Check if the pbc is equal + if "pbc" in properties_to_check: + if not (asarray(atoms0.get_pbc()) == asarray(atoms1.get_pbc())).all(): + return False + # Check if the initial charges are equal + if "initial_charges" in properties_to_check: + if ( + abs( + atoms0.get_initial_charges() - atoms1.get_initial_charges() + ).max() + > tol + ): + return False + # Check if the initial magnetic moments are equal + if "initial_magnetic_moments" in properties_to_check: + if ( + abs( + atoms0.get_initial_magnetic_moments() + - atoms1.get_initial_magnetic_moments() + ).max() + > tol + ): + return False + # Check if the momenta are equal + if "momenta" in properties_to_check: + if abs(atoms0.get_momenta() - atoms1.get_momenta()).max() > tol: + return False + # Check if the velocities are equal + if "velocities" in properties_to_check: + if abs(atoms0.get_velocities() - atoms1.get_velocities()).max() > tol: + return False + return True + + class StoredDataCalculator(Calculator): """ A special calculator that store the data (results) of a single configuration. - It will raise an exception if the atoms object is changed. + It will raise an exception if the atoms instance is changed. 
""" name = "unknown" @@ -38,26 +117,38 @@ class StoredDataCalculator(Calculator): def __init__( self, atoms, + dtype=float, **results, ): - """Save the properties for the given configuration.""" + """ + Save the properties for the given configuration. + + Parameters: + atoms: ASE Atoms instance + The ASE Atoms instance which is used. + dtype: data type + The data type of the properties. + results: dict + The properties to be saved in the calculator. + If not given, the properties are taken from the calculator. + """ super().__init__() self.results = {} # Save the properties - for property, value in results.items(): + for prop, value in results.items(): if value is None: continue elif isinstance(value, (float, int)): - self.results[property] = value + self.results[prop] = value else: - self.results[property] = np.array(value, dtype=float) + self.results[prop] = array(value, dtype=dtype) # Save the configuration self.atoms = atoms.copy() def __str__(self): tokens = [] for key, val in sorted(self.results.items()): - if np.isscalar(val): + if isscalar(val): txt = "{}={}".format(key, val) else: txt = "{}=...".format(key) @@ -76,6 +167,20 @@ def get_property(self, name, atoms=None, allow_calculation=True): return None # Return the property result = self.results[name] - if isinstance(result, np.ndarray): + if isinstance(result, (ndarray, list)): result = result.copy() return result + + def get_uncertainty(self, atoms=None, **kwargs): + """ + Get the predicted uncertainty of the energy. + + Parameters: + atoms: ASE Atoms (optional) + The ASE Atoms instance which is used + if the uncertainty is not stored. + + Returns: + float: The predicted uncertainty of the energy. 
+ """ + return self.get_property("uncertainty", atoms=atoms) diff --git a/catlearn/regression/gp/calculator/database.py b/catlearn/regression/gp/calculator/database.py index 3dd93d56..93dbdd68 100644 --- a/catlearn/regression/gp/calculator/database.py +++ b/catlearn/regression/gp/calculator/database.py @@ -1,44 +1,61 @@ -import numpy as np +from numpy import array, asarray, concatenate +from numpy import round as round_ +from numpy.random import default_rng, Generator, RandomState from scipy.spatial.distance import cdist from ase.constraints import FixAtoms -from ase.io import write +from ase.io.trajectory import TrajectoryWriter from .copy_atoms import copy_atoms class Database: + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + """ + def __init__( self, fingerprint=None, reduce_dimensions=True, use_derivatives=True, use_fingerprint=True, + round_targets=None, + seed=None, + dtype=float, **kwargs, ): """ - Database of ASE atoms objects that are converted - into fingerprints and targets. + Initialize the database. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed. + The seed can also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. 
""" # The negative forces have to be used since the derivatives are used self.use_negative_forces = True # Use default fingerprint if it is not given if fingerprint is None: - from ..fingerprint.cartesian import Cartesian - - fingerprint = Cartesian( + self.set_default_fp( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, + dtype=dtype, ) # Set the arguments self.update_arguments( @@ -46,6 +63,9 @@ def __init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, **kwargs, ) @@ -54,7 +74,7 @@ def add(self, atoms, **kwargs): Add an ASE Atoms object to the database. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator. Returns: @@ -68,7 +88,7 @@ def add_set(self, atoms_list, **kwargs): Add a set of ASE Atoms objects to the database. Parameters: - atoms_list : list or ASE Atoms + atoms_list: list or ASE Atoms A list of or a single ASE Atoms with calculated energies and forces. @@ -81,36 +101,37 @@ def add_set(self, atoms_list, **kwargs): def get_constraints(self, atoms, **kwargs): """ - Get the indicies of the atoms that does not have fixed constraints. + Get the indices of the atoms that does not have fixed constraints. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator. Returns: - not_masked : list - A list of indicies for the moving atoms. + not_masked: list + A list of indices for the moving atoms. 
""" not_masked = list(range(len(atoms))) if not self.reduce_dimensions: return not_masked constraints = atoms.constraints - if len(constraints) > 0: - index_mask = [ + if len(constraints): + masked = [ c.get_indices() for c in constraints if isinstance(c, FixAtoms) ] - index_mask = set(np.concatenate(index_mask)) - return list(set(not_masked).difference(index_mask)) + if len(masked): + masked = set(concatenate(masked)) + return list(set(not_masked).difference(masked)) return not_masked - def get_atoms(self, **kwargs): + def get_data_atoms(self, **kwargs): """ Get the list of atoms in the database. Returns: list: A list of the saved ASE Atoms objects. """ - return self.atoms_list.copy() + return [self.copy_atoms(atoms) for atoms in self.atoms_list] def get_features(self, **kwargs): """ @@ -119,7 +140,9 @@ def get_features(self, **kwargs): Returns: array: A matrix array with the saved features or fingerprints. """ - return np.array(self.features) + if self.use_fingerprint: + return asarray(self.features) + return array(self.features, dtype=self.dtype) def get_targets(self, **kwargs): """ @@ -128,20 +151,48 @@ def get_targets(self, **kwargs): Returns: array: A matrix array with the saved targets. """ - return np.array(self.targets) + return array(self.targets, dtype=self.dtype) - def save_data(self, trajectory="data.traj", **kwargs): + def save_data( + self, + trajectory="data.traj", + mode="w", + write_last=False, + **kwargs, + ): """ Save the ASE Atoms data to a trajectory. Parameters: - trajectory : str + trajectory: str or TrajectoryWriter instance The name of the trajectory file where the data is saved. + Or a TrajectoryWriter instance where the data is saved to. + mode: str + The mode of the trajectory file. + write_last: bool + Whether to only write the last atoms instance to the + trajectory. + If False, all atoms instances in the database are written + to the trajectory. Returns: self: The updated object itself. 
""" - write(trajectory, self.get_atoms()) + if trajectory is None: + return self + if isinstance(trajectory, str): + with TrajectoryWriter(trajectory, mode=mode) as traj: + if write_last: + traj.write(self.atoms_list[-1]) + else: + for atoms in self.atoms_list: + traj.write(atoms) + elif isinstance(trajectory, TrajectoryWriter): + if write_last: + trajectory.write(self.atoms_list[-1]) + else: + for atoms in self.atoms_list: + trajectory.write(atoms) return self def copy_atoms(self, atoms, **kwargs): @@ -149,7 +200,7 @@ def copy_atoms(self, atoms, **kwargs): Copy the atoms object together with the calculated properties. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator that is copied. Returns: @@ -164,7 +215,7 @@ def make_atoms_feature(self, atoms, **kwargs): It can e.g. be used for predicting. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator. Returns: @@ -176,18 +227,47 @@ def make_atoms_feature(self, atoms, **kwargs): return self.fingerprint(atoms) return self.fingerprint(atoms).get_vector() + def append_target(self, atoms, **kwargs): + """ + Append the target(s) to the list. + + Parameters: + atoms: ASE Atoms + The ASE Atoms object with a calculator. + + Returns: + self: The updated object + """ + # Make the target(s) + target = self.make_target( + atoms, + use_derivatives=self.use_derivatives, + use_negative_forces=self.use_negative_forces, + **kwargs, + ) + # Round the target if needed + if self.round_targets is not None: + target = round_(target, self.round_targets) + # Append the target(s) + self.targets.append(target) + return self + def make_target( - self, atoms, use_derivatives=True, use_negative_forces=True, **kwargs + self, + atoms, + use_derivatives=True, + use_negative_forces=True, + **kwargs, ): """ Calculate the target as the energy and forces if selected. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator. 
- use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_negative_forces : bool + use_negative_forces: bool Whether derivatives (True) or forces (False) are used. Returns: @@ -200,11 +280,12 @@ def make_target( e = atoms.get_potential_energy() if use_derivatives: not_masked = self.get_constraints(atoms) - f = (atoms.get_forces()[not_masked]).reshape(-1) + f = atoms.get_forces(apply_constraint=False) + f = f[not_masked].reshape(-1) if use_negative_forces: - return np.concatenate([[e], -f]).reshape(-1) - return np.concatenate([[e], f]).reshape(-1) - return np.array([e]) + return concatenate([[e], -f], dtype=self.dtype).reshape(-1) + return concatenate([[e], f], dtype=self.dtype).reshape(-1) + return array([e], dtype=self.dtype) def reset_database(self, **kwargs): """ @@ -223,9 +304,9 @@ def is_in_database(self, atoms, dtol=1e-8, **kwargs): Check if the ASE Atoms is in the database. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator. - dtol : float + dtol: float The tolerance value to determine identical Atoms. 
Returns: @@ -241,9 +322,12 @@ def is_in_database(self, atoms, dtol=1e-8, **kwargs): # Transform the fingerprints into vectors if self.use_fingerprint: fp_atoms = fp_atoms.get_vector() - fp_database = np.array([fp.get_vector() for fp in fp_database]) + fp_database = asarray( + [fp.get_vector() for fp in fp_database], + dtype=self.dtype, + ) # Get the minimum distance between atoms object and the database - dis_min = np.min(cdist([fp_atoms], fp_database)) + dis_min = cdist([fp_atoms], fp_database).min() # Check if the atoms object is in the database if dis_min < dtol: return True @@ -258,12 +342,7 @@ def append(self, atoms, **kwargs): # Append the feature self.features.append(self.make_atoms_feature(atoms)) # Append the target(s) - target = self.make_target( - atoms, - use_derivatives=self.use_derivatives, - use_negative_forces=self.use_negative_forces, - ) - self.targets.append(target) + self.append_target(atoms) return self def get_use_derivatives(self): @@ -281,12 +360,143 @@ def get_use_fingerprint(self): "Get whether a fingerprint is used as the features." return self.use_fingerprint + def set_fingerprint(self, fingerprint, **kwargs): + """ + Set the fingerprint instance. + + Parameters: + fingerprint: Fingerprint object + An object as a fingerprint class + that convert atoms to fingerprint. + + Returns: + self: The updated object itself. + """ + self.fingerprint = fingerprint.copy() + # Reset the database if the use fingerprint is changed + self.reset_database() + return self + + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + # Set the data type of the fingerprint + self.fingerprint.set_dtype(dtype) + return self + + def set_use_fingerprint(self, use_fingerprint, **kwargs): + """ + Set whether the kernel uses fingerprint objects (True) + or arrays (False). 
+ + Parameters: + use_fingerprint: bool + Whether the kernel uses fingerprint objects (True) + or arrays (False). + + Returns: + self: The updated object itself. + """ + # Check if the use fingerprint is already set + if hasattr(self, "use_fingerprint"): + if self.use_fingerprint == use_fingerprint: + return self + # Set the use fingerprint + self.use_fingerprint = use_fingerprint + # Reset the database if the use fingerprint is changed + self.reset_database() + return self + + def set_use_derivatives(self, use_derivatives, **kwargs): + """ + Set whether to use derivatives/forces in the targets. + + Parameters: + use_derivatives: bool + Whether to use derivatives/forces in the targets. + + Returns: + self: The updated object itself. + """ + # Check if the use derivatives is already set + if hasattr(self, "use_derivatives"): + if self.use_derivatives == use_derivatives: + return self + # Set the use derivatives + self.use_derivatives = use_derivatives + # Set the use derivatives of the fingerprint + if use_derivatives: + self.fingerprint.set_use_derivatives(use_derivatives) + # Reset the database if the use derivatives is changed + self.reset_database() + return self + + def set_reduce_dimensions(self, reduce_dimensions, **kwargs): + """ + Set whether to reduce the fingerprint space if constrains are used. + + Parameters: + reduce_dimensions: bool + Whether to reduce the fingerprint space if constrains are used. + + Returns: + self: The updated object itself. + """ + # Check if the reduce_dimensions is already set + if hasattr(self, "reduce_dimensions"): + if self.reduce_dimensions == reduce_dimensions: + return self + # Set the reduce dimensions + self.reduce_dimensions = reduce_dimensions + # Set the reduce dimensions of the fingerprint + self.fingerprint.set_reduce_dimensions(reduce_dimensions) + # Reset the database if the reduce dimensions is changed + self.reset_database() + return self + + def set_seed(self, seed=None): + """ + Set the random seed. 
+ + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + if seed is not None: + self.seed = seed + if isinstance(seed, int): + self.rng = default_rng(self.seed) + elif isinstance(seed, Generator) or isinstance(seed, RandomState): + self.rng = seed + else: + self.seed = None + self.rng = default_rng() + return self + def update_arguments( self, fingerprint=None, reduce_dimensions=None, use_derivatives=None, use_fingerprint=None, + round_targets=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -294,45 +504,66 @@ def update_arguments( if they are not given. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. 
""" - # Control if the database has to be reset - reset_database = False if fingerprint is not None: - self.fingerprint = fingerprint.copy() - reset_database = True + self.set_fingerprint(fingerprint) if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - reset_database = True + self.set_reduce_dimensions(reduce_dimensions) if use_derivatives is not None: - self.use_derivatives = use_derivatives - reset_database = True + self.set_use_derivatives(use_derivatives) if use_fingerprint is not None: - self.use_fingerprint = use_fingerprint - reset_database = True + self.set_use_fingerprint(use_fingerprint) + if round_targets is not None or not hasattr(self, "round_targets"): + self.round_targets = round_targets + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype) # Check that the database and the fingerprint have the same attributes self.check_attributes() - # Reset the database if an argument has been changed - if reset_database: - self.reset_database() return self + def set_default_fp( + self, + reduce_dimensions=True, + use_derivatives=True, + dtype=float, + **kwargs, + ): + "Use default fingerprint if it is not given." + from ..fingerprint.cartesian import Cartesian + + return Cartesian( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + dtype=dtype, + **kwargs, + ) + def check_attributes(self): "Check if all attributes agree between the class and subclasses." if self.reduce_dimensions != self.fingerprint.get_reduce_dimensions(): - raise Exception( + raise ValueError( "Database and Fingerprint do not agree " "whether to reduce dimensions!" 
) @@ -350,6 +581,9 @@ def get_arguments(self): reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, use_fingerprint=self.use_fingerprint, + round_targets=self.round_targets, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/calculator/database_reduction.py b/catlearn/regression/gp/calculator/database_reduction.py index 2165021a..d7adb4cd 100644 --- a/catlearn/regression/gp/calculator/database_reduction.py +++ b/catlearn/regression/gp/calculator/database_reduction.py @@ -1,57 +1,83 @@ -import numpy as np +from numpy import ( + append, + arange, + argmax, + argmin, + argsort, + array, + asarray, + delete, + einsum, + nanmin, + sqrt, +) from scipy.spatial.distance import cdist from .database import Database -from ase.io import write class DatabaseReduction(Database): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + The used Database is a reduced set of the full Database. + The reduction is done with a method that is defined in the class. + """ + def __init__( self, fingerprint=None, reduce_dimensions=True, use_derivatives=True, use_fingerprint=True, + round_targets=None, + seed=None, + dtype=float, npoints=25, - initial_indicies=[0], + initial_indices=[0], include_last=1, **kwargs, ): """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from a method. + Initialize the database. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. 
- use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). - npoints : int + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + npoints: int Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included + initial_indices: list + The indices of the data points that must be included in the used data base. - include_last : int + include_last: int Number of last data point to include in the used data base. """ # The negative forces have to be used since the derivatives are used self.use_negative_forces = True - # Set initial indicies - self.indicies = [] + # Set initial indices + self.indices = [] # Use default fingerprint if it is not given if fingerprint is None: - from ..fingerprint.cartesian import Cartesian - - fingerprint = Cartesian( + self.set_default_fp( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, + dtype=dtype, ) # Set the arguments self.update_arguments( @@ -59,8 +85,11 @@ def __init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, npoints=npoints, - initial_indicies=initial_indicies, + initial_indices=initial_indices, include_last=include_last, **kwargs, ) @@ -71,8 +100,11 @@ def update_arguments( reduce_dimensions=None, use_derivatives=None, use_fingerprint=None, + round_targets=None, + seed=None, + dtype=None, npoints=None, - initial_indicies=None, + initial_indices=None, include_last=None, **kwargs, ): @@ -81,82 +113,82 @@ def update_arguments( if they are not given. 
Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). - npoints : int + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + npoints: int Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included + initial_indices: list + The indices of the data points that must be included in the used data base. - include_last : int + include_last: int Number of last data point to include in the used data base. Returns: self: The updated object itself. 
""" - # Control if the database has to be reset - reset_database = False - if fingerprint is not None: - self.fingerprint = fingerprint.copy() - reset_database = True - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - reset_database = True - if use_derivatives is not None: - self.use_derivatives = use_derivatives - reset_database = True - if use_fingerprint is not None: - self.use_fingerprint = use_fingerprint - reset_database = True + # Set the parameters in the parent class + super().update_arguments( + fingerprint=fingerprint, + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, + ) + # Set the number of points to use if npoints is not None: self.npoints = int(npoints) - if initial_indicies is not None: - self.initial_indicies = np.array(initial_indicies, dtype=int) + # Set the initial indices to keep fixed + if initial_indices is not None: + self.initial_indices = array(initial_indices, dtype=int) + # Set the number of last points to include if include_last is not None: self.include_last = int(abs(include_last)) # Check that too many last points are not included - n_extra = self.npoints - len(self.initial_indicies) + n_extra = self.npoints - len(self.initial_indices) if self.include_last > n_extra: self.include_last = n_extra if n_extra >= 0 else 0 - # Check that the database and the fingerprint have the same attributes - self.check_attributes() - # Reset the database if an argument has been changed - if reset_database: - self.reset_database() # Store that the data base has changed - self.update_indicies = True + self.update_indices = True return self - def get_all_atoms(self, **kwargs): + def get_all_data_atoms(self, **kwargs): """ Get the list of all atoms in the database. Returns: list: A list of the saved ASE Atoms objects. 
""" - return self.atoms_list.copy() + return super().get_data_atoms(**kwargs) - def get_atoms(self, **kwargs): + def get_data_atoms(self, **kwargs): """ Get the list of atoms in the reduced database. Returns: list: A list of the saved ASE Atoms objects. """ - indicies = self.get_reduction_indicies() - return [ - atoms - for i, atoms in enumerate(self.get_all_atoms(**kwargs)) - if i in indicies - ] + indices = self.get_reduction_indices() + atoms_list = self.get_all_data_atoms(**kwargs) + return [atoms_list[i] for i in indices] def get_features(self, **kwargs): """ @@ -165,15 +197,17 @@ def get_features(self, **kwargs): Returns: array: A matrix array with the saved features or fingerprints. """ - indicies = self.get_reduction_indicies() - return np.array(self.features)[indicies] + indices = self.get_reduction_indices() + if self.use_fingerprint: + return array(self.features)[indices] + return array(self.features, dtype=self.dtype)[indices] def get_all_feature_vectors(self, **kwargs): "Get all the features in numpy array form." if self.use_fingerprint: features = [feature.get_vector() for feature in self.features] - return np.array(features) - return np.array(self.features) + return array(features, dtype=self.dtype) + return array(self.features, dtype=self.dtype) def get_targets(self, **kwargs): """ @@ -182,8 +216,8 @@ def get_targets(self, **kwargs): Returns: array: A matrix array with the saved targets. """ - indicies = self.get_reduction_indicies() - return np.array(self.targets)[indicies] + indices = self.get_reduction_indices() + return array(self.targets, dtype=self.dtype)[indices] def get_all_targets(self, **kwargs): """ @@ -192,92 +226,79 @@ def get_all_targets(self, **kwargs): Returns: array: A matrix array with the saved targets. 
""" - return np.array(self.targets) + return array(self.targets, dtype=self.dtype) - def get_initial_indicies(self, **kwargs): + def get_initial_indices(self, **kwargs): """ - Get the initial indicies of the used atoms in the database. + Get the initial indices of the used atoms in the database. Returns: - array: The initial indicies of the atoms used. + array: The initial indices of the atoms used. """ - return self.initial_indicies.copy() + return array(self.initial_indices, dtype=int) - def get_last_indicies(self, indicies, not_indicies, **kwargs): + def get_last_indices(self, indices, not_indices, **kwargs): """ - Include the last indicies that are not in the used indicies list. + Include the last indices that are not in the used indices list. Parameters: - indicies : list - A list of used indicies. - not_indicies : list - A list of indicies that not used yet. + indices: list + A list of used indices. + not_indices: list + A list of indices that not used yet. Returns: - list: A list of the used indicies including the last indicies. + list: A list of the used indices including the last indices. """ if self.include_last != 0: - indicies = np.append( - indicies, - [not_indicies[-self.include_last :]], + last = -self.include_last + indices = append( + indices, + [not_indices[last:]], ) - return indicies + return indices - def get_not_indicies(self, indicies, all_indicies, **kwargs): + def get_not_indices(self, indices, all_indices, **kwargs): """ - Get a list of the indicies that are not in the used indicies list. + Get a list of the indices that are not in the used indices list. Parameters: - indicies : list - A list of indicies. - all_indicies : list - A list of all indicies. + indices: list + A list of indices. + all_indices: list + A list of all indices. Returns: - list: A list of indicies that not used. + list: A list of indices that not used. 
""" - return list(set(all_indicies).difference(indicies)) - - def save_data(self, trajectory="data.traj", **kwargs): - """ - Save the ASE Atoms data to a trajectory. - - Parameters: - trajectory : str - The name of the trajectory file where the data is saved. - - Returns: - self: The updated object itself. - """ - write(trajectory, self.get_all_atoms()) - return self + return list(set(all_indices).difference(indices)) def append(self, atoms, **kwargs): "Append the atoms object, the fingerprint, and target(s) to lists." # Store that the data base has changed - self.update_indicies = True + self.update_indices = True # Append to the data base super().append(atoms, **kwargs) return self - def get_reduction_indicies(self, **kwargs): - "Get the indicies of the reduced data used." - # If the indicies is already calculated then give them - if not self.update_indicies: - return self.indicies - # Set up all the indicies - self.update_indicies = False + def get_reduction_indices(self, **kwargs): + "Get the indices of the reduced data used." + # If the indices is already calculated then give them + if not self.update_indices: + return self.indices + # Set up all the indices + self.update_indices = False data_len = self.__len__() - all_indicies = np.arange(data_len) + all_indices = arange(data_len) # No reduction is needed if the database is not large if data_len <= self.npoints: - self.indicies = all_indicies.copy() - return self.indicies + self.indices = all_indices.copy() + return self.indices # Reduce the data base - self.indicies = self.make_reduction(all_indicies) - return self.indicies + self.indices = self.make_reduction(all_indices) + return self.indices - def make_reduction(self, all_indicies, **kwargs): + def make_reduction(self, all_indices, **kwargs): "Make the reduction of the data base with a chosen method." 
raise NotImplementedError() @@ -289,205 +310,147 @@ def get_arguments(self): reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, use_fingerprint=self.use_fingerprint, + round_targets=self.round_targets, + seed=self.seed, + dtype=self.dtype, npoints=self.npoints, - initial_indicies=self.initial_indicies, + initial_indices=self.initial_indices, include_last=self.include_last, ) # Get the constants made within the class - constant_kwargs = dict(update_indicies=self.update_indicies) + constant_kwargs = dict(update_indices=self.update_indices) # Get the objects made within the class object_kwargs = dict( atoms_list=self.atoms_list.copy(), features=self.features.copy(), targets=self.targets.copy(), - indicies=self.indicies.copy(), + indices=self.indices.copy(), ) return arg_kwargs, constant_kwargs, object_kwargs class DatabaseDistance(DatabaseReduction): - def __init__( - self, - fingerprint=None, - reduce_dimensions=True, - use_derivatives=True, - use_fingerprint=True, - npoints=25, - initial_indicies=[0], - include_last=1, - **kwargs, - ): - """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from the distances. - - Parameters: - fingerprint : Fingerprint object - An object as a fingerprint class - that convert atoms to fingerprint. - reduce_dimensions: bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Whether to use derivatives/forces in the targets. - use_fingerprint : bool - Whether the kernel uses fingerprint objects (True) - or arrays (False). - npoints : int - Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included - in the used data base. - include_last : int - Number of last data point to include in the used data base. 
- """ - super().__init__( - fingerprint=fingerprint, - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - npoints=npoints, - initial_indicies=initial_indicies, - include_last=include_last, - **kwargs, - ) - - def make_reduction(self, all_indicies, **kwargs): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + The used Database is a reduced set of the full Database. + The reduction is done by selecting the points with the + largest distances from each other. + """ + + def make_reduction(self, all_indices, **kwargs): "Reduce the training set with the points farthest from each other." - # Get the fixed indicies - indicies = self.get_initial_indicies() - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + # Get the fixed indices + indices = self.get_initial_indices() + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Include the last point - indicies = self.get_last_indicies(indicies, not_indicies) + indices = self.get_last_indices(indices, not_indices) # Get a random index if no fixed index exist - if len(indicies) == 0: - indicies = np.array([np.random.choice(all_indicies)]) + if len(indices) == 0: + indices = asarray([self.rng.choice(not_indices)], dtype=int) + not_indices = self.get_not_indices(indices, all_indices) # Get all the features features = self.get_all_feature_vectors() fdim = len(features[0]) - for i in range(len(indicies), self.npoints): - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + for i in range(len(indices), self.npoints): + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Calculate the distances to the points already used dist = cdist( - features[indicies].reshape(-1, 
fdim), - features[not_indicies].reshape(-1, fdim), + features[indices].reshape(-1, fdim), + features[not_indices].reshape(-1, fdim), ) # Choose the point furthest from the points already used - i_max = np.argmax(np.nanmin(dist, axis=0)) - indicies = np.append(indicies, [not_indicies[i_max]]) - return np.array(indicies, dtype=int) + i_max = argmax(nanmin(dist, axis=0)) + indices = append(indices, [not_indices[i_max]]) + return array(indices, dtype=int) class DatabaseRandom(DatabaseReduction): - def __init__( - self, - fingerprint=None, - reduce_dimensions=True, - use_derivatives=True, - use_fingerprint=True, - npoints=25, - initial_indicies=[0], - include_last=1, - **kwargs, - ): - """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from random. - - Parameters: - fingerprint : Fingerprint object - An object as a fingerprint class - that convert atoms to fingerprint. - reduce_dimensions: bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Whether to use derivatives/forces in the targets. - use_fingerprint : bool - Whether the kernel uses fingerprint objects (True) - or arrays (False). - npoints : int - Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included - in the used data base. - include_last : int - Number of last data point to include in the used data base. - """ - super().__init__( - fingerprint=fingerprint, - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - npoints=npoints, - initial_indicies=initial_indicies, - include_last=include_last, - **kwargs, - ) - - def make_reduction(self, all_indicies, **kwargs): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. 
+ The used Database is a reduced set of the full Database. + The reduction is done by selecting the points randomly. + """ + + def make_reduction(self, all_indices, **kwargs): "Random select the training points." - # Get the fixed indicies - indicies = self.get_initial_indicies() - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + # Get the fixed indices + indices = self.get_initial_indices() + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Include the last point - indicies = self.get_last_indicies(indicies, not_indicies) - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + indices = self.get_last_indices(indices, not_indices) + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Get the number of missing points - npoints = int(self.npoints - len(indicies)) - # Randomly get the indicies - indicies = np.append( - indicies, - np.random.permutation(not_indicies)[:npoints], + npoints = int(self.npoints - len(indices)) + # Randomly get the indices + indices = append( + indices, + self.rng.permutation(not_indices)[:npoints], ) - return np.array(indicies, dtype=int) + return array(indices, dtype=int) class DatabaseHybrid(DatabaseReduction): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + The used Database is a reduced set of the full Database. + The reduction is done by selecting the points with the + largest distances from each other and randomly. + The random points are selected at every random_fraction step. 
+ """ + def __init__( self, fingerprint=None, reduce_dimensions=True, use_derivatives=True, use_fingerprint=True, + round_targets=None, + seed=None, + dtype=float, npoints=25, - initial_indicies=[0], + initial_indices=[0], include_last=1, random_fraction=3, **kwargs, ): """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from a mix of - the distances and random. + Initialize the database. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). - npoints : int + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + npoints: int Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included + initial_indices: list + The indices of the data points that must be included in the used data base. - include_last : int + include_last: int Number of last data point to include in the used data base. - random_fraction : int + random_fraction: int How often the data point is sampled randomly. 
""" super().__init__( @@ -495,8 +458,11 @@ def __init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, npoints=npoints, - initial_indicies=initial_indicies, + initial_indices=initial_indices, include_last=include_last, random_fraction=random_fraction, **kwargs, @@ -508,8 +474,11 @@ def update_arguments( reduce_dimensions=None, use_derivatives=None, use_fingerprint=None, + round_targets=None, + seed=None, + dtype=None, npoints=None, - initial_indicies=None, + initial_indices=None, include_last=None, random_fraction=None, **kwargs, @@ -519,102 +488,92 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). - npoints : int + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + npoints: int Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included + initial_indices: list + The indices of the data points that must be included in the used data base. - include_last : int + include_last: int Number of last data point to include in the used data base. - random_fraction : int + random_fraction: int How often the data point is sampled randomly. 
Returns: self: The updated object itself. """ - # Control if the database has to be reset - reset_database = False - if fingerprint is not None: - self.fingerprint = fingerprint.copy() - reset_database = True - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - reset_database = True - if use_derivatives is not None: - self.use_derivatives = use_derivatives - reset_database = True - if use_fingerprint is not None: - self.use_fingerprint = use_fingerprint - reset_database = True - if npoints is not None: - self.npoints = int(npoints) - if initial_indicies is not None: - self.initial_indicies = np.array(initial_indicies, dtype=int) - if include_last is not None: - self.include_last = int(abs(include_last)) + # Set the parameters in the parent class + super().update_arguments( + fingerprint=fingerprint, + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, + npoints=npoints, + initial_indices=initial_indices, + include_last=include_last, + ) + # Set the random fraction if random_fraction is not None: self.random_fraction = int(abs(random_fraction)) if self.random_fraction == 0: self.random_fraction = 1 - # Check that too many last points are not included - n_extra = self.npoints - len(self.initial_indicies) - if self.include_last > n_extra: - self.include_last = n_extra if n_extra >= 0 else 0 - # Check that the database and the fingerprint have the same attributes - self.check_attributes() - # Reset the database if an argument has been changed - if reset_database: - self.reset_database() - # Store that the data base has changed - self.update_indicies = True return self - def make_reduction(self, all_indicies, **kwargs): + def make_reduction(self, all_indices, **kwargs): """ Use a combination of random sampling and farthest distance to reduce training set. 
""" - # Get the fixed indicies - indicies = self.get_initial_indicies() - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + # Get the fixed indices + indices = self.get_initial_indices() + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Include the last point - indicies = self.get_last_indicies(indicies, not_indicies) + indices = self.get_last_indices(indices, not_indices) # Get a random index if no fixed index exist - if len(indicies) == 0: - indicies = [np.random.choice(all_indicies)] + if len(indices) == 0: + indices = asarray([self.rng.choice(not_indices)], dtype=int) + not_indices = self.get_not_indices(indices, all_indices) # Get all the features features = self.get_all_feature_vectors() fdim = len(features[0]) - for i in range(len(indicies), self.npoints): - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + for i in range(len(indices), self.npoints): + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) if i % self.random_fraction == 0: # Get a random index - indicies = np.append( - indicies, - [np.random.choice(not_indicies)], - ) + indices = append(indices, [self.rng.choice(not_indices)]) else: # Calculate the distances to the points already used dist = cdist( - features[indicies].reshape(-1, fdim), - features[not_indicies].reshape(-1, fdim), + features[indices].reshape(-1, fdim), + features[not_indices].reshape(-1, fdim), ) # Choose the point furthest from the points already used - i_max = np.argmax(np.nanmin(dist, axis=0)) - indicies = np.append(indicies, [not_indicies[i_max]]) - return np.array(indicies, dtype=int) + i_max = argmax(nanmin(dist, axis=0)) + indices = append(indices, [not_indices[i_max]]) + return array(indices, dtype=int) def get_arguments(self): "Get the arguments of the 
class itself." @@ -624,61 +583,81 @@ def get_arguments(self): reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, use_fingerprint=self.use_fingerprint, + round_targets=self.round_targets, + seed=self.seed, + dtype=self.dtype, npoints=self.npoints, - initial_indicies=self.initial_indicies, + initial_indices=self.initial_indices, include_last=self.include_last, random_fraction=self.random_fraction, ) # Get the constants made within the class - constant_kwargs = dict(update_indicies=self.update_indicies) + constant_kwargs = dict(update_indices=self.update_indices) # Get the objects made within the class object_kwargs = dict( atoms_list=self.atoms_list.copy(), features=self.features.copy(), targets=self.targets.copy(), - indicies=self.indicies.copy(), + indices=self.indices.copy(), ) return arg_kwargs, constant_kwargs, object_kwargs class DatabaseMin(DatabaseReduction): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + The used Database is a reduced set of the full Database. + The reduction is done by selecting the points with the + smallest targets. + """ + def __init__( self, fingerprint=None, reduce_dimensions=True, use_derivatives=True, use_fingerprint=True, + round_targets=None, + seed=None, + dtype=float, npoints=25, - initial_indicies=[0], + initial_indices=[0], include_last=1, force_targets=False, **kwargs, ): """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from the smallest targets. + Initialize the database. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. 
- use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). - npoints : int + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed. + The seed can also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + npoints: int Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included + initial_indices: list + The indices of the data points that must be included in the used data base. - include_last : int + include_last: int Number of last data point to include in the used data base. - force_targets : bool + force_targets: bool Whether to include the derivatives/forces in targets when the smallest targets are found. """ @@ -687,8 +666,11 @@ def __init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, npoints=npoints, - initial_indicies=initial_indicies, + initial_indices=initial_indices, include_last=include_last, force_targets=force_targets, **kwargs, @@ -700,8 +682,11 @@ def update_arguments( reduce_dimensions=None, use_derivatives=None, use_fingerprint=None, + round_targets=None, + seed=None, + dtype=None, npoints=None, - initial_indicies=None, + initial_indices=None, include_last=None, force_targets=None, **kwargs, @@ -711,90 +696,83 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. 
- use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). - npoints : int + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed. + The seed can also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + npoints: int Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included + initial_indices: list + The indices of the data points that must be included in the used data base. - include_last : int + include_last: int Number of last data point to include in the used data base. - force_targets : bool + force_targets: bool Whether to include the derivatives/forces in targets when the smallest targets are found. Returns: self: The updated object itself. 
""" - # Control if the database has to be reset - reset_database = False - if fingerprint is not None: - self.fingerprint = fingerprint.copy() - reset_database = True - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - reset_database = True - if use_derivatives is not None: - self.use_derivatives = use_derivatives - reset_database = True - if use_fingerprint is not None: - self.use_fingerprint = use_fingerprint - reset_database = True - if npoints is not None: - self.npoints = int(npoints) - if initial_indicies is not None: - self.initial_indicies = np.array(initial_indicies, dtype=int) - if include_last is not None: - self.include_last = int(abs(include_last)) + # Set the parameters in the parent class + super().update_arguments( + fingerprint=fingerprint, + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, + npoints=npoints, + initial_indices=initial_indices, + include_last=include_last, + ) + # Set the force targets if force_targets is not None: self.force_targets = force_targets - # Check that too many last points are not included - n_extra = self.npoints - len(self.initial_indicies) - if self.include_last > n_extra: - self.include_last = n_extra if n_extra >= 0 else 0 - # Check that the database and the fingerprint have the same attributes - self.check_attributes() - # Reset the database if an argument has been changed - if reset_database: - self.reset_database() - # Store that the data base has changed - self.update_indicies = True return self - def make_reduction(self, all_indicies, **kwargs): + def make_reduction(self, all_indices, **kwargs): "Use the targets with smallest norms in the training set." 
- # Get the fixed indicies - indicies = self.get_initial_indicies() - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + # Get the fixed indices + indices = self.get_initial_indices() + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Include the last point - indicies = self.get_last_indicies(indicies, not_indicies) - # Get the indicies for the system not already included - not_indicies = np.array(self.get_not_indicies(indicies, all_indicies)) + indices = self.get_last_indices(indices, not_indices) + # Get the indices for the system not already included + not_indices = array(self.get_not_indices(indices, all_indices)) # Get the targets - targets = self.get_all_targets()[not_indicies] + targets = self.get_all_targets()[not_indices] # Get sorting of the targets if self.force_targets: # Get the points with the lowest norm of the targets - i_sort = np.argsort(np.linalg.norm(targets, axis=1)) + targets_norm = sqrt(einsum("ij,ij->i", targets, targets)) + i_sort = argsort(targets_norm) else: # Get the points with the lowest energies - i_sort = np.argsort(targets[:, 0]) + i_sort = argsort(targets[:, 0]) # Get the number of missing points - npoints = int(self.npoints - len(indicies)) - # Get the indicies for the system not already included + npoints = int(self.npoints - len(indices)) + # Get the indices for the system not already included i_sort = i_sort[:npoints] - indicies = np.append(indicies, not_indicies[i_sort]) - return np.array(indicies, dtype=int) + indices = append(indices, not_indices[i_sort]) + return array(indices, dtype=int) def get_arguments(self): "Get the arguments of the class itself." 
@@ -804,206 +782,142 @@ def get_arguments(self): reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, use_fingerprint=self.use_fingerprint, + round_targets=self.round_targets, + seed=self.seed, + dtype=self.dtype, npoints=self.npoints, - initial_indicies=self.initial_indicies, + initial_indices=self.initial_indices, include_last=self.include_last, force_targets=self.force_targets, ) # Get the constants made within the class - constant_kwargs = dict(update_indicies=self.update_indicies) + constant_kwargs = dict(update_indices=self.update_indices) # Get the objects made within the class object_kwargs = dict( atoms_list=self.atoms_list.copy(), features=self.features.copy(), targets=self.targets.copy(), - indicies=self.indicies.copy(), + indices=self.indices.copy(), ) return arg_kwargs, constant_kwargs, object_kwargs class DatabaseLast(DatabaseReduction): - def __init__( - self, - fingerprint=None, - reduce_dimensions=True, - use_derivatives=True, - use_fingerprint=True, - npoints=25, - initial_indicies=[0], - include_last=1, - **kwargs, - ): - """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from the last data points. - - Parameters: - fingerprint : Fingerprint object - An object as a fingerprint class - that convert atoms to fingerprint. - reduce_dimensions: bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Whether to use derivatives/forces in the targets. - use_fingerprint : bool - Whether the kernel uses fingerprint objects (True) - or arrays (False). - npoints : int - Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included - in the used data base. - include_last : int - Number of last data point to include in the used data base. 
- """ - super().__init__( - fingerprint=fingerprint, - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - npoints=npoints, - initial_indicies=initial_indicies, - include_last=include_last, - **kwargs, - ) - - def make_reduction(self, all_indicies, **kwargs): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + The used Database is a reduced set of the full Database. + The reduction is done by selecting the last points in the database. + """ + + def make_reduction(self, all_indices, **kwargs): "Use the last data points." - # Get the fixed indicies - indicies = self.get_initial_indicies() - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + # Get the fixed indices + indices = self.get_initial_indices() + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Get the number of missing points - npoints = int(self.npoints - len(indicies)) + npoints = int(self.npoints - len(indices)) # Get the last points in the database if npoints > 0: - indicies = np.append(indicies, not_indicies[-npoints:]) - return np.array(indicies, dtype=int) + indices = append(indices, not_indices[-npoints:]) + return array(indices, dtype=int) class DatabaseRestart(DatabaseReduction): - def __init__( - self, - fingerprint=None, - reduce_dimensions=True, - use_derivatives=True, - use_fingerprint=True, - npoints=25, - initial_indicies=[0], - include_last=1, - **kwargs, - ): - """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from restarts after npoints are used. - The initial indicies and the last data point is used at each restart. 
- - Parameters: - fingerprint : Fingerprint object - An object as a fingerprint class - that convert atoms to fingerprint. - reduce_dimensions: bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Whether to use derivatives/forces in the targets. - use_fingerprint : bool - Whether the kernel uses fingerprint objects (True) - or arrays (False). - npoints : int - Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included - in the used data base. - include_last : int - Number of last data point to include in the used data base. - """ - super().__init__( - fingerprint=fingerprint, - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - npoints=npoints, - initial_indicies=initial_indicies, - include_last=include_last, - **kwargs, - ) - - def make_reduction(self, all_indicies, **kwargs): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + The used Database is a reduced set of the full Database. + The reduced data set is selected from restarts after npoints are used. + The initial indices and the last data point is used at each restart. + """ + + def make_reduction(self, all_indices, **kwargs): "Make restart of used data set." 
- # Get the fixed indicies - indicies = self.get_initial_indicies() + # Get the fixed indices + indices = self.get_initial_indices() # Get the data set size - data_len = len(all_indicies) + data_len = len(all_indices) # Check how many last points are used lasts = self.include_last if lasts == 0: lasts = 1 # Get the minimum number of points in the database - n_initial = len(indicies) + n_initial = len(indices) if lasts > 1: n_initial += lasts - 1 # Get the number of data point after the first restart n_use = data_len - self.npoints - 1 - # Get the number of points that are not initial or last indicies + # Get the number of points that are not initial or last indices nfree = self.npoints - n_initial # Get the excess of data points after each restart n_extra = int(n_use % nfree) - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) - # Include the indicies - indicies = np.append(indicies, not_indicies[-(n_extra + lasts) :]) - return np.array(indicies, dtype=int) + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) + # Include the indices + lasts_i = -(n_extra + lasts) + indices = append(indices, not_indices[lasts_i:]) + return array(indices, dtype=int) class DatabasePointsInterest(DatabaseLast): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + The used Database is a reduced set of the full Database. + The reduced data set is selected from the distances + to the points of interest. + The distance metric is the shortest distance + to any of the points of interest. 
+ """ + def __init__( self, fingerprint=None, reduce_dimensions=True, use_derivatives=True, use_fingerprint=True, + round_targets=None, + seed=None, + dtype=float, npoints=25, - initial_indicies=[0], + initial_indices=[0], include_last=1, feature_distance=True, point_interest=[], **kwargs, ): """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from the distances - to the points of interest. - The distance metric is the shortest distance - to any of the points of interest. + Initialize the database. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). - npoints : int + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + dtype: type + The data type of the arrays. + npoints: int Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included + initial_indices: list + The indices of the data points that must be included in the used data base. - include_last : int + include_last: int Number of last data point to include in the used data base. - feature_distance : bool + feature_distance: bool Whether to calculate the distance in feature space (True) or Cartesian coordinate space (False). - point_interest : list + point_interest: list A list of the points of interest as ASE Atoms instances. 
""" super().__init__( @@ -1011,8 +925,11 @@ def __init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, npoints=npoints, - initial_indicies=initial_indicies, + initial_indices=initial_indices, include_last=include_last, feature_distance=feature_distance, point_interest=point_interest, @@ -1028,24 +945,26 @@ def get_feature_interest(self, **kwargs): the points of interest. """ if self.use_fingerprint: - return np.array( - [feature.get_vector() for feature in self.fp_interest] + return array( + [feature.get_vector() for feature in self.fp_interest], + dtype=self.dtype, ) - return np.array(self.fp_interest) + return array(self.fp_interest, dtype=self.dtype) def get_positions(self, atoms_list, **kwargs): """ Get the Cartesian coordinates of the atoms. Parameters: - atoms_list : list or ASE Atoms + atoms_list: list or ASE Atoms A list of ASE Atoms. Returns: list: A list of the positions of the atoms for each system. """ - return np.array( - [atoms.get_positions().reshape(-1) for atoms in atoms_list] + return array( + [atoms.get_positions().reshape(-1) for atoms in atoms_list], + dtype=self.dtype, ) def get_positions_interest(self, **kwargs): @@ -1066,15 +985,15 @@ def get_all_positions(self, **kwargs): list: A list of the positions of all the atoms in the database for each system. """ - return self.get_positions(self.get_all_atoms()) + return self.get_positions(self.get_all_data_atoms()) - def get_distances(self, not_indicies, **kwargs): + def get_distances(self, not_indices, **kwargs): """ Calculate the distances to the points of interest. Parameters: - not_indicies : list - A list of indicies that not used yet. + not_indices: list + A list of indices that not used yet. Returns: array: The distances to the points of interest. 
@@ -1092,7 +1011,7 @@ def get_distances(self, not_indicies, **kwargs): fdim = len(features[0]) # Calculate the minimum distances to the points of interest dist = cdist( - features_interest, features[not_indicies].reshape(-1, fdim) + features_interest, features[not_indices].reshape(-1, fdim) ) return dist @@ -1102,8 +1021,11 @@ def update_arguments( reduce_dimensions=None, use_derivatives=None, use_fingerprint=None, + round_targets=None, + seed=None, + dtype=None, npoints=None, - initial_indicies=None, + initial_indices=None, include_last=None, feature_distance=None, point_interest=None, @@ -1114,98 +1036,94 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - fingerprint : Fingerprint object + fingerprint: Fingerprint object An object as a fingerprint class that convert atoms to fingerprint. reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Whether to use derivatives/forces in the targets. - use_fingerprint : bool + use_fingerprint: bool Whether the kernel uses fingerprint objects (True) or arrays (False). - npoints : int + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed. + The seed an also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + npoints: int Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included + initial_indices: list + The indices of the data points that must be included in the used data base. - include_last : int + include_last: int Number of last data point to include in the used data base. - feature_distance : bool + feature_distance: bool Whether to calculate the distance in feature space (True) or Cartesian coordinate space (False). 
- point_interest : list + point_interest: list A list of the points of interest as ASE Atoms instances. Returns: self: The updated object itself. """ - # Control if the database has to be reset - reset_database = False - if fingerprint is not None: - self.fingerprint = fingerprint.copy() - reset_database = True - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - reset_database = True - if use_derivatives is not None: - self.use_derivatives = use_derivatives - reset_database = True - if use_fingerprint is not None: - self.use_fingerprint = use_fingerprint - reset_database = True - if npoints is not None: - self.npoints = int(npoints) - if initial_indicies is not None: - self.initial_indicies = np.array(initial_indicies, dtype=int) - if include_last is not None: - self.include_last = int(abs(include_last)) + # Set the parameters in the parent class + super().update_arguments( + fingerprint=fingerprint, + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, + npoints=npoints, + initial_indices=initial_indices, + include_last=include_last, + ) + # Set the feature distance if feature_distance is not None: self.feature_distance = feature_distance + # Set the points of interest if point_interest is not None: + # Ensure point_interest is a list of ASE Atoms instances + if not isinstance(point_interest, list): + point_interest = [point_interest] self.point_interest = [atoms.copy() for atoms in point_interest] self.fp_interest = [ self.make_atoms_feature(atoms) for atoms in self.point_interest ] - # Check that too many last points are not included - n_extra = self.npoints - len(self.initial_indicies) - if self.include_last > n_extra: - self.include_last = n_extra if n_extra >= 0 else 0 - # Check that the database and the fingerprint have the same attributes - self.check_attributes() - # Reset the database if an argument has been 
changed - if reset_database: - self.reset_database() - # Store that the data base has changed - self.update_indicies = True return self - def make_reduction(self, all_indicies, **kwargs): + def make_reduction(self, all_indices, **kwargs): """ Reduce the training set with the points closest to the points of interests. """ # Check if there are points of interest else use the Parent class if len(self.point_interest) == 0: - return super().make_reduction(all_indicies, **kwargs) - # Get the fixed indicies - indicies = self.get_initial_indicies() - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + return super().make_reduction(all_indices, **kwargs) + # Get the fixed indices + indices = self.get_initial_indices() + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Include the last point - indicies = self.get_last_indicies(indicies, not_indicies) - # Get the indicies for the system not already included - not_indicies = np.array(self.get_not_indicies(indicies, all_indicies)) + indices = self.get_last_indices(indices, not_indices) + # Get the indices for the system not already included + not_indices = array(self.get_not_indices(indices, all_indices)) # Get the number of missing points - npoints = int(self.npoints - len(indicies)) + npoints = int(self.npoints - len(indices)) # Calculate the distances to the points of interest - dist = self.get_distances(not_indicies) + dist = self.get_distances(not_indices) # Get the minimum distances to the points of interest - dist = np.min(dist, axis=0) - i_min = np.argsort(dist)[:npoints] - # Get the indicies - indicies = np.append(indicies, [not_indicies[i_min]]) - return np.array(indicies, dtype=int) + dist = dist.min(axis=0) + i_min = argsort(dist)[:npoints] + # Get the indices + indices = append(indices, [not_indices[i_min]]) + return array(indices, dtype=int) def get_arguments(self): "Get the 
arguments of the class itself." @@ -1215,116 +1133,70 @@ def get_arguments(self): reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, use_fingerprint=self.use_fingerprint, + round_targets=self.round_targets, + seed=self.seed, + dtype=self.dtype, npoints=self.npoints, - initial_indicies=self.initial_indicies, + initial_indices=self.initial_indices, include_last=self.include_last, feature_distance=self.feature_distance, point_interest=self.point_interest, ) # Get the constants made within the class - constant_kwargs = dict(update_indicies=self.update_indicies) + constant_kwargs = dict(update_indices=self.update_indices) # Get the objects made within the class object_kwargs = dict( atoms_list=self.atoms_list.copy(), features=self.features.copy(), targets=self.targets.copy(), - indicies=self.indicies.copy(), + indices=self.indices.copy(), ) return arg_kwargs, constant_kwargs, object_kwargs class DatabasePointsInterestEach(DatabasePointsInterest): - def __init__( - self, - fingerprint=None, - reduce_dimensions=True, - use_derivatives=True, - use_fingerprint=True, - npoints=25, - initial_indicies=[0], - include_last=1, - feature_distance=True, - point_interest=[], - **kwargs, - ): - """ - Database of ASE atoms objects that are converted - into fingerprints and targets. - The used Database is a reduced set of the full Database. - The reduced data set is selected from the distances - to each point of interest. - The distance metric is the shortest distance to the point of interest - and it is performed iteratively. - - Parameters: - fingerprint : Fingerprint object - An object as a fingerprint class - that convert atoms to fingerprint. - reduce_dimensions: bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Whether to use derivatives/forces in the targets. - use_fingerprint : bool - Whether the kernel uses fingerprint objects (True) - or arrays (False). 
- npoints : int - Number of points that are used from the database. - initial_indicies : list - The indicies of the data points that must be included - in the used data base. - include_last : int - Number of last data point to include in the used data base. - feature_distance : bool - Whether to calculate the distance in feature space (True) - or Cartesian coordinate space (False). - point_interest : list - A list of the points of interest as ASE Atoms instances. - """ - super().__init__( - fingerprint=fingerprint, - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - npoints=npoints, - initial_indicies=initial_indicies, - include_last=include_last, - feature_distance=feature_distance, - point_interest=point_interest, - **kwargs, - ) - - def make_reduction(self, all_indicies, **kwargs): + """ + Database of ASE Atoms instances that are converted + into stored fingerprints and targets. + The used Database is a reduced set of the full Database. + The reduced data set is selected from the distances + to each point of interest. + The distance metric is the shortest distance to the point of interest + and it is performed iteratively. + """ + + def make_reduction(self, all_indices, **kwargs): """ Reduce the training set with the points closest to the points of interests. 
""" # Check if there are points of interest else use the Parent class if len(self.point_interest) == 0: - return super().make_reduction(all_indicies, **kwargs) - # Get the fixed indicies - indicies = self.get_initial_indicies() - # Get the indicies for the system not already included - not_indicies = self.get_not_indicies(indicies, all_indicies) + return super().make_reduction(all_indices, **kwargs) + # Get the fixed indices + indices = self.get_initial_indices() + # Get the indices for the system not already included + not_indices = self.get_not_indices(indices, all_indices) # Include the last point - indicies = self.get_last_indicies(indicies, not_indicies) - # Get the indicies for the system not already included - not_indicies = np.array(self.get_not_indicies(indicies, all_indicies)) + indices = self.get_last_indices(indices, not_indices) + # Get the indices for the system not already included + not_indices = array(self.get_not_indices(indices, all_indices)) # Calculate the distances to the points of interest - dist = self.get_distances(not_indicies) + dist = self.get_distances(not_indices) # Get the number of points of interest n_points_interest = len(dist) # Iterate over the points of interests p = 0 - while len(indicies) < self.npoints: + while len(indices) < self.npoints: # Get the point with the minimum distance - i_min = np.argmin(dist[p]) + i_min = argmin(dist[p]) # Get and append the index - indicies = np.append(indicies, [not_indicies[i_min]]) + indices = append(indices, [not_indices[i_min]]) # Remove the index - not_indicies = np.delete(not_indicies, i_min) - dist = np.delete(dist, i_min, axis=1) + not_indices = delete(not_indices, i_min) + dist = delete(dist, i_min, axis=1) # Use the next point p += 1 if p >= n_points_interest: p = 0 - return np.array(indicies, dtype=int) + return array(indices, dtype=int) diff --git a/catlearn/regression/gp/calculator/default_model.py b/catlearn/regression/gp/calculator/default_model.py new file mode 100644 index 
00000000..fc778952 --- /dev/null +++ b/catlearn/regression/gp/calculator/default_model.py @@ -0,0 +1,638 @@ +import warnings + + +def get_default_model( + model="tp", + prior="median", + use_derivatives=True, + use_fingerprint=False, + global_optimization=True, + parallel=False, + n_reduced=None, + round_hp=3, + seed=None, + dtype=float, + model_kwargs={}, + prior_kwargs={}, + kernel_kwargs={}, + hpfitter_kwargs={}, + optimizer_kwargs={}, + lineoptimizer_kwargs={}, + function_kwargs={}, + **kwargs, +): + """ + Get the default ML model from the simple given arguments. + + Parameters: + model: str + Either the tp that gives the Student T process or + gp that gives the Gaussian process. + prior: str + Specify what prior mean should be used. + use_derivatives: bool + Whether to use derivatives of the targets. + use_fingerprint: bool + Whether to use fingerprints for the features. + This has to be the same as for the database! + global_optimization: bool + Whether to perform a global optimization of the hyperparameters. + A local optimization is used if global_optimization=False, + which can not be parallelized. + parallel: bool + Whether to optimize the hyperparameters in parallel. + n_reduced: int or None + If n_reduced is an integer, the hyperparameters are only optimized + when the data set size is equal to or below the integer. + If n_reduced is None, the hyperparameter is always optimized. + round_hp: int (optional) + The number of decimals to round the hyperparameters to. + If None, the hyperparameters are not rounded. + seed: int (optional) + The random seed for the optimization. + The seed can also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + model_kwargs: dict (optional) + The keyword arguments for the model. + The additional arguments are passed to the model. + prior_kwargs: dict (optional) + The keyword arguments for the prior mean. 
+ kernel_kwargs: dict (optional) + The keyword arguments for the kernel. + hpfitter_kwargs: dict (optional) + The keyword arguments for the hyperparameter fitter. + optimizer_kwargs: dict (optional) + The keyword arguments for the optimizer. + lineoptimizer_kwargs: dict (optional) + The keyword arguments for the line optimizer. + function_kwargs: dict (optional) + The keyword arguments for the objective function. + + Returns: + model: Model + The Machine Learning Model with kernel and + prior that are optimized. + """ + # Check that the model is given as a string + if not isinstance(model, str): + return model + # Make the prior mean from given string + if isinstance(prior, str): + from ..means import Prior_median, Prior_mean, Prior_min, Prior_max + + if prior.lower() == "median": + prior = Prior_median(**prior_kwargs) + elif prior.lower() == "mean": + prior = Prior_mean(**prior_kwargs) + elif prior.lower() == "min": + prior = Prior_min(**prior_kwargs) + elif prior.lower() == "max": + prior = Prior_max(**prior_kwargs) + # Construct the kernel class object + from ..kernel.se import SE + + kernel = SE( + use_fingerprint=use_fingerprint, + use_derivatives=use_derivatives, + dtype=dtype, + **kernel_kwargs, + ) + # Set the hyperparameter optimization method + if global_optimization: + # Set global optimization with or without parallelization + from ..optimizers.globaloptimizer import FactorizedOptimizer + + # Set the line searcher for the hyperparameter optimization + if parallel: + from ..optimizers.linesearcher import FineGridSearch + + lineoptimizer_kwargs_default = dict( + optimize=True, + multiple_min=False, + ngrid=80, + loops=3, + ) + lineoptimizer_kwargs_default.update(lineoptimizer_kwargs) + line_optimizer = FineGridSearch( + parallel=True, + dtype=dtype, + **lineoptimizer_kwargs_default, + ) + else: + from ..optimizers.linesearcher import GoldenSearch + + lineoptimizer_kwargs_default = dict( + optimize=True, + multiple_min=False, + ) + 
lineoptimizer_kwargs_default.update(lineoptimizer_kwargs) + line_optimizer = GoldenSearch( + parallel=False, + dtype=dtype, + **lineoptimizer_kwargs_default, + ) + # Set the optimizer for the hyperparameter optimization + optimizer_kwargs_default = dict( + ngrid=80, + calculate_init=False, + ) + optimizer_kwargs_default.update(optimizer_kwargs) + optimizer = FactorizedOptimizer( + line_optimizer=line_optimizer, + parallel=parallel, + dtype=dtype, + **optimizer_kwargs_default, + ) + else: + from ..optimizers.localoptimizer import ScipyOptimizer + + optimizer_kwargs_default = dict( + maxiter=500, + jac=True, + method="l-bfgs-b", + use_bounds=False, + tol=1e-12, + ) + optimizer_kwargs_default.update(optimizer_kwargs) + # Make the local optimizer + optimizer = ScipyOptimizer( + dtype=dtype, + **optimizer_kwargs_default, + ) + if parallel: + warnings.warn( + "Parallel optimization is not implemented " + "with local optimization!" + ) + # Use either the Student t process or the Gaussian process + model_kwargs.update(kwargs) + if model.lower() == "tp": + # Set model + from ..models.tp import TProcess + + model_kwargs_default = dict( + a=1e-4, + b=4.0, + ) + model_kwargs_default.update(model_kwargs) + model = TProcess( + prior=prior, + kernel=kernel, + use_derivatives=use_derivatives, + dtype=dtype, + **model_kwargs_default, + ) + # Set objective function + if global_optimization: + from ..objectivefunctions.tp.factorized_likelihood import ( + FactorizedLogLikelihood, + ) + + func = FactorizedLogLikelihood(dtype=dtype, **function_kwargs) + else: + from ..objectivefunctions.tp.likelihood import LogLikelihood + + func = LogLikelihood(dtype=dtype, **function_kwargs) + else: + # Set model + from ..models.gp import GaussianProcess + + model = GaussianProcess( + prior=prior, + kernel=kernel, + use_derivatives=use_derivatives, + dtype=dtype, + **model_kwargs, + ) + # Set objective function + if global_optimization: + from ..objectivefunctions.gp.factorized_likelihood import ( + 
FactorizedLogLikelihood, + ) + + func = FactorizedLogLikelihood(dtype=dtype, **function_kwargs) + else: + from ..objectivefunctions.gp.likelihood import LogLikelihood + + func = LogLikelihood(dtype=dtype, **function_kwargs) + # Set hpfitter and whether a maximum data set size is applied + if n_reduced is None: + from ..hpfitter import HyperparameterFitter + + hpfitter = HyperparameterFitter( + func=func, + optimizer=optimizer, + round_hp=round_hp, + dtype=dtype, + **hpfitter_kwargs, + ) + else: + from ..hpfitter.redhpfitter import ReducedHyperparameterFitter + + hpfitter = ReducedHyperparameterFitter( + func=func, + optimizer=optimizer, + opt_tr_size=n_reduced, + round_hp=round_hp, + dtype=dtype, + **hpfitter_kwargs, + ) + model.update_arguments(hpfitter=hpfitter) + # Set the seed for the model + if seed is not None: + model.set_seed(seed=seed) + # Return the model + return model + + +def get_default_database( + fp=None, + use_derivatives=True, + database_reduction=False, + round_targets=5, + seed=None, + dtype=float, + **database_kwargs, +): + """ + Get the default Database from the simple given arguments. + + Parameters: + fp: Fingerprint class object or None + The fingerprint object used to generate the fingerprints. + Cartesian coordinates are used if it is None. + use_derivatives: bool + Whether to use derivatives of the targets. + database_reduction: bool or str + Whether to use a reduced database after a number + of training points. + If a string is given, the database reduction method is created + from the string. + If False, no database reduction is used. + If True, the default database reduction method is used. + The default database reduction method is DatabasePointsInterest. + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + seed: int (optional) + The random seed for the optimization. + The seed can also be a RandomState or Generator instance. 
+ If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + database_kwargs: dict (optional) + A dictionary with additional arguments for the database. + Also used for the reduced databases. + + Returns: + database: Database object + The Database object with ASE atoms. + """ + # Set a fingerprint + if fp is None: + from ..fingerprint.cartesian import Cartesian + + # Use cartesian coordinates as the fingerprint + fp = Cartesian(reduce_dimensions=True, use_derivatives=use_derivatives) + use_fingerprint = False + else: + use_fingerprint = True + # Make the data base ready + if isinstance(database_reduction, str) or database_reduction is True: + # Set the default database arguments + data_kwargs = dict( + fingerprint=fp, + reduce_dimensions=True, + use_derivatives=use_derivatives, + use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, + npoints=50, + initial_indices=[0, 1], + include_last=1, + ) + data_kwargs.update(database_kwargs) + if ( + database_reduction is True + or database_reduction.lower() == "interest" + ): + from .database_reduction import DatabasePointsInterest + + database = DatabasePointsInterest(**data_kwargs) + elif database_reduction.lower() == "distance": + from .database_reduction import DatabaseDistance + + database = DatabaseDistance(**data_kwargs) + elif database_reduction.lower() == "random": + from .database_reduction import DatabaseRandom + + database = DatabaseRandom(**data_kwargs) + elif database_reduction.lower() == "hybrid": + from .database_reduction import DatabaseHybrid + + database = DatabaseHybrid(**data_kwargs) + elif database_reduction.lower() == "min": + from .database_reduction import DatabaseMin + + database = DatabaseMin(**data_kwargs) + elif database_reduction.lower() == "last": + from .database_reduction import DatabaseLast + + database = DatabaseLast(**data_kwargs) + elif database_reduction.lower() == "restart": + from .database_reduction 
import DatabaseRestart + + database = DatabaseRestart(**data_kwargs) + elif database_reduction.lower() == "each_interest": + from .database_reduction import DatabasePointsInterestEach + + database = DatabasePointsInterestEach(**data_kwargs) + else: + from .database import Database + + data_kwargs = dict( + reduce_dimensions=True, + ) + data_kwargs.update(database_kwargs) + database = Database( + fingerprint=fp, + use_derivatives=use_derivatives, + use_fingerprint=use_fingerprint, + round_targets=round_targets, + seed=seed, + dtype=dtype, + **data_kwargs, + ) + return database + + +def get_default_ensemble( + model, + clustering="k_number", + clustering_kwargs={}, + seed=None, + dtype=float, + **ensemble_kwargs, +): + """ + Get the default ensemble model with clustering and ensemble. + + Parameters: + model: Model + The Machine Learning Model with kernel and prior. + clustering: str or Clustering class instance + The clustering method used to split the data to different models. + If a string is given, the clustering method is created from the + string. + clustering_kwargs: dict (optional) + A dictionary with the arguments for the clustering method. + If clustering is a string, the arguments are used to create the + clustering method. + seed: int (optional) + The random seed for the clustering. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + ensemble_kwargs: dict (optional) + Additional keyword arguments for the EnsembleClustering class. + + Returns: + ensemble_model: EnsembleClustering + The EnsembleClustering with clustering and ensemble. 
+ """ + from ..ensemble.ensemble_clustering import EnsembleClustering + + # Check if clustering is a string and make the clustering method + if isinstance(clustering, str): + # Load the clustering methods + from ..ensemble.clustering import ( + K_means_number, + K_means, + K_means_auto, + K_means_enumeration, + RandomClustering, + RandomClustering_number, + FixedClustering, + ) + + # Set the default clustering arguments + clustering_kwargs_default = dict( + seed=seed, + dtype=dtype, + ) + # Set the data number for the specific clustering method + if clustering.lower() in [ + "k_number", + "k_enumeration", + "random_number", + ]: + clustering_kwargs_default.update(dict(data_number=25)) + clustering_kwargs_default.update(clustering_kwargs) + if clustering.lower() == "k_number": + clustering = K_means_number(**clustering_kwargs_default) + elif clustering.lower() == "k_means": + clustering = K_means(**clustering_kwargs_default) + elif clustering.lower() == "k_auto": + clustering = K_means_auto(**clustering_kwargs_default) + elif clustering.lower() == "k_enumeration": + clustering = K_means_enumeration(**clustering_kwargs_default) + elif clustering.lower() == "random": + clustering = RandomClustering(**clustering_kwargs_default) + elif clustering.lower() == "random_number": + clustering = RandomClustering_number(**clustering_kwargs_default) + elif clustering.lower() == "fixed": + clustering = FixedClustering(**clustering_kwargs_default) + else: + raise ValueError(f"Clustering {clustering} is not implemented!") + # Create the ensemble model + return EnsembleClustering( + model=model, + clustering=clustering, + dtype=dtype, + **ensemble_kwargs, + ) + + +def get_default_mlmodel( + model="tp", + fp=None, + baseline=None, + prior="median", + optimize_hp=True, + use_pdis=True, + pdis=None, + use_derivatives=True, + global_optimization=True, + parallel=False, + n_reduced=None, + round_hp=3, + all_model_kwargs={}, + database_reduction=False, + round_targets=5, + 
database_kwargs={}, + use_ensemble=False, + clustering="k_number", + cluster_kwargs=dict(), + ensemble_kwargs={}, + verbose=False, + seed=None, + dtype=float, + **mlmodel_kwargs, +): + """ + Get the default ML model with a database for the ASE Atoms + from the simple given arguments. + + Parameters: + model: str or Model class instance + Either the tp that gives the Student T process or + gp that gives the Gaussian process. + fp: Fingerprint class instance or None + The fingerprint instance used to generate the fingerprints. + Cartesian coordinates are used if it is None. + baseline: Baseline class instance + The Baseline instance used to calculate energy and forces. + prior: str + Specify what prior mean should be used. + optimize_hp: bool + Whether to optimize the hyperparameters when the model is trained. + use_pdis: bool + Whether to make prior distributions for the hyperparameters. + pdis: dict (optional) + A dict of prior distributions for each hyperparameter type. + If None, the default prior distributions are used. + No prior distributions are used if use_pdis=False or pdis is {}. + use_derivatives: bool + Whether to use derivatives of the targets. + global_optimization: bool + Whether to perform a global optimization of the hyperparameters. + A local optimization is used if global_optimization=False, + which can not be parallelized. + parallel: bool + Whether to optimize the hyperparameters in parallel. + n_reduced: int or None + If n_reduced is an integer, the hyperparameters are only optimized + when the data set size is equal to or below the integer. + If n_reduced is None, the hyperparameter is always optimized. + round_hp: int (optional) + The number of decimals to round the hyperparameters to. + If None, the hyperparameters are not rounded. + all_model_kwargs: dict (optional) + A dictionary with additional arguments for the model. 
+ It also can include model_kwargs, prior_kwargs, + kernel_kwargs, hpfitter_kwargs, optimizer_kwargs, + lineoptimizer_kwargs, and function_kwargs. + database_reduction: bool or str + Whether to use a reduced database after a number + of training points. + If a string is given, the database reduction method is created + from the string. + If False, no database reduction is used. + If True, the default database reduction method is used. + The default database reduction method is DatabasePointsInterest. + round_targets: int (optional) + The number of decimals to round the targets to. + If None, the targets are not rounded. + database_kwargs: dict + A dictionary with the arguments for the database + if it is used. + use_ensemble: bool + Whether to use an ensemble model with clustering. + The use of ensemble models can avoid memory issues and speed up + the training. + clustering: str or Clustering class instance + The clustering method used to split the data to different models. + If a string is given, the clustering method is created from the + string. + cluster_kwargs: dict (optional) + A dictionary with the arguments for the clustering method. + If clustering is a string, the arguments are used to create the + clustering method. + ensemble_kwargs: dict (optional) + Additional keyword arguments for the EnsembleClustering class. + verbose: bool + Whether to print statements in the optimization. + seed: int (optional) + The random seed for the optimization. + The seed can also be a RandomState or Generator instance. + If not given, the default random number generator is used. + dtype: type + The data type of the arrays. + mlmodel_kwargs: dict (optional) + Additional keyword arguments for the MLModel class. + + Returns: + mlmodel: MLModel class instance + Machine Learning model used for ASE Atoms and calculator. 
+ """ + from .mlmodel import MLModel + + # Check if fingerprints are used + if fp is None: + use_fingerprint = False + else: + use_fingerprint = True + # Make the model + if isinstance(model, str): + model = get_default_model( + model=model, + prior=prior, + use_derivatives=use_derivatives, + use_fingerprint=use_fingerprint, + global_optimization=global_optimization, + parallel=parallel, + n_reduced=n_reduced, + round_hp=round_hp, + seed=seed, + dtype=dtype, + **all_model_kwargs, + ) + # Make the model as an ensemble model if specified + if use_ensemble: + if database_reduction: + warnings.warn( + "Database reduction is not allowed with ensemble models!" + ) + model = get_default_ensemble( + model=model, + clustering=clustering, + clustering_kwargs=cluster_kwargs, + seed=seed, + dtype=dtype, + **ensemble_kwargs, + ) + # Make the database + database = get_default_database( + fp=fp, + use_derivatives=use_derivatives, + database_reduction=database_reduction, + round_targets=round_targets, + seed=seed, + dtype=dtype, + **database_kwargs, + ) + # Make prior distributions for the hyperparameters if specified + if use_pdis and pdis is None: + from ..pdistributions.normal import Normal_prior + + pdis = dict( + length=Normal_prior(mu=[-0.8], std=[0.2], dtype=dtype), + noise=Normal_prior(mu=[-9.0], std=[1.0], dtype=dtype), + ) + elif not use_pdis: + pdis = None + # Make the ML model with database + return MLModel( + model=model, + database=database, + baseline=baseline, + optimize=optimize_hp, + pdis=pdis, + verbose=verbose, + dtype=dtype, + **mlmodel_kwargs, + ) diff --git a/catlearn/regression/gp/calculator/hiermodel.py b/catlearn/regression/gp/calculator/hiermodel.py index 3169f177..4675e344 100644 --- a/catlearn/regression/gp/calculator/hiermodel.py +++ b/catlearn/regression/gp/calculator/hiermodel.py @@ -1,9 +1,18 @@ -import numpy as np +from numpy import ndarray from .mlmodel import MLModel from .mlcalc import MLCalculator class HierarchicalMLModel(MLModel): + """ + 
Machine Learning model used for the ASE Atoms instances and + in the machine learning calculators. + It is a hierarchy of ML models where the first model is used + for the first npoints data points. A new model is made when the + number of data points exceed the number of points. + The old models are used as a baseline. + """ + def __init__( self, model=None, @@ -12,42 +21,50 @@ def __init__( optimize=True, hp=None, pdis=None, + include_noise=False, + to_save_mlmodel=False, + save_mlmodel_kwargs={}, verbose=False, npoints=25, - initial_indicies=[0], + initial_indices=[0], + dtype=float, **kwargs, ): """ - A hierarchy of Machine Learning model used for - ASE Atoms and calculator. - A new model is made when the number of data points - exceed the number of points. - The old models are used as a baseline. + Initialize the ML model for Atoms. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - database : Database object + database: Database object The Database object with ASE atoms. - baseline : Baseline object + baseline: Baseline object The Baseline object calculator that calculates energy and forces. - optimize : bool + optimize: bool Whether to optimize the hyperparameters when the model is trained. - hp : dict + hp: dict Use a set of hyperparameters to optimize from else the current set is used. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. - verbose : bool + include_noise: bool + Whether to include noise in the uncertainty from the model. + to_save_mlmodel: bool + Whether to save the ML model to a file after training. + save_mlmodel_kwargs: dict + Arguments for saving the ML model, like the filename. + verbose: bool Whether to print statements in the optimization. - npoints : int + npoints: int Number of points that are used from the database in the models. 
- initial_indicies : list - The indicies of the data points that must be included in + initial_indices: list + The indices of the data points that must be included in the used data base for every model. + dtype: type + The data type of the arrays. """ super().__init__( model=model, @@ -56,9 +73,13 @@ def __init__( optimize=optimize, hp=hp, pdis=pdis, + include_noise=include_noise, + to_save_mlmodel=to_save_mlmodel, + save_mlmodel_kwargs=save_mlmodel_kwargs, verbose=verbose, npoints=npoints, - initial_indicies=initial_indicies, + initial_indices=initial_indices, + dtype=dtype, **kwargs, ) @@ -67,7 +88,7 @@ def add_training(self, atoms_list, **kwargs): Add training data in form of the ASE Atoms to the database. Parameters: - atoms_list : list or ASE Atoms + atoms_list: list or ASE Atoms A list of or a single ASE Atoms with calculated energies and forces. @@ -75,7 +96,7 @@ def add_training(self, atoms_list, **kwargs): self: The updated object itself. """ data_len = self.get_training_set_size() - if not isinstance(atoms_list, (list, np.ndarray)): + if not isinstance(atoms_list, (list, ndarray)): atoms_list = [atoms_list] # Store the data if data_len + len(atoms_list) <= self.npoints: @@ -90,12 +111,12 @@ def add_training(self, atoms_list, **kwargs): ) # Make a new ml model with the mandatory points data_atoms = self.get_data_atoms() - data_atoms = [data_atoms[i] for i in self.initial_indicies] + data_atoms = [data_atoms[i] for i in self.initial_indices] self.reset_database() super().add_training(data_atoms) super().add_training(atoms_list) else: - raise Exception( + raise AttributeError( "New baseline model can not be made without training. " "Include one point at the time!" 
) @@ -109,9 +130,13 @@ def update_arguments( optimize=None, hp=None, pdis=None, + include_noise=None, + to_save_mlmodel=None, + save_mlmodel_kwargs=None, verbose=None, npoints=None, - initial_indicies=None, + initial_indices=None, + dtype=None, **kwargs, ): """ @@ -119,61 +144,61 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - database : Database object + database: Database object The Database object with ASE atoms. - baseline : Baseline object + baseline: Baseline object The Baseline object calculator that calculates energy and forces. - optimize : bool + optimize: bool Whether to optimize the hyperparameters when the model is trained. - hp : dict + hp: dict Use a set of hyperparameters to optimize from else the current set is used. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. - verbose : bool + to_save_mlmodel: bool + Whether to save the ML model to a file after training. + save_mlmodel_kwargs: dict + Arguments for saving the ML model, like the filename. + include_noise: bool + Whether to include noise in the uncertainty from the model. + verbose: bool Whether to print statements in the optimization. - npoints : int + npoints: int Number of points that are used from the database in the models. - initial_indicies : list - The indicies of the data points that must be included in + initial_indices: list + The indices of the data points that must be included in the used data base for every model. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. 
""" - if model is not None: - self.model = model.copy() - if database is not None: - self.database = database.copy() - if baseline is not None: - self.baseline = baseline.copy() - if optimize is not None: - self.optimize = optimize - if hp is not None: - self.hp = hp.copy() - if pdis is not None: - self.pdis = pdis.copy() - if verbose is not None: - self.verbose = verbose + # Set the parameters in the parent class + super().update_arguments( + model=model, + database=database, + baseline=baseline, + optimize=optimize, + hp=hp, + pdis=pdis, + include_noise=include_noise, + to_save_mlmodel=to_save_mlmodel, + save_mlmodel_kwargs=save_mlmodel_kwargs, + verbose=verbose, + dtype=dtype, + ) + # Set the number of points if npoints is not None: self.npoints = int(npoints) - if initial_indicies is not None: - self.initial_indicies = initial_indicies.copy() - # Check if the baseline is used - if self.baseline is None: - self.use_baseline = False - else: - self.use_baseline = True - # Make a list of the baseline targets - if baseline is not None or database is not None: - self.baseline_targets = [] - # Check that the model and database have the same attributes - self.check_attributes() + # Set the initial indices + if initial_indices is not None: + self.initial_indices = initial_indices.copy() return self def get_arguments(self): @@ -186,12 +211,16 @@ def get_arguments(self): optimize=self.optimize, hp=self.hp, pdis=self.pdis, + include_noise=self.include_noise, + to_save_mlmodel=self.to_save_mlmodel, + save_mlmodel_kwargs=self.save_mlmodel_kwargs, verbose=self.verbose, npoints=self.npoints, - initial_indicies=self.initial_indicies, + initial_indices=self.initial_indices, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class - object_kwargs = dict(baseline_targets=self.baseline_targets.copy()) + object_kwargs = dict() return arg_kwargs, constant_kwargs, object_kwargs diff --git 
a/catlearn/regression/gp/calculator/mlcalc.py b/catlearn/regression/gp/calculator/mlcalc.py index 74e23269..9d698f76 100644 --- a/catlearn/regression/gp/calculator/mlcalc.py +++ b/catlearn/regression/gp/calculator/mlcalc.py @@ -1,7 +1,14 @@ +from numpy import round as round_ from ase.calculators.calculator import Calculator, all_changes +import pickle +from .mlmodel import MLModel class MLCalculator(Calculator): + """ + The machine learning calculator object applicable as an ASE calculator for + ASE Atoms instance. + """ # Define the properties available in this calculator implemented_properties = [ @@ -21,13 +28,14 @@ def __init__( calc_force_unc=False, calc_unc_deriv=False, calc_kwargs={}, + round_pred=None, **kwargs, ): """ - ML calculator object applicable as an ASE calculator. + Initialize the ML calculator. Parameters: - mlmodel : MLModel class object + mlmodel: MLModel class object Machine Learning model used for ASE Atoms and calculator. The object must have the functions: calculate, train_model, and add_training. @@ -42,16 +50,17 @@ def __init__( calc_unc_deriv: bool Whether to calculate the derivatives of the uncertainty of the energy. - calc_kwargs : dict + calc_kwargs: dict A dictionary with kwargs for the parent calculator class object. + round_pred: int (optional) + The number of decimals to round the predictions to. + If None, the predictions are not rounded. """ # Inherit from the Calculator object Calculator.__init__(self, **calc_kwargs) # Set default mlmodel if mlmodel is None: - from .mlmodel import MLModel - mlmodel = MLModel( model=None, database=None, @@ -66,6 +75,7 @@ def __init__( calc_force_unc=calc_force_unc, calc_unc_deriv=calc_unc_deriv, calc_kwargs=calc_kwargs, + round_pred=round_pred, **kwargs, ) @@ -74,7 +84,7 @@ def get_uncertainty(self, atoms=None, **kwargs): Get the predicted uncertainty of the energy. 
Parameters: - atoms : ASE Atoms (optional) + atoms: ASE Atoms (optional) The ASE Atoms instance which is used if the uncertainty is not stored. @@ -88,7 +98,7 @@ def get_force_uncertainty(self, atoms=None, **kwargs): Get the predicted uncertainty of the forces. Parameters: - atoms : ASE Atoms (optional) + atoms: ASE Atoms (optional) The ASE Atoms instance which is used if the force uncertainties are not stored. @@ -102,7 +112,7 @@ def get_uncertainty_derivatives(self, atoms=None, **kwargs): Get the derivatives of the uncertainty of the energy. Parameters: - atoms : ASE Atoms (optional) + atoms: ASE Atoms (optional) The ASE Atoms instance which is used if the derivatives of the uncertainty are not stored. @@ -116,7 +126,7 @@ def set_atoms(self, atoms, **kwargs): Save the ASE Atoms instance in the calculator. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms instance that are saved. Returns: @@ -132,7 +142,7 @@ def add_training(self, atoms_list, **kwarg): Add training data as ASE Atoms to the ML model. Parameters: - atoms_list : list or ASE Atoms + atoms_list: list or ASE Atoms A list of or a single ASE Atoms with calculated energies and forces. @@ -153,20 +163,48 @@ def train_model(self, **kwarg): self.mlmodel.train_model(**kwarg) return self - def save_data(self, trajectory="data.traj", **kwarg): + def save_data( + self, + trajectory="data.traj", + mode="w", + write_last=False, + **kwargs, + ): """ Save the ASE Atoms data to a trajectory. Parameters: - trajectory : str + trajectory: str or TrajectoryWriter instance The name of the trajectory file where the data is saved. + Or a TrajectoryWriter instance where the data is saved to. + mode: str + The mode of the trajectory file. + write_last: bool + Whether to only write the last atoms instance to the + trajectory. + If False, all atoms instances in the database are written + to the trajectory. Returns: self: The updated object itself. 
""" - self.mlmodel.save_data(trajectory=trajectory, **kwarg) + self.mlmodel.save_data( + trajectory=trajectory, + mode=mode, + write_last=write_last, + **kwargs, + ) return self + def get_data_atoms(self, **kwargs): + """ + Get the list of atoms in the database. + + Returns: + list: A list of the saved ASE Atoms objects. + """ + return self.mlmodel.get_data_atoms() + def get_training_set_size(self): """ Get the number of atoms objects in the ML model. @@ -181,7 +219,7 @@ def is_in_database(self, atoms, **kwargs): Check if the ASE Atoms is in the database. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms instance with a calculator. Returns: @@ -194,7 +232,7 @@ def copy_atoms(self, atoms, **kwargs): Copy the atoms object together with the calculated properties. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator that is copied. Returns: @@ -203,12 +241,26 @@ def copy_atoms(self, atoms, **kwargs): """ return self.mlmodel.copy_atoms(atoms, **kwargs) + def update_mlmodel_arguments(self, **kwargs): + """ + Update the arguments in the ML model. + + Parameters: + kwargs: dict + A dictionary with the arguments to update. + + Returns: + self: The updated object itself. + """ + self.mlmodel.update_arguments(**kwargs) + return self + def update_database_arguments(self, point_interest=None, **kwargs): """ Update the arguments in the database. Parameters: - point_interest : list + point_interest: list A list of the points of interest as ASE Atoms instances. Returns: @@ -238,7 +290,7 @@ def calculate( using *atoms.calc.get_uncertainty_derivatives(atoms)*. Returns: - self.results : dict + self.results: dict A dictionary with all the calculated properties. 
""" # Atoms object @@ -259,41 +311,35 @@ def calculate( get_unc_derivatives=get_unc_derivatives, ) # Store the properties that are implemented - for key, value in results.items(): - if key in self.implemented_properties: - self.results[key] = value + self.store_properties(results) return self.results def save_mlcalc(self, filename="mlcalc.pkl", **kwargs): """ - Save the ML calculator object to a file. + Save the ML calculator instance to a file. Parameters: - filename : str - The name of the file where the object is saved. + filename: str + The name of the file where the instance is saved. Returns: - self: The object itself. + self: The instance itself. """ - import pickle - with open(filename, "wb") as file: pickle.dump(self, file) return self def load_mlcalc(self, filename="mlcalc.pkl", **kwargs): """ - Load the ML calculator object from a file. + Load the ML calculator instance from a file. Parameters: - filename : str - The name of the file where the object is saved. + filename: str + The name of the file where the instance is saved. Returns: - mlcalc: The loaded ML calculator object. + mlcalc: The loaded ML calculator instance. """ - import pickle - with open(filename, "rb") as file: mlcalc = pickle.load(file) return mlcalc @@ -306,6 +352,7 @@ def update_arguments( calc_force_unc=None, calc_unc_deriv=None, calc_kwargs=None, + round_pred=None, **kwargs, ): """ @@ -313,7 +360,7 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - mlmodel : MLModel class object + mlmodel: MLModel class object Machine Learning model used for ASE Atoms and calculator. The object must have the functions: calculate, train_model, and add_training. @@ -328,27 +375,100 @@ def update_arguments( calc_unc_deriv: bool Whether to calculate the derivatives of the uncertainty of the energy. - calc_kwargs : dict + calc_kwargs: dict A dictionary with kwargs for the parent calculator class object. 
+ round_pred: int (optional) + The number of decimals to round the predictions to. + If None, the predictions are not rounded. Returns: self: The updated object itself. """ + reset = False if mlmodel is not None: self.mlmodel = mlmodel.copy() + reset = True if calc_forces is not None: self.calc_forces = calc_forces + reset = True if calc_unc is not None: self.calc_unc = calc_unc + reset = True if calc_force_unc is not None: self.calc_force_unc = calc_force_unc + reset = True if calc_unc_deriv is not None: self.calc_unc_deriv = calc_unc_deriv + reset = True if calc_kwargs is not None: self.calc_kwargs = calc_kwargs.copy() + reset = True + if round_pred is not None or not hasattr(self, "round_pred"): + self.round_pred = round_pred + reset = True # Empty the results - self.reset() + if reset: + self.reset() + return self + + def set_seed(self, seed=None, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + # Set the random seed for the ML model + self.mlmodel.set_seed(seed) + return self + + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + self.mlmodel.set_dtype(dtype, **kwargs) + return self + + def set_use_fingerprint(self, use_fingerprint, **kwargs): + """ + Set whether to use fingerprints in the model and database. + + Parameters: + use_fingerprint: bool + Whether to use fingerprints in the model and database. + + Returns: + self: The updated object itself. + """ + self.mlmodel.set_use_fingerprint(use_fingerprint=use_fingerprint) + return self + + def set_use_derivatives(self, use_derivatives, **kwargs): + """ + Set whether to use derivatives in the model and database. 
+ + Parameters: + use_derivatives: bool + Whether to use derivatives in the model and database. + + Returns: + self: The updated object itself. + """ + self.mlmodel.set_use_derivatives(use_derivatives=use_derivatives) return self def get_property_arguments(self, properties=[], **kwargs): @@ -405,6 +525,20 @@ def model_prediction( ) return results + def store_properties(self, results, **kwargs): + "Store the properties that are implemented." + for key, value in results.items(): + if key in self.implemented_properties: + # Round the predictions if needed + if self.round_pred is not None: + if isinstance(value, float): + value = round(value, self.round_pred) + else: + value = round_(value, self.round_pred) + # Save the properties in the results + self.results[key] = value + return self.results + def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization @@ -415,6 +549,7 @@ def get_arguments(self): calc_force_unc=self.calc_force_unc, calc_unc_deriv=self.calc_unc_deriv, calc_kwargs=self.calc_kwargs, + round_pred=self.round_pred, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/calculator/mlmodel.py b/catlearn/regression/gp/calculator/mlmodel.py index 86b21e50..96f9d032 100644 --- a/catlearn/regression/gp/calculator/mlmodel.py +++ b/catlearn/regression/gp/calculator/mlmodel.py @@ -1,7 +1,15 @@ -import numpy as np +from numpy import asarray, ndarray, sqrt, zeros +from ase.parallel import parprint +import pickle +from .default_model import get_default_model, get_default_database class MLModel: + """ + Machine Learning model used for the ASE Atoms instances and + in the machine learning calculators. 
+ """ + def __init__( self, model=None, @@ -10,46 +18,50 @@ def __init__( optimize=True, hp=None, pdis=None, + include_noise=False, + to_save_mlmodel=False, + save_mlmodel_kwargs={}, verbose=False, + dtype=float, **kwargs, ): """ - Machine Learning model used for ASE Atoms and calculator. + Initialize the ML model for Atoms. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - database : Database object + database: Database object The Database object with ASE atoms. - baseline : Baseline object + baseline: Baseline object The Baseline object calculator that calculates energy and forces. - optimize : bool + optimize: bool Whether to optimize the hyperparameters when the model is trained. - hp : dict + hp: dict Use a set of hyperparameters to optimize from else the current set is used. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. - verbose : bool + include_noise: bool + Whether to include noise in the uncertainty from the model. + to_save_mlmodel: bool + Whether to save the ML model to a file after training. + save_mlmodel_kwargs: dict + Arguments for saving the ML model, like the filename. + verbose: bool Whether to print statements in the optimization. + dtype: type + The data type of the arrays. 
""" # Make default model if it is not given if model is None: - model = get_default_model() + model = get_default_model(dtype=dtype) # Make default database if it is not given if database is None: - database = get_default_database() - # Use default baseline if it is not given - if baseline is None: - self.baseline = None - # Use default pdis if it is not given - if pdis is None: - self.pdis = None - # Make default hyperparameters if it is not given - self.hp = None + database = get_default_database(dtype=dtype) # Set the arguments self.update_arguments( model=model, @@ -58,7 +70,11 @@ def __init__( optimize=optimize, hp=hp, pdis=pdis, + include_noise=include_noise, + to_save_mlmodel=to_save_mlmodel, + save_mlmodel_kwargs=save_mlmodel_kwargs, verbose=verbose, + dtype=dtype, **kwargs, ) @@ -67,17 +83,16 @@ def add_training(self, atoms_list, **kwargs): Add training data in form of the ASE Atoms to the database. Parameters: - atoms_list : list or ASE Atoms + atoms_list: list or ASE Atoms A list of or a single ASE Atoms with calculated energies and forces. Returns: self: The updated object itself. """ - if not isinstance(atoms_list, (list, np.ndarray)): + if not isinstance(atoms_list, (list, ndarray)): atoms_list = [atoms_list] self.database.add_set(atoms_list) - self.store_baseline_targets(atoms_list) return self def train_model(self, **kwargs): @@ -88,9 +103,9 @@ def train_model(self, **kwargs): self: The updated object itself. 
""" # Get data from the data base - features, targets = self.get_data() + features, targets, atoms_list = self.get_data() # Correct targets with the baseline - targets = self.get_baseline_corrected_targets(targets) + targets = self.get_baseline_corrected_targets(atoms_list, targets) # Train model if self.optimize: # Optimize the hyperparameters and train the ML model @@ -98,6 +113,9 @@ def train_model(self, **kwargs): else: # Train the ML model self.model_training(features, targets, **kwargs) + # Save the ML model to a file if requested + if self.to_save_mlmodel: + self.save_mlmodel(**self.save_mlmodel_kwargs) return self def calculate( @@ -114,29 +132,29 @@ def calculate( If get_variance=False, variance is returned as None. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object that the properties (incl. energy) are calculated for. - get_uncertainty : bool + get_uncertainty: bool Whether to calculate the uncertainty. The uncertainty is None if get_uncertainty=False. - get_forces : bool + get_forces: bool Whether to calculate the forces. - get_force_uncertainties : bool + get_force_uncertainties: bool Whether to calculate the uncertainties of the predicted forces. - get_unc_derivatives : bool + get_unc_derivatives: bool Whether to calculate the derivatives of the uncertainty of the predicted energy. Returns: - energy : float + energy: float The predicted energy of the ASE Atoms. - forces : (Nat,3) array or None + forces: (Nat,3) array or None The predicted forces if get_forces=True. - uncertainty : float or None + uncertainty: float or None The predicted uncertainty of the energy if get_uncertainty=True. - uncertainty_forces : (Nat,3) array or None + uncertainty_forces: (Nat,3) array or None The predicted uncertainties of the forces if get_uncertainty=True and get_forces=True. 
""" @@ -159,19 +177,37 @@ def calculate( ) return results - def save_data(self, trajectory="data.traj", **kwarg): + def save_data( + self, + trajectory="data.traj", + mode="w", + write_last=False, + **kwargs, + ): """ Save the ASE Atoms data to a trajectory. Parameters: - trajectory : str + trajectory: str or TrajectoryWriter instance The name of the trajectory file where the data is saved. + Or a TrajectoryWriter instance where the data is saved to. + mode: str + The mode of the trajectory file. + write_last: bool + Whether to only write the last atoms instance to the + trajectory. + If False, all atoms instances in the database are written + to the trajectory. Returns: self: The updated object itself. """ - " Save the ASE atoms data to a trajectory. " - self.database.save_data(trajectory=trajectory, **kwarg) + self.database.save_data( + trajectory=trajectory, + mode=mode, + write_last=write_last, + **kwargs, + ) return self def get_training_set_size(self, **kwargs): @@ -188,7 +224,7 @@ def is_in_database(self, atoms, **kwargs): Check if the ASE Atoms is in the database. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator. Returns: @@ -201,7 +237,7 @@ def copy_atoms(self, atoms, **kwargs): Copy the atoms object together with the calculated properties. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object with a calculator that is copied. Returns: @@ -215,7 +251,7 @@ def update_database_arguments(self, point_interest=None, **kwargs): Update the arguments in the database. Parameters: - point_interest : list + point_interest: list A list of the points of interest as ASE Atoms instances. Returns: @@ -224,6 +260,36 @@ def update_database_arguments(self, point_interest=None, **kwargs): self.database.update_arguments(point_interest=point_interest, **kwargs) return self + def save_mlmodel(self, filename="mlmodel.pkl", **kwargs): + """ + Save the ML model instance to a file. 
+ + Parameters: + filename: str + The name of the file where the instance is saved. + + Returns: + self: The instance itself. + """ + with open(filename, "wb") as file: + pickle.dump(self, file) + return self + + def load_mlmodel(self, filename="mlmodel.pkl", **kwargs): + """ + Load the ML model instance from a file. + + Parameters: + filename: str + The name of the file where the instance is saved. + + Returns: + mlcalc: The loaded ML model instance. + """ + with open(filename, "rb") as file: + mlmodel = pickle.load(file) + return mlmodel + def update_arguments( self, model=None, @@ -232,7 +298,11 @@ def update_arguments( optimize=None, hp=None, pdis=None, + include_noise=None, + to_save_mlmodel=None, + save_mlmodel_kwargs=None, verbose=None, + dtype=None, **kwargs, ): """ @@ -240,24 +310,32 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - database : Database object + database: Database object The Database object with ASE atoms. - baseline : Baseline object + baseline: Baseline object The Baseline object calculator that calculates energy and forces. - optimize : bool + optimize: bool Whether to optimize the hyperparameters when the model is trained. - hp : dict + hp: dict Use a set of hyperparameters to optimize from else the current set is used. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. - verbose : bool + include_noise: bool + Whether to include noise in the uncertainty from the model. + to_save_mlmodel: bool + Whether to save the ML model to a file after training. + save_mlmodel_kwargs: dict + Arguments for saving the ML model, like the filename. + verbose: bool Whether to print statements in the optimization. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. 
@@ -268,28 +346,40 @@ def update_arguments( self.database = database.copy() if baseline is not None: self.baseline = baseline.copy() + elif not hasattr(self, "baseline"): + self.baseline = None if optimize is not None: self.optimize = optimize if hp is not None: self.hp = hp.copy() + elif not hasattr(self, "hp"): + self.hp = None if pdis is not None: self.pdis = pdis.copy() + elif not hasattr(self, "pdis"): + self.pdis = None + if include_noise is not None: + self.include_noise = include_noise + if to_save_mlmodel is not None: + self.to_save_mlmodel = to_save_mlmodel + if save_mlmodel_kwargs is not None: + self.save_mlmodel_kwargs = save_mlmodel_kwargs if verbose is not None: self.verbose = verbose + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype) # Check if the baseline is used if self.baseline is None: self.use_baseline = False else: self.use_baseline = True - # Make a list of the baseline targets - if baseline is not None or database is not None: - self.baseline_targets = [] # Check that the model and database have the same attributes self.check_attributes() return self def model_optimization(self, features, targets, **kwargs): "Optimize the ML model with the arguments set in optimize_kwargs." 
+ # Optimize the hyperparameters and train the ML model sol = self.model.optimize( features, targets, @@ -299,9 +389,18 @@ def model_optimization(self, features, targets, **kwargs): verbose=False, **kwargs, ) + # Print the solution if verbose is True if self.verbose: - from ase.parallel import parprint - + # Get the prefactor if it is available + if hasattr(self.model, "get_prefactor"): + sol["prefactor"] = float( + "{:.3e}".format(self.model.get_prefactor()) + ) + # Get the noise correction if it is available + if hasattr(self.model, "get_correction"): + sol["correction"] = float( + "{:.3e}".format(self.model.get_correction()) + ) parprint(sol) return self.model @@ -321,22 +420,24 @@ def model_prediction( ): "Predict the targets and uncertainties." # Calculate fingerprint - fp = self.database.make_atoms_feature(atoms) + fp = self.make_atoms_feature(atoms) # Calculate energy, forces, and uncertainty y, var, var_deriv = self.model.predict( - np.array([fp]), + fp, get_derivatives=get_forces, get_variance=get_uncertainty, - include_noise=False, + include_noise=self.include_noise, get_derivtives_var=get_force_uncertainties, get_var_derivatives=get_unc_derivatives, ) # Correct the predicted targets with the baseline if it is used y = self.add_baseline_correction( - y, atoms=atoms, use_derivatives=get_forces + y, + atoms=atoms, + use_derivatives=get_forces, ) # Extract the energy - energy = y[0][0] + energy = y.item(0) # Extract the forces if they are requested if get_forces: forces = -y[0][1:] @@ -344,10 +445,10 @@ def model_prediction( forces = None # Get the uncertainties if they are requested if get_uncertainty: - unc = np.sqrt(var[0][0]) + unc = sqrt(var.item(0)) # Get the uncertainty of the forces if they are requested if get_force_uncertainties and get_forces: - unc_forces = np.sqrt(unc[0][1:]) + unc_forces = sqrt(var[0][1:]) else: unc_forces = None # Get the derivatives of the predicted uncertainty @@ -389,53 +490,60 @@ def store_results( # Make the full matrix 
of forces and save it if forces is not None: results["forces"] = self.not_masked_reshape( - forces, not_masked, natoms + forces, + not_masked=not_masked, + natoms=natoms, ) # Make the full matrix of force uncertainties and save it if unc_forces is not None: results["force uncertainties"] = self.not_masked_reshape( - unc_forces, not_masked, natoms + unc_forces, + not_masked=not_masked, + natoms=natoms, ) # Make the full matrix of derivatives of uncertainty and save it if unc_deriv is not None: results["uncertainty derivatives"] = self.not_masked_reshape( - unc_deriv, not_masked, natoms + unc_deriv, + not_masked=not_masked, + natoms=natoms, ) return results def add_baseline_correction( - self, targets, atoms, use_derivatives=True, **kwargs + self, + targets, + atoms, + use_derivatives=True, + **kwargs, ): "Add the baseline correction to the targets if a baseline is used." if self.use_baseline: - # Calculate the baseline for the ASE atoms object + # Calculate the baseline for the ASE atoms instance y_base = self.calculate_baseline( - [atoms], use_derivatives=use_derivatives, **kwargs + [atoms], + use_derivatives=use_derivatives, + **kwargs, ) # Add baseline correction to the targets - return targets + np.array(y_base)[0] + targets += asarray(y_base, dtype=self.dtype)[0] return targets - def get_baseline_corrected_targets(self, targets, **kwargs): + def get_baseline_corrected_targets(self, atoms_list, targets, **kwargs): """ Get the baseline corrected targets if a baseline is used. The baseline correction is subtracted from training targets. """ if self.use_baseline: - return targets - np.array(self.baseline_targets) - return targets - - def store_baseline_targets(self, atoms_list, **kwargs): - "Store the baseline correction on the targets." 
- # Calculate the baseline for each ASE atoms objects - if self.use_baseline: + # Calculate the baseline for each ASE atoms instance y_base = self.calculate_baseline( atoms_list, use_derivatives=self.database.use_derivatives, **kwargs, ) - self.baseline_targets.extend(y_base) - return self.baseline_targets + # Subtract baseline correction to the targets + targets -= asarray(y_base, dtype=self.dtype) + return targets def calculate_baseline(self, atoms_list, use_derivatives=True, **kwargs): "Calculate the baseline for each ASE atoms object." @@ -444,28 +552,38 @@ def calculate_baseline(self, atoms_list, use_derivatives=True, **kwargs): atoms_base = atoms.copy() atoms_base.calc = self.baseline y_base.append( - self.make_targets(atoms_base, use_derivatives=use_derivatives) + self.make_targets( + atoms_base, + use_derivatives=use_derivatives, + **kwargs, + ) ) return y_base - def not_masked_reshape(self, array, not_masked, natoms, **kwargs): + def not_masked_reshape(self, nm_array, not_masked, natoms, **kwargs): """ Reshape an array so that it works for all atom coordinates and - set constrained indicies to 0. + set constrained indices to 0. """ - full_array = np.zeros((natoms, 3)) - full_array[not_masked] = array.reshape(-1, 3) + full_array = zeros((natoms, 3), dtype=self.dtype) + full_array[not_masked] = nm_array.reshape(-1, 3) return full_array def get_data(self, **kwargs): "Get data from the data base." features = self.database.get_features() targets = self.database.get_targets() - return features, targets + atoms_list = self.get_data_atoms() + return features, targets, atoms_list def get_data_atoms(self, **kwargs): - "Get the atoms stored in the data base." - return self.database.get_atoms() + """ + Get the list of atoms in the database. + + Returns: + list: A list of the saved ASE Atoms objects. 
+ """ + return self.database.get_data_atoms() def reset_database(self, **kwargs): """ @@ -475,31 +593,128 @@ def reset_database(self, **kwargs): self: The updated object itself. """ self.database.reset_database() - self.baseline_targets = [] return self def make_targets(self, atoms, use_derivatives=True, **kwargs): "Make the target in the data base." return self.database.make_target( - atoms, use_derivatives=use_derivatives, use_negative_forces=True + atoms, + use_derivatives=use_derivatives, + use_negative_forces=True, ) def get_constraints(self, atoms, **kwargs): """ - Get the number of atoms and the indicies of + Get the number of atoms and the indices of the atoms without constraints. """ natoms = len(atoms) not_masked = self.database.get_constraints(atoms, **kwargs) return natoms, not_masked + def set_seed(self, seed=None, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + # Set the random seed for the database + self.database.set_seed(seed) + # Set the random seed for the model + self.model.set_seed(seed) + return self + + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. 
+ """ + # Set the data type + self.dtype = dtype + # Set the data type of the model and database + self.model.set_dtype(dtype=dtype, **kwargs) + self.database.set_dtype(dtype=dtype, **kwargs) + # Set the data type of the baseline if it is used + if self.baseline is not None: + self.baseline.set_dtype(dtype=dtype, **kwargs) + # Set the data type of the prior distributions if they are used + if self.pdis is not None: + for pdis in self.pdis.values(): + pdis.set_dtype(dtype=dtype, **kwargs) + return self + + def set_use_fingerprint(self, use_fingerprint, **kwargs): + """ + Set whether to use fingerprints in the model and database. + + Parameters: + use_fingerprint: bool + Whether to use fingerprints in the model and database. + + Returns: + self: The updated object itself. + """ + self.model.set_use_fingerprint(use_fingerprint=use_fingerprint) + self.database.set_use_fingerprint(use_fingerprint=use_fingerprint) + return self + + def set_use_derivatives(self, use_derivatives, **kwargs): + """ + Set whether to use derivatives in the model and database. + + Parameters: + use_derivatives: bool + Whether to use derivatives in the model and database. + + Returns: + self: The updated object itself. + """ + self.model.set_use_derivatives(use_derivatives=use_derivatives) + self.database.set_use_derivatives(use_derivatives=use_derivatives) + # Set the data type of the baseline if it is used + if self.baseline is not None: + self.baseline.set_use_forces(use_derivatives) + return self + + def make_atoms_feature(self, atoms, **kwargs): + """ + Make the feature or fingerprint of a single Atoms object. + It can e.g. be used for predicting. + + Parameters: + atoms: ASE Atoms + The ASE Atoms object with a calculator. + + Returns: + array of fingerprint object: The fingerprint object of the + Atoms object. + or + array: The feature or fingerprint array of the Atoms object. 
+ """ + # Calculate fingerprint + fp = self.database.make_atoms_feature(atoms, **kwargs) + return asarray([fp]) + def check_attributes(self): "Check if all attributes agree between the class and subclasses." if ( self.model.get_use_fingerprint() != self.database.get_use_fingerprint() ): - raise Exception( + raise ValueError( "Model and Database do not agree " "whether to use fingerprints!" ) @@ -507,7 +722,7 @@ def check_attributes(self): self.model.get_use_derivatives() != self.database.get_use_derivatives() ): - raise Exception( + raise ValueError( "Model and Database do not agree " "whether to use derivatives/forces!" ) @@ -523,12 +738,16 @@ def get_arguments(self): optimize=self.optimize, hp=self.hp, pdis=self.pdis, + include_noise=self.include_noise, + to_save_mlmodel=self.to_save_mlmodel, + save_mlmodel_kwargs=self.save_mlmodel_kwargs, verbose=self.verbose, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class - object_kwargs = dict(baseline_targets=self.baseline_targets.copy()) + object_kwargs = dict() return arg_kwargs, constant_kwargs, object_kwargs def copy(self): @@ -553,368 +772,3 @@ def __repr__(self): [f"{key}={value}" for key, value in arg_kwargs.items()] ) return "{}({})".format(self.__class__.__name__, str_kwargs) - - -def get_default_model( - model="tp", - prior="median", - use_derivatives=True, - use_fingerprint=False, - global_optimization=True, - parallel=False, - n_reduced=None, - **kwargs, -): - """ - Get the default ML model from the simple given arguments. - - Parameters: - model : str - Either the tp that gives the Studen T process or - gp that gives the Gaussian process. - prior : str - Specify what prior mean should be used. - use_derivatives : bool - Whether to use derivatives of the targets. - use_fingerprint : bool - Whether to use fingerprints for the features. - This has to be the same as for the database! 
- global_optimization : bool - Whether to perform a global optimization of the hyperparameters. - A local optimization is used if global_optimization=False, - which can not be parallelized. - parallel : bool - Whether to optimize the hyperparameters in parallel. - n_reduced : int or None - If n_reduced is an integer, the hyperparameters are only optimized - when the data set size is equal to or below the integer. - If n_reduced is None, the hyperparameter is always optimized. - - Returns: - model : Model - The Machine Learning Model with kernel and - prior that are optimized. - """ - # Check that the model is given as a string - if not isinstance(model, str): - return model - # Make the prior mean from given string - if isinstance(prior, str): - if prior.lower() == "median": - from ..means.median import Prior_median - - prior = Prior_median() - elif prior.lower() == "mean": - from ..means.mean import Prior_mean - - prior = Prior_mean() - elif prior.lower() == "min": - from ..means.min import Prior_min - - prior = Prior_min() - elif prior.lower() == "max": - from ..means.max import Prior_max - - prior = Prior_max() - # Construct the kernel class object - from ..kernel.se import SE - - kernel = SE( - use_fingerprint=use_fingerprint, use_derivatives=use_derivatives - ) - # Set the hyperparameter optimization method - if global_optimization: - # Set global optimization with or without parallelization - from ..optimizers.globaloptimizer import FactorizedOptimizer - - if parallel: - from ..optimizers.linesearcher import FineGridSearch - - line_optimizer = FineGridSearch( - optimize=True, - multiple_min=False, - ngrid=80, - loops=3, - parallel=True, - ) - else: - from ..optimizers.linesearcher import GoldenSearch - - line_optimizer = GoldenSearch( - optimize=True, multiple_min=False, parallel=False - ) - optimizer = FactorizedOptimizer( - line_optimizer=line_optimizer, - ngrid=80, - calculate_init=False, - parallel=parallel, - ) - else: - from ..optimizers.localoptimizer 
import ScipyOptimizer - - # Make the local optimizer - optimizer = ScipyOptimizer( - maxiter=500, - jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, - ) - if parallel: - import warnings - - warnings.warn( - "Parallel optimization is not implemented" - "with local optimization!" - ) - # Use either the Student t process or the Gaussian process - if model.lower() == "tp": - # Set model - from ..models.tp import TProcess - - model = TProcess( - prior=prior, - kernel=kernel, - use_derivatives=use_derivatives, - a=1e-3, - b=1e-4, - ) - # Set objective function - if global_optimization: - from ..objectivefunctions.tp.factorized_likelihood import ( - FactorizedLogLikelihood, - ) - - func = FactorizedLogLikelihood() - else: - from ..objectivefunctions.tp.likelihood import LogLikelihood - - func = LogLikelihood() - else: - # Set model - from ..models.gp import GaussianProcess - - model = GaussianProcess( - prior=prior, kernel=kernel, use_derivatives=use_derivatives - ) - # Set objective function - if global_optimization: - from ..objectivefunctions.gp.factorized_likelihood import ( - FactorizedLogLikelihood, - ) - - func = FactorizedLogLikelihood() - else: - from ..objectivefunctions.gp.likelihood import LogLikelihood - - func = LogLikelihood() - # Set hpfitter and whether a maximum data set size is applied - if n_reduced is None: - from ..hpfitter import HyperparameterFitter - - hpfitter = HyperparameterFitter(func=func, optimizer=optimizer) - else: - from ..hpfitter.redhpfitter import ReducedHyperparameterFitter - - hpfitter = ReducedHyperparameterFitter( - func=func, optimizer=optimizer, opt_tr_size=n_reduced - ) - model.update_arguments(hpfitter=hpfitter) - return model - - -def get_default_database( - fp=None, - use_derivatives=True, - database_reduction=False, - database_reduction_kwargs={}, - **kwargs, -): - """ - Get the default Database from the simple given arguments. 
- - Parameters: - fp : Fingerprint class object or None - The fingerprint object used to generate the fingerprints. - Cartesian coordinates are used if it is None. - use_derivatives : bool - Whether to use derivatives of the targets. - database_reduction : bool - Whether to used a reduced database after a number - of training points. - database_reduction_kwargs : dict - A dictionary with the arguments for the reduced database - if it is used. - - Returns: - database : Database object - The Database object with ASE atoms. - """ - # Set a fingerprint - if fp is None: - from ..fingerprint.cartesian import Cartesian - - # Use cartesian coordinates as the fingerprint - fp = Cartesian(reduce_dimensions=True, use_derivatives=use_derivatives) - use_fingerprint = False - else: - use_fingerprint = True - # Make the data base ready - if isinstance(database_reduction, str): - data_kwargs = dict( - reduce_dimensions=True, - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - npoints=50, - initial_indicies=[0, 1], - include_last=1, - ) - data_kwargs.update(database_reduction_kwargs) - if database_reduction.lower() == "distance": - from .database_reduction import DatabaseDistance - - database = DatabaseDistance(fingerprint=fp, **data_kwargs) - elif database_reduction.lower() == "random": - from .database_reduction import DatabaseRandom - - database = DatabaseRandom(fingerprint=fp, **data_kwargs) - elif database_reduction.lower() == "hybrid": - from .database_reduction import DatabaseHybrid - - database = DatabaseHybrid(fingerprint=fp, **data_kwargs) - elif database_reduction.lower() == "min": - from .database_reduction import DatabaseMin - - database = DatabaseMin(fingerprint=fp, **data_kwargs) - elif database_reduction.lower() == "last": - from .database_reduction import DatabaseLast - - database = DatabaseLast(fingerprint=fp, **data_kwargs) - elif database_reduction.lower() == "restart": - from .database_reduction import DatabaseRestart - - database = 
DatabaseRestart(fingerprint=fp, **data_kwargs) - elif database_reduction.lower() == "interest": - from .database_reduction import DatabasePointsInterest - - database = DatabasePointsInterest(fingerprint=fp, **data_kwargs) - elif database_reduction.lower() == "each_interest": - from .database_reduction import DatabasePointsInterestEach - - database = DatabasePointsInterestEach( - fingerprint=fp, **data_kwargs - ) - else: - from .database import Database - - database = Database( - fingerprint=fp, - reduce_dimensions=True, - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - ) - return database - - -def get_default_mlmodel( - model="tp", - fp=None, - baseline=None, - prior="median", - use_derivatives=True, - optimize_hp=True, - global_optimization=True, - parallel=False, - use_pdis=True, - n_reduced=None, - database_reduction=False, - database_reduction_kwargs={}, - verbose=False, - **kwargs, -): - """ - Get the default ML model with a database for the ASE Atoms - from the simple given arguments. - - Parameters: - model : str - Either the tp that gives the Studen T process or - gp that gives the Gaussian process. - fp : Fingerprint class object or None - The fingerprint object used to generate the fingerprints. - Cartesian coordinates are used if it is None. - baseline : Baseline object - The Baseline object calculator that calculates energy and forces. - prior : str - Specify what prior mean should be used. - use_derivatives : bool - Whether to use derivatives of the targets. - optimize_hp : bool - Whether to optimize the hyperparameters when the model is trained. - global_optimization : bool - Whether to perform a global optimization of the hyperparameters. - A local optimization is used if global_optimization=False, - which can not be parallelized. - parallel : bool - Whether to optimize the hyperparameters in parallel. - use_pdis : bool - Whether to make prior distributions for the hyperparameters. 
- n_reduced : int or None - If n_reduced is an integer, the hyperparameters are only optimized - when the data set size is equal to or below the integer. - If n_reduced is None, the hyperparameter is always optimized. - database_reduction : bool - Whether to used a reduced database after a number - of training points. - database_reduction_kwargs : dict - A dictionary with the arguments for the reduced database - if it is used. - verbose : bool - Whether to print statements in the optimization. - - Returns: - mlmodel : MLModel class object - Machine Learning model used for ASE Atoms and calculator. - """ - # Check if fingerprints are used - if fp is None: - use_fingerprint = False - else: - use_fingerprint = True - # Make the model - if isinstance(model, str): - model = get_default_model( - model=model, - prior=prior, - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - global_optimization=global_optimization, - parallel=parallel, - n_reduced=n_reduced, - ) - # Make the database - database = get_default_database( - fp=fp, - use_derivatives=use_derivatives, - database_reduction=database_reduction, - database_reduction_kwargs=database_reduction_kwargs, - ) - # Make prior distributions for the hyperparameters if specified - if use_pdis: - from ..pdistributions.normal import Normal_prior - - pdis = dict( - length=Normal_prior(mu=[-0.5], std=[1.0]), - noise=Normal_prior(mu=[-9.0], std=[1.0]), - ) - else: - pdis = None - # Make the ML model with database - return MLModel( - model=model, - database=database, - baseline=baseline, - optimize=optimize_hp, - pdis=pdis, - verbose=verbose, - ) diff --git a/catlearn/regression/gp/ensemble/clustering/__init__.py b/catlearn/regression/gp/ensemble/clustering/__init__.py index b5f16c84..d00ae539 100644 --- a/catlearn/regression/gp/ensemble/clustering/__init__.py +++ b/catlearn/regression/gp/ensemble/clustering/__init__.py @@ -2,6 +2,7 @@ from .k_means import K_means from .k_means_auto import K_means_auto from 
.k_means_number import K_means_number +from .k_means_enumeration import K_means_enumeration from .fixed import FixedClustering from .random import RandomClustering from .random_number import RandomClustering_number @@ -11,6 +12,7 @@ "K_means", "K_means_auto", "K_means_number", + "K_means_enumeration", "FixedClustering", "RandomClustering", "RandomClustering_number", diff --git a/catlearn/regression/gp/ensemble/clustering/clustering.py b/catlearn/regression/gp/ensemble/clustering/clustering.py index 27d6aca5..8049c9fa 100644 --- a/catlearn/regression/gp/ensemble/clustering/clustering.py +++ b/catlearn/regression/gp/ensemble/clustering/clustering.py @@ -1,22 +1,40 @@ +from numpy.random import default_rng, Generator, RandomState + + class Clustering: + """ + Clustering algorithm class for data sets. + """ + def __init__( self, + seed=None, + dtype=float, **kwargs, ): """ - Clustering class object for data sets. + Initialize the clustering algorithm. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ # Set default descriptors self.n_clusters = 1 # Set the arguments - self.update_arguments(**kwargs) + self.update_arguments(seed=seed, dtype=dtype, **kwargs) def fit(self, X, **kwargs): """ Fit the clustering algorithm. Parameters: - X : (N,D) array + X: (N,D) array Training features with N data points. Returns: @@ -31,11 +49,11 @@ def cluster_fit_data(self, X, **kwargs): Fit the clustering algorithm and return the clustered data. Parameters: - X : (N,D) array + X: (N,D) array Training features with N data points. Returns: - list: A list of indicies to the training data for each cluster. + list: A list of indices to the training data for each cluster. 
""" raise NotImplementedError() @@ -44,28 +62,91 @@ def cluster(self, X, **kwargs): Cluster the given data if it is fitted. Parameters: - X : (M,D) array + X: (M,D) array Features with M data points. Returns: - list: A list of indicies to the data for each cluster. + list: A list of indices to the data for each cluster. """ raise NotImplementedError() - def update_arguments(self, metric=None, **kwargs): + def get_n_clusters(self): + """ + Get the number of clusters. + + Returns: + int: The number of clusters. + """ + return self.n_clusters + + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + return self + + def set_seed(self, seed=None, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + if seed is not None: + self.seed = seed + if isinstance(seed, int): + self.rng = default_rng(self.seed) + elif isinstance(seed, Generator) or isinstance(seed, RandomState): + self.rng = seed + else: + self.seed = None + self.rng = default_rng() + return self + + def update_arguments(self, seed=None, dtype=None, **kwargs): """ Update the class with its arguments. The existing arguments are used if they are not given. + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + Returns: self: The updated object itself. 
""" + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype) return self def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization - arg_kwargs = dict() + arg_kwargs = dict(seed=self.seed, dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict(n_clusters=self.n_clusters) # Get the objects made within the class diff --git a/catlearn/regression/gp/ensemble/clustering/fixed.py b/catlearn/regression/gp/ensemble/clustering/fixed.py index ab8556c3..dfefe48f 100644 --- a/catlearn/regression/gp/ensemble/clustering/fixed.py +++ b/catlearn/regression/gp/ensemble/clustering/fixed.py @@ -1,44 +1,72 @@ -import numpy as np +from numpy import arange, argmin, empty from .k_means import K_means class FixedClustering(K_means): + """ + Clustering algorithm class for data sets. + It uses the distances to pre-defined fixed centroids for clustering. + """ + def __init__( self, metric="euclidean", - centroids=np.array([]), + centroids=empty(0), + seed=None, + dtype=float, **kwargs, ): """ - Clustering class object for data sets. - Use distances to pre-defined fixed centroids for clustering. + Initialize the clustering algorithm. Parameters: - metric : str + metric: str The metric used to calculate the distances of the data. - centroids : (K,D) array + centroids: (K,D) array An array with the centroids of the K clusters. The centroids must have the same dimensions as the features. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" # Set the arguments - self.update_arguments(centroids=centroids, metric=metric, **kwargs) + self.update_arguments( + centroids=centroids, + metric=metric, + seed=seed, + dtype=dtype, + **kwargs, + ) def cluster_fit_data(self, X, **kwargs): - indicies = np.array(range(len(X))) - i_min = np.argmin(self.calculate_distances(X, self.centroids), axis=1) - return [indicies[i_min == ki] for ki in range(self.n_clusters)] + indices = arange(len(X)) + i_min = argmin(self.calculate_distances(X, self.centroids), axis=1) + return [indices[i_min == ki] for ki in range(self.n_clusters)] - def update_arguments(self, metric=None, centroids=None, **kwargs): + def update_arguments( + self, metric=None, centroids=None, seed=None, dtype=None, **kwargs + ): """ Update the class with its arguments. The existing arguments are used if they are not given. Parameters: - metric : str + metric: str The metric used to calculate the distances of the data. - centroids : (K,D) array + centroids: (K,D) array An array with the centroids of the K clusters. The centroids must have the same dimensions as the features. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. @@ -47,12 +75,22 @@ def update_arguments(self, metric=None, centroids=None, **kwargs): self.set_centroids(centroids) if metric is not None: self.metric = metric + # Set the parameters of the parent class + super(K_means, self).update_arguments( + seed=seed, + dtype=dtype, + ) return self def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(metric=self.metric, centroids=self.centroids) + arg_kwargs = dict( + metric=self.metric, + centroids=self.centroids, + seed=self.seed, + dtype=self.dtype, + ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/ensemble/clustering/k_means.py b/catlearn/regression/gp/ensemble/clustering/k_means.py index 0b3e4c80..78ff5a42 100644 --- a/catlearn/regression/gp/ensemble/clustering/k_means.py +++ b/catlearn/regression/gp/ensemble/clustering/k_means.py @@ -1,33 +1,47 @@ -import numpy as np +from numpy import arange, argmax, argmin, array, append, asarray, empty +from numpy.linalg import norm from scipy.spatial.distance import cdist from .clustering import Clustering class K_means(Clustering): + """ + Clustering algorithm class for data sets. + It uses the K-means++ algorithm for clustering. + """ + def __init__( self, metric="euclidean", n_clusters=4, maxiter=100, tol=1e-4, + seed=None, + dtype=float, **kwargs, ): """ - Clustering class object for data sets. - The K-means++ algorithm for clustering. + Initialize the clustering algorithm. Parameters: - metric : str + metric: str The metric used to calculate the distances of the data. - n_clusters : int + n_clusters: int The number of used clusters. - maxiter : int + maxiter: int The maximum number of iterations used to fit the clusters. - tol : float + tol: float The tolerance before the cluster fit is converged. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" # Set default descriptors - self.centroids = np.array([]) + self.centroids = empty(0, dtype=dtype) self.n_clusters = 1 # Set the arguments super().__init__( @@ -35,32 +49,36 @@ def __init__( n_clusters=n_clusters, maxiter=maxiter, tol=tol, + seed=seed, + dtype=dtype, **kwargs, ) def cluster_fit_data(self, X, **kwargs): - # If only one cluster is used give the full data + # Copy the data + X = array(X, dtype=self.dtype) + # If only one cluster is used, give the full data if self.n_clusters == 1: - self.centroids = np.array([np.mean(X, axis=0)]) - return [np.arange(len(X))] + self.centroids = asarray([X.mean(axis=0)]) + return [arange(len(X))] # Initiate the centroids centroids = self.initiate_centroids(X) # Optimize position of the centroids self.centroids = self.optimize_centroids(X, centroids) - # Return the cluster indicies + # Return the cluster indices return self.cluster(X) def cluster(self, X, **kwargs): - indicies = np.arange(len(X)) - i_min = np.argmin(self.calculate_distances(X, self.centroids), axis=1) - return [indicies[i_min == ki] for ki in range(self.n_clusters)] + indices = arange(len(X)) + i_min = argmin(self.calculate_distances(X, self.centroids), axis=1) + return [indices[i_min == ki] for ki in range(self.n_clusters)] def set_centroids(self, centroids, **kwargs): """ Set user defined centroids. Parameters: - centroids : (K,D) array + centroids: (K,D) array An array with the centroids of the K clusters. The centroids must have the same dimensions as the features. 
@@ -71,22 +89,42 @@ def set_centroids(self, centroids, **kwargs): self.n_clusters = len(self.centroids) return self + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + # Set the dtype + self.centroids = self.centroids.astype(dtype) + return self + def update_arguments( - self, metric=None, n_clusters=None, maxiter=None, tol=None, **kwargs + self, + metric=None, + n_clusters=None, + maxiter=None, + tol=None, + seed=None, + dtype=None, + **kwargs, ): """ Update the class with its arguments. The existing arguments are used if they are not given. Parameters: - metric : str + metric: str The metric used to calculate the distances of the data. - n_clusters : int + n_clusters: int The number of used clusters. - maxiter : int + maxiter: int The maximum number of iterations used to fit the clusters. - tol : float + tol: float The tolerance before the cluster fit is converged. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. @@ -99,6 +137,11 @@ def update_arguments( self.maxiter = int(maxiter) if tol is not None: self.tol = tol + # Set the parameters of the parent class + super().update_arguments( + seed=seed, + dtype=dtype, + ) return self def calculate_distances(self, Q, X, **kwargs): @@ -108,30 +151,25 @@ def calculate_distances(self, Q, X, **kwargs): def initiate_centroids(self, X, **kwargs): "Initial the centroids from the K-mean++ method." 
# Get the first centroid randomly - centroids = np.array(X[np.random.choice(len(X), size=1)]) + centroids = X[self.rng.choice(len(X), size=1)] for ki in range(1, self.n_clusters): # Calculate the maximum nearest neighbor - i_max = np.argmax( - np.min(self.calculate_distances(X, centroids), axis=1) - ) - centroids = np.append(centroids, [X[i_max]], axis=0) + i_max = argmax(self.calculate_distances(X, centroids).min(axis=1)) + centroids = append(centroids, [X[i_max]], axis=0) return centroids def optimize_centroids(self, X, centroids, **kwargs): "Optimize the positions of the centroids." - for i in range(1, self.maxiter + 1): + for _ in range(1, self.maxiter + 1): # Store the old centroids centroids_old = centroids.copy() # Calculate which centroids that are closest - i_min = np.argmin(self.calculate_distances(X, centroids), axis=1) - centroids = np.array( - [ - np.mean(X[i_min == ki], axis=0) - for ki in range(self.n_clusters) - ] + i_min = argmin(self.calculate_distances(X, centroids), axis=1) + centroids = asarray( + [X[i_min == ki].mean(axis=0) for ki in range(self.n_clusters)] ) # Check if it is converged - if np.linalg.norm(centroids - centroids_old) <= self.tol: + if norm(centroids - centroids_old) <= self.tol: break return centroids @@ -143,9 +181,11 @@ def get_arguments(self): n_clusters=self.n_clusters, maxiter=self.maxiter, tol=self.tol, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class - object_kwargs = dict(centroids=self.centroids) + object_kwargs = dict(centroids=self.centroids.copy()) return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/regression/gp/ensemble/clustering/k_means_auto.py b/catlearn/regression/gp/ensemble/clustering/k_means_auto.py index ad428849..7b585ebb 100644 --- a/catlearn/regression/gp/ensemble/clustering/k_means_auto.py +++ b/catlearn/regression/gp/ensemble/clustering/k_means_auto.py @@ -1,8 +1,15 @@ -import numpy as 
np +from numpy import append, arange, argmin, argsort, array, asarray +from numpy.linalg import norm from .k_means import K_means class K_means_auto(K_means): + """ + Clustering algorithm class for data sets. + It uses the K-means++ algorithm for clustering. + It uses a interval of number of data points in each cluster. + """ + def __init__( self, metric="euclidean", @@ -10,24 +17,31 @@ def __init__( max_data=30, maxiter=100, tol=1e-4, + seed=None, + dtype=float, **kwargs, ): """ - Clustering class object for data sets. - The K-means++ algorithm for clustering, but where the number - of clusters are updated. + Initialize the clustering algorithm. Parameters: - metric : str + metric: str The metric used to calculate the distances of the data. - min_data : int + min_data: int The minimum number of data point in each cluster. - max_data : int + max_data: int The maximum number of data point in each cluster. - maxiter : int + maxiter: int The maximum number of iterations used to fit the clusters. - tol : float + tol: float The tolerance before the cluster fit is converged. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" super().__init__( metric=metric, @@ -35,27 +49,29 @@ def __init__( max_data=max_data, maxiter=maxiter, tol=tol, + seed=seed, + dtype=dtype, **kwargs, ) def cluster_fit_data(self, X, **kwargs): + # Copy the data + X = array(X, dtype=self.dtype) # Calculate the number of clusters - n_data = len(X) - self.n_clusters = int(n_data // self.max_data) - if n_data - (self.n_clusters * self.max_data): - self.n_clusters = self.n_clusters + 1 - # If only one cluster is used give the full data + self.n_clusters = self.calc_n_clusters(X) + # If only one cluster is used, give the full data if self.n_clusters == 1: - self.centroids = np.array([np.mean(X, axis=0)]) - return [np.arange(n_data)] + self.centroids = asarray([X.mean(axis=0)]) + return [arange(len(X))] # Initiate the centroids centroids = self.initiate_centroids(X) # Optimize position of the centroids - self.centroids, cluster_indicies = self.optimize_centroids( - X, centroids + self.centroids, cluster_indices = self.optimize_centroids( + X, + centroids, ) - # Return the cluster indicies - return cluster_indicies + # Return the cluster indices + return cluster_indices def update_arguments( self, @@ -64,6 +80,8 @@ def update_arguments( max_data=None, maxiter=None, tol=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -71,86 +89,105 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - metric : str + metric: str The metric used to calculate the distances of the data. - min_data : int + min_data: int The minimum number of data point in each cluster. - max_data : int + max_data: int The maximum number of data point in each cluster. - maxiter : int + maxiter: int The maximum number of iterations used to fit the clusters. - tol : float + tol: float The tolerance before the cluster fit is converged. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. 
+ dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if metric is not None: - self.metric = metric if min_data is not None: self.min_data = int(min_data) if max_data is not None: self.max_data = int(max_data) - if maxiter is not None: - self.maxiter = int(maxiter) - if tol is not None: - self.tol = tol # Check that the numbers of used data points agree if self.max_data < self.min_data: self.max_data = int(self.min_data) + # Set the arguments of the parent class + super().update_arguments( + metric=metric, + n_clusters=None, + maxiter=maxiter, + tol=tol, + seed=seed, + dtype=dtype, + ) return self + def calc_n_clusters(self, X, **kwargs): + """ + Calculate the number of clusters based on the data. + """ + n_data = len(X) + n_clusters = int(n_data // self.max_data) + if n_data > (n_clusters * self.max_data): + n_clusters += 1 + return n_clusters + def optimize_centroids(self, X, centroids, **kwargs): "Optimize the positions of the centroids." 
- indicies = np.arange(len(X)) - for i in range(1, self.maxiter + 1): + indices = arange(len(X)) + for _ in range(1, self.maxiter + 1): # Store the old centroids centroids_old = centroids.copy() # Calculate which centroids that are closest distance_matrix = self.calculate_distances(X, centroids) - cluster_indicies = self.count_clusters( - X, indicies, distance_matrix + cluster_indices = self.count_clusters( + X, + indices, + distance_matrix, ) - centroids = np.array( - [ - np.mean(X[indicies_ki], axis=0) - for indicies_ki in cluster_indicies - ] + centroids = asarray( + [X[indices_ki].mean(axis=0) for indices_ki in cluster_indices] ) # Check if it is converged - if np.linalg.norm(centroids - centroids_old) <= self.tol: + if norm(centroids - centroids_old) <= self.tol: break - return centroids, cluster_indicies + return centroids, cluster_indices - def count_clusters(self, X, indicies, distance_matrix, **kwargs): + def count_clusters(self, X, indices, distance_matrix, **kwargs): """ - Get the indicies for each of the clusters. + Get the indices for each of the clusters. The number of data points in each cluster is counted and restricted between the minimum and maximum number of allowed cluster sizes. 
""" - # Make a list cluster indicies - klist = np.arange(self.n_clusters).reshape(-1, 1) + # Make a list cluster indices + klist = arange(self.n_clusters).reshape(-1, 1) # Find the cluster that each point is closest to - k_indicies = np.argmin(distance_matrix, axis=1) - indicies_ki_bool = klist == k_indicies + k_indices = argmin(distance_matrix, axis=1) + indices_ki_bool = klist == k_indices # Check the number of points per cluster - n_ki = np.sum(indicies_ki_bool, axis=1) + n_ki = indices_ki_bool.sum(axis=1) # Ensure the number is within the conditions n_ki[n_ki > self.max_data] = self.max_data n_ki[n_ki < self.min_data] = self.min_data - # Sort the indicies as function of the distances to the centroids - d_indicies = np.argsort(distance_matrix, axis=0) - indicies_sorted = indicies[d_indicies.T] - indicies_ki_bool = indicies_ki_bool[klist, indicies_sorted] + # Sort the indices as function of the distances to the centroids + d_indices = argsort(distance_matrix, axis=0) + indices_sorted = indices[d_indices.T] + indices_ki_bool = indices_ki_bool[klist, indices_sorted] # Prioritize the points that is part of each cluster - cluster_indicies = [ - np.append( - indicies_sorted[ki, indicies_ki_bool[ki]], - indicies_sorted[ki, ~indicies_ki_bool[ki]], + cluster_indices = [ + append( + indices_sorted[ki, indices_ki_bool[ki]], + indices_sorted[ki, ~indices_ki_bool[ki]], )[: n_ki[ki]] for ki in range(self.n_clusters) ] - return cluster_indicies + return cluster_indices def get_arguments(self): "Get the arguments of the class itself." 
@@ -161,6 +198,8 @@ def get_arguments(self): max_data=self.max_data, maxiter=self.maxiter, tol=self.tol, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict(n_clusters=self.n_clusters) diff --git a/catlearn/regression/gp/ensemble/clustering/k_means_enumeration.py b/catlearn/regression/gp/ensemble/clustering/k_means_enumeration.py new file mode 100644 index 00000000..c8fa9aaa --- /dev/null +++ b/catlearn/regression/gp/ensemble/clustering/k_means_enumeration.py @@ -0,0 +1,134 @@ +from numpy import append, arange, array, asarray +from .k_means import K_means + + +class K_means_enumeration(K_means): + """ + Clustering algorithm class for data sets. + It uses the K-means++ algorithm for clustering. + It uses a fixed number of data points in each cluster. + """ + + def __init__( + self, + metric="euclidean", + data_number=25, + seed=None, + dtype=float, + **kwargs, + ): + """ + Initialize the clustering algorithm. + + Parameters: + metric: str + The metric used to calculate the distances of the data. + data_number: int + The number of data point in each cluster. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
+ """ + super().__init__( + metric=metric, + data_number=data_number, + seed=seed, + dtype=dtype, + **kwargs, + ) + + def cluster_fit_data(self, X, **kwargs): + # Copy the data + X = array(X, dtype=self.dtype) + # Calculate the number of clusters + self.n_clusters = self.calc_n_clusters(X) + # If only one cluster is used, give the full data + if self.n_clusters == 1: + self.centroids = asarray([X.mean(axis=0)]) + return [arange(len(X))] + # Initiate the centroids + self.centroids, cluster_indices = self.initiate_centroids(X) + # Return the cluster indices + return cluster_indices + + def update_arguments( + self, + metric=None, + data_number=None, + seed=None, + dtype=None, + **kwargs, + ): + """ + Update the class with its arguments. + The existing arguments are used if they are not given. + + Parameters: + metric: str + The metric used to calculate the distances of the data. + data_number: int + The number of data point in each cluster. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + + Returns: + self: The updated object itself. + """ + if data_number is not None: + self.data_number = int(data_number) + # Set the arguments of the parent class + super().update_arguments( + metric=metric, + n_clusters=None, + seed=seed, + dtype=dtype, + ) + return self + + def calc_n_clusters(self, X, **kwargs): + """ + Calculate the number of clusters based on the data. + """ + n_data = len(X) + n_clusters = int(n_data // self.data_number) + if n_data - (n_clusters * self.data_number): + n_clusters += 1 + return n_clusters + + def initiate_centroids(self, X, **kwargs): + "Initial the centroids from the K-mean++ method." 
+ n_data = len(X) + indices = arange(n_data) + if int(self.n_clusters * self.data_number) > n_data: + n_f = int((self.n_clusters - 1) * self.data_number) + n_r = int(n_data - self.data_number) + indices = append(indices[:n_f], indices[n_r:]) + indices = indices.reshape(self.n_clusters, self.data_number) + centroids = asarray( + [X[indices_ki].mean(axis=0) for indices_ki in indices] + ) + return centroids, indices + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + metric=self.metric, + data_number=self.data_number, + seed=self.seed, + dtype=self.dtype, + ) + # Get the constants made within the class + constant_kwargs = dict(n_clusters=self.n_clusters) + # Get the objects made within the class + object_kwargs = dict(centroids=self.centroids) + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/regression/gp/ensemble/clustering/k_means_number.py b/catlearn/regression/gp/ensemble/clustering/k_means_number.py index 8b0e04d6..adb0833f 100644 --- a/catlearn/regression/gp/ensemble/clustering/k_means_number.py +++ b/catlearn/regression/gp/ensemble/clustering/k_means_number.py @@ -1,134 +1,176 @@ -import numpy as np +from numpy import append, arange, argmin, argsort, array, asarray +from numpy.linalg import norm from .k_means import K_means class K_means_number(K_means): + """ + Clustering algorithm class for data sets. + It uses the K-means++ algorithm for clustering. + It uses a fixed number of data points in each cluster. + """ + def __init__( self, metric="euclidean", data_number=25, maxiter=100, tol=1e-4, + seed=None, + dtype=float, **kwargs, ): """ - Clustering class object for data sets. - The K-means++ algorithm for clustering, but where the number - of clusters are updated from a fixed number data point in each cluster. + Initialize the clustering algorithm. 
Parameters: - metric : str + metric: str The metric used to calculate the distances of the data. - data_number : int + data_number: int The number of data point in each cluster. - maxiter : int + maxiter: int The maximum number of iterations used to fit the clusters. - tol : float + tol: float The tolerance before the cluster fit is converged. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ super().__init__( metric=metric, data_number=data_number, maxiter=maxiter, tol=tol, + seed=seed, + dtype=dtype, **kwargs, ) def cluster_fit_data(self, X, **kwargs): + # Copy the data + X = array(X, dtype=self.dtype) # Calculate the number of clusters - n_data = len(X) - self.n_clusters = int(n_data // self.data_number) - if n_data - (self.n_clusters * self.data_number): - self.n_clusters = self.n_clusters + 1 - # If only one cluster is used give the full data + self.n_clusters = self.calc_n_clusters(X) + # If only one cluster is used, give the full data if self.n_clusters == 1: - self.centroids = np.array([np.mean(X, axis=0)]) - return [np.arange(n_data)] + self.centroids = asarray([X.mean(axis=0)]) + return [arange(len(X))] # Initiate the centroids centroids = self.initiate_centroids(X) # Optimize position of the centroids - self.centroids, cluster_indicies = self.optimize_centroids( - X, centroids + self.centroids, cluster_indices = self.optimize_centroids( + X, + centroids, ) - # Return the cluster indicies - return cluster_indicies + # Return the cluster indices + return cluster_indices def update_arguments( - self, metric=None, data_number=None, maxiter=None, tol=None, **kwargs + self, + metric=None, + data_number=None, + maxiter=None, + tol=None, + seed=None, + dtype=None, + **kwargs, ): """ Update the class with its arguments. 
The existing arguments are used if they are not given. Parameters: - metric : str + metric: str The metric used to calculate the distances of the data. - data_number : int + data_number: int The number of data point in each cluster. - maxiter : int + maxiter: int The maximum number of iterations used to fit the clusters. - tol : float + tol: float The tolerance before the cluster fit is converged. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if metric is not None: - self.metric = metric if data_number is not None: self.data_number = int(data_number) - if maxiter is not None: - self.maxiter = int(maxiter) - if tol is not None: - self.tol = tol + # Set the arguments of the parent class + super().update_arguments( + metric=metric, + n_clusters=None, + maxiter=maxiter, + tol=tol, + seed=seed, + dtype=dtype, + ) return self + def calc_n_clusters(self, X, **kwargs): + """ + Calculate the number of clusters based on the data. + """ + n_data = len(X) + n_clusters = int(n_data // self.data_number) + if n_data - (n_clusters * self.data_number): + n_clusters += 1 + return n_clusters + def optimize_centroids(self, X, centroids, **kwargs): "Optimize the positions of the centroids." 
- indicies = np.arange(len(X)) - for i in range(1, self.maxiter + 1): + indices = arange(len(X)) + for _ in range(1, self.maxiter + 1): # Store the old centroids centroids_old = centroids.copy() # Calculate which centroids that are closest distance_matrix = self.calculate_distances(X, centroids) - cluster_indicies = self.count_clusters( - X, indicies, distance_matrix + cluster_indices = self.count_clusters( + X, + indices, + distance_matrix, ) - centroids = np.array( - [ - np.mean(X[indicies_ki], axis=0) - for indicies_ki in cluster_indicies - ] + centroids = asarray( + [X[indices_ki].mean(axis=0) for indices_ki in cluster_indices] ) # Check if it is converged - if np.linalg.norm(centroids - centroids_old) <= self.tol: + if norm(centroids - centroids_old) <= self.tol: break - return centroids, cluster_indicies + return centroids, cluster_indices - def count_clusters(self, X, indicies, distance_matrix, **kwargs): + def count_clusters(self, X, indices, distance_matrix, **kwargs): """ - Get the indicies for each of the clusters. + Get the indices for each of the clusters. The number of data points in each cluster is counted and restricted between the minimum and maximum number of allowed cluster sizes. 
""" - # Make a list cluster indicies - klist = np.arange(self.n_clusters).reshape(-1, 1) + # Make a list cluster indices + klist = arange(self.n_clusters).reshape(-1, 1) # Find the cluster that each point is closest to - k_indicies = np.argmin(distance_matrix, axis=1) - indicies_ki_bool = klist == k_indicies - # Sort the indicies as function of the distances to the centroids - d_indicies = np.argsort(distance_matrix, axis=0) - indicies_sorted = indicies[d_indicies.T] - indicies_ki_bool = indicies_ki_bool[klist, indicies_sorted] + k_indices = argmin(distance_matrix, axis=1) + indices_ki_bool = klist == k_indices + # Sort the indices as function of the distances to the centroids + d_indices = argsort(distance_matrix, axis=0) + indices_sorted = indices[d_indices.T] + indices_ki_bool = indices_ki_bool[klist, indices_sorted] # Prioritize the points that is part of each cluster - cluster_indicies = [ - np.append( - indicies_sorted[ki, indicies_ki_bool[ki]], - indicies_sorted[ki, ~indicies_ki_bool[ki]], + cluster_indices = [ + append( + indices_sorted[ki, indices_ki_bool[ki]], + indices_sorted[ki, ~indices_ki_bool[ki]], )[: self.data_number] for ki in range(self.n_clusters) ] - return cluster_indicies + return cluster_indices def get_arguments(self): "Get the arguments of the class itself." 
@@ -138,6 +180,8 @@ def get_arguments(self): data_number=self.data_number, maxiter=self.maxiter, tol=self.tol, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict(n_clusters=self.n_clusters) diff --git a/catlearn/regression/gp/ensemble/clustering/random.py b/catlearn/regression/gp/ensemble/clustering/random.py index 743c1682..6a9939a7 100644 --- a/catlearn/regression/gp/ensemble/clustering/random.py +++ b/catlearn/regression/gp/ensemble/clustering/random.py @@ -1,64 +1,84 @@ -import numpy as np +from numpy import append, arange, array_split, tile from .clustering import Clustering class RandomClustering(Clustering): + """ + Clustering algorithm class for data sets. + It uses randomized clusters for clustering. + """ + def __init__( self, n_clusters=4, equal_size=True, seed=None, + dtype=float, **kwargs, ): """ - Clustering class object for data sets. - The K-means++ algorithm for clustering. + Initialize the clustering algorithm. Parameters: - n_clusters : int + n_clusters: int The number of used clusters. - equal_size : bool + equal_size: bool Whether the clusters are forced to have the same size. - seed : int (optional) - The random seed used to permute the indicies. - If seed=None or False or 0, a random seed is not used. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" - # Set a random seed - self.seed = seed super().__init__( - n_clusters=n_clusters, equal_size=equal_size, seed=seed, **kwargs + n_clusters=n_clusters, + equal_size=equal_size, + seed=seed, + dtype=dtype, + **kwargs, ) def cluster_fit_data(self, X, **kwargs): - # Make indicies + # Make indices n_data = len(X) - indicies = np.arange(n_data) - # If only one cluster is used give the full data + indices = arange(n_data) + # If only one cluster is used, give the full data if self.n_clusters == 1: - return [indicies] + return [indices] # Randomly make clusters - i_clusters = self.randomized_clusters(indicies, n_data) - # Return the cluster indicies + i_clusters = self.randomized_clusters(indices, n_data) + # Return the cluster indices return i_clusters def cluster(self, X, **kwargs): return self.cluster_fit_data(X) def update_arguments( - self, n_clusters=None, equal_size=None, seed=None, **kwargs + self, + n_clusters=None, + equal_size=None, + seed=None, + dtype=None, + **kwargs, ): """ Update the class with its arguments. The existing arguments are used if they are not given. Parameters: - n_clusters : int + n_clusters: int The number of used clusters. - equal_size : bool + equal_size: bool Whether the clusters are forced to have the same size. - seed : int (optional) - The random seed used to permute the indicies. - If seed=None or False or 0, a random seed is not used. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
@@ -67,29 +87,29 @@ def update_arguments( self.n_clusters = int(n_clusters) if equal_size is not None: self.equal_size = equal_size - if seed is not None: - self.seed = seed + # Set the parameters of the parent class + super().update_arguments( + seed=seed, + dtype=dtype, + ) return self - def randomized_clusters(self, indicies, n_data, **kwargs): - "Randomized indicies used for each cluster." - # Permute the indicies - i_perm = self.get_permutation(indicies) + def randomized_clusters(self, indices, n_data, **kwargs): + "Randomized indices used for each cluster." + # Permute the indices + i_perm = self.get_permutation(indices) # Ensure equal sizes of clusters if chosen if self.equal_size: i_perm = self.ensure_equal_sizes(i_perm, n_data) - i_clusters = np.array_split(i_perm, self.n_clusters) + i_clusters = array_split(i_perm, self.n_clusters) return i_clusters - def get_permutation(self, indicies): - "Permute the indicies" - if self.seed: - rng = np.random.default_rng(seed=self.seed) - return rng.permutation(indicies) - return np.random.permutation(indicies) + def get_permutation(self, indices): + "Permute the indices" + return self.rng.permutation(indices) def ensure_equal_sizes(self, i_perm, n_data, **kwargs): - "Extend the permuted indicies so the clusters have equal sizes." + "Extend the permuted indices so the clusters have equal sizes." 
# Find the number of excess points left n_left = n_data % self.n_clusters # Find the number of points that should be added @@ -97,15 +117,15 @@ def ensure_equal_sizes(self, i_perm, n_data, **kwargs): n_missing = self.n_clusters - n_left else: n_missing = 0 - # Extend the permuted indicies + # Extend the permuted indices if n_missing > 0: if n_missing > n_data: - i_perm = np.append( + i_perm = append( i_perm, - np.tile(i_perm, (n_missing // n_data) + 1)[:n_missing], + tile(i_perm, (n_missing // n_data) + 1)[:n_missing], ) else: - i_perm = np.append(i_perm, i_perm[:n_missing]) + i_perm = append(i_perm, i_perm[:n_missing]) return i_perm def get_arguments(self): @@ -115,6 +135,7 @@ def get_arguments(self): n_clusters=self.n_clusters, equal_size=self.equal_size, seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/ensemble/clustering/random_number.py b/catlearn/regression/gp/ensemble/clustering/random_number.py index 2dd54e93..fed91677 100644 --- a/catlearn/regression/gp/ensemble/clustering/random_number.py +++ b/catlearn/regression/gp/ensemble/clustering/random_number.py @@ -1,86 +1,117 @@ -import numpy as np +from numpy import append, arange, array_split, tile from .random import RandomClustering class RandomClustering_number(RandomClustering): - def __init__(self, data_number=25, seed=None, **kwargs): + """ + Clustering algorithm class for data sets. + It uses randomized clusters for clustering. + It uses a fixed number of data points in each cluster. + """ + + def __init__(self, data_number=25, seed=None, dtype=float, **kwargs): """ - Clustering class object for data sets. - The K-means++ algorithm for clustering. + Initialize the clustering algorithm. Parameters: - data_number : int + data_number: int The number of data point in each cluster. - seed : int (optional) - The random seed used to permute the indicies. - If seed=None or False or 0, a random seed is not used. 
+ seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ - super().__init__(data_number=data_number, seed=seed, **kwargs) + super(RandomClustering, self).__init__( + data_number=data_number, + seed=seed, + dtype=dtype, + **kwargs, + ) def cluster_fit_data(self, X, **kwargs): - # Make indicies + # Make indices n_data = len(X) - indicies = np.arange(n_data) + indices = arange(n_data) # Calculate the number of clusters self.n_clusters = int(n_data // self.data_number) if n_data - (self.n_clusters * self.data_number): self.n_clusters = self.n_clusters + 1 - # If only one cluster is used give the full data + # If only one cluster is used, give the full data if self.n_clusters == 1: - return [indicies] + return [indices] # Randomly make clusters - i_clusters = self.randomized_clusters(indicies, n_data) - # Return the cluster indicies + i_clusters = self.randomized_clusters(indices, n_data) + # Return the cluster indices return i_clusters - def update_arguments(self, data_number=None, seed=None, **kwargs): + def update_arguments( + self, + data_number=None, + seed=None, + dtype=None, + **kwargs, + ): """ Update the class with its arguments. The existing arguments are used if they are not given. Parameters: - data_number : int + data_number: int The number of data point in each cluster. - seed : int (optional) - The random seed used to permute the indicies. - If seed=None or False or 0, a random seed is not used. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
""" if data_number is not None: self.data_number = int(data_number) - if seed is not None: - self.seed = seed + # Set the parameters of the parent class + super(RandomClustering, self).update_arguments( + seed=seed, + dtype=dtype, + ) return self - def randomized_clusters(self, indicies, n_data, **kwargs): - # Permute the indicies - i_perm = self.get_permutation(indicies) + def randomized_clusters(self, indices, n_data, **kwargs): + # Permute the indices + i_perm = self.get_permutation(indices) # Ensure equal sizes of clusters i_perm = self.ensure_equal_sizes(i_perm, n_data) - i_clusters = np.array_split(i_perm, self.n_clusters) + i_clusters = array_split(i_perm, self.n_clusters) return i_clusters def ensure_equal_sizes(self, i_perm, n_data, **kwargs): - "Extend the permuted indicies so the clusters have equal sizes." + "Extend the permuted indices so the clusters have equal sizes." # Find the number of points that should be added n_missing = (self.n_clusters * self.data_number) - n_data - # Extend the permuted indicies + # Extend the permuted indices if n_missing > 0: if n_missing > n_data: - i_perm = np.append( + i_perm = append( i_perm, - np.tile(i_perm, (n_missing // n_data) + 1)[:n_missing], + tile(i_perm, (n_missing // n_data) + 1)[:n_missing], ) else: - i_perm = np.append(i_perm, i_perm[:n_missing]) + i_perm = append(i_perm, i_perm[:n_missing]) return i_perm def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(data_number=self.data_number, seed=self.seed) + arg_kwargs = dict( + data_number=self.data_number, + seed=self.seed, + dtype=self.dtype, + ) # Get the constants made within the class constant_kwargs = dict(n_clusters=self.n_clusters) # Get the objects made within the class diff --git a/catlearn/regression/gp/ensemble/ensemble.py b/catlearn/regression/gp/ensemble/ensemble.py index ae38c308..f081dcf9 100644 --- a/catlearn/regression/gp/ensemble/ensemble.py +++ b/catlearn/regression/gp/ensemble/ensemble.py @@ -1,65 +1,116 @@ -import numpy as np +from numpy import asarray, exp +import pickle +import warnings from ..means.constant import Prior_constant +from ..calculator.default_model import get_default_model class EnsembleModel: + """ + Ensemble model of machine learning models. + The ensemble model is used to combine the predictions + of multiple machine learning models. + """ + def __init__( self, model=None, use_variance_ensemble=True, use_softmax=False, use_same_prior_mean=True, + dtype=float, **kwargs, ): """ - Ensemble model of machine learning models. + Initialize the ensemble model. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - use_variance_ensemble : bool + use_variance_ensemble: bool Whether to use the predicted inverse variances to weight the predictions. Else an average of the predictions is used. - use_softmax : bool + use_softmax: bool Whether to use the softmax of the predicted inverse variances as weights. It is only active if use_variance_ensemble=True, too. - use_same_prior_mean : bool + use_same_prior_mean: bool Whether to use the same prior mean for all models. + dtype: type + The data type of the arrays. 
""" # Make default model if it is not given if model is None: - from ..calculator.mlmodel import get_default_model - - model = get_default_model() + model = get_default_model(dtype=dtype) # Set the arguments self.update_arguments( model=model, use_variance_ensemble=use_variance_ensemble, use_softmax=use_softmax, use_same_prior_mean=use_same_prior_mean, + dtype=dtype, **kwargs, ) def train(self, features, targets, **kwargs): """ Train the model with training features and targets. + Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Training features with N data points. - targets : (N,1) array - Training targets with N data points - or - targets : (N,1+D) array - Training targets in first column and derivatives - of each feature in the next columns if use_derivatives is True. + targets: (N,1) array or (N,1+D) array + Training targets with N data points. + If use_derivatives=True, the training targets is in + first column and derivatives is in the next columns. + Returns: self: The trained model object itself. """ raise NotImplementedError() + def optimize( + self, + features, + targets, + retrain=True, + hp=None, + pdis=None, + verbose=False, + **kwargs, + ): + """ + Optimize the hyperparameter of the model and its kernel. + + Parameters: + features: (N,D) array or (N) list of fingerprint objects + Training features with N data points. + targets: (N,1) array or (N,D+1) array + Training targets with or without derivatives with + N data points. + retrain: bool + Whether to retrain the model after the optimization. + hp: dict + Use a set of hyperparameters to optimize from + else the current set is used. + maxiter: int + Maximum number of iterations used by local or + global optimization method. + pdis: dict + A dict of prior distributions for each hyperparameter type. + verbose: bool + Print the optimized hyperparameters and + the object function value. 
+ + Returns: + list: List of solution dictionaries with objective function value, + optimized hyperparameters, success statement, + and number of used evaluations. + """ + raise NotImplementedError() + def predict( self, features, @@ -75,28 +126,28 @@ def predict( coefficients from training data. Parameters: - features : (M,D) array or (M) list of fingerprint objects + features: (M,D) array or (M) list of fingerprint objects Test features with M data points. - get_derivatives : bool + get_derivatives: bool Whether to predict the derivatives of the prediction mean. - get_variance : bool + get_variance: bool Whether to predict the variance of the targets. - include_noise : bool + include_noise: bool Whether to include the noise of data in the predicted variance. - get_derivtives_var : bool + get_derivtives_var: bool Whether to predict the variance of the derivatives of the targets. - get_var_derivatives : bool + get_var_derivatives: bool Whether to calculate the derivatives of the predicted variance of the targets. Returns: - Y_predict : (M,1) or (M,1+D) array + Y_predict: (M,1) or (M,1+D) array The predicted mean values with or without derivatives. - var : (M,1) or (M,1+D) array + var: (M,1) or (M,1+D) array The predicted variance of the targets with or without derivatives. - var_deriv : (M,D) array + var_deriv: (M,D) array The derivatives of the predicted variance of the targets. """ # Calculate the predicted values for one model @@ -154,64 +205,76 @@ def predict_mean(self, features, get_derivatives=False, **kwargs): from training data. Parameters: - features : (M,D) array or (M) list of fingerprint objects + features: (M,D) array or (M) list of fingerprint objects Test features with M data points. - get_derivatives : bool + get_derivatives: bool Whether to predict the derivatives of the prediction mean. 
Returns: - Y_predict : (M,1) array + Y_predict: (M,1) array The predicted mean values if get_derivatives=False or - Y_predict : (M,1+D) array + Y_predict: (M,1+D) array The predicted mean values and its derivatives if get_derivatives=True. """ # Check if the variance was needed for prediction mean if self.use_variance_ensemble: - raise Exception( + raise AttributeError( "The predict_mean function is not defined " "with use_variance_ensemble=True!" ) # Calculate the predicted values for one model if self.n_models == 1: return self.model_prediction_mean( - self.model, features, get_derivatives=get_derivatives, **kwargs + self.model, + features, + get_derivatives=get_derivatives, + **kwargs, ) # Calculate the predicted values for multiple model Y_preds = [] for model in self.models: Y_predict = self.model_prediction_mean( - model, features, get_derivatives=get_derivatives, **kwargs + model, + features, + get_derivatives=get_derivatives, + **kwargs, ) Y_preds.append(Y_predict) return self.ensemble( - Y_preds, get_derivatives=get_derivatives, get_variance=False + Y_preds, + get_derivatives=get_derivatives, + get_variance=False, ) def predict_variance( - self, features, get_derivatives=False, include_noise=False, **kwargs + self, + features, + get_derivatives=False, + include_noise=False, + **kwargs, ): """ Calculate the predicted variance of the test targets. Parameters: - features : (M,D) array or (M) list of fingerprint objects + features: (M,D) array or (M) list of fingerprint objects Test features with M data points. - KQX : (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array + KQX: (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array The kernel matrix of the test and training features. If KQX=None, it is calculated. - get_derivatives : bool + get_derivatives: bool Whether to predict the uncertainty of the derivatives of the targets. 
- include_noise : bool + include_noise: bool Whether to include the noise of data in the predicted variance Returns: - var : (M,1) array + var: (M,1) array The predicted variance of the targets if get_derivatives=False. or - var : (M,1+D) array + var: (M,1+D) array The predicted variance of the targets and its derivatives if get_derivatives=True. @@ -224,56 +287,33 @@ def calculate_variance_derivatives(self, features, **kwargs): of the test targets. Parameters: - features : (M,D) array or (M) list of fingerprint objects + features: (M,D) array or (M) list of fingerprint objects Test features with M data points. - KQX : (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array + KQX: (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array The kernel matrix of the test and training features. If KQX=None, it is calculated. Returns: - var_deriv : (M,D) array + var_deriv: (M,D) array The derivatives of the predicted variance of the targets. """ raise NotImplementedError() - def optimize( - self, - features, - targets, - retrain=True, - hp=None, - pdis=None, - verbose=False, - **kwargs, - ): + def get_hyperparams(self, **kwargs): """ - Optimize the hyperparameter of the model and its kernel. + Get the hyperparameters for the model and the kernel. - Parameters: - features : (N,D) array or (N) list of fingerprint objects - Training features with N data points. - targets : (N,1) array or (N,D+1) array - Training targets with or without derivatives with - N data points. - retrain : bool - Whether to retrain the model after the optimization. - hp : dict - Use a set of hyperparameters to optimize from - else the current set is used. - maxiter : int - Maximum number of iterations used by local or - global optimization method. - pdis : dict - A dict of prior distributions for each hyperparameter type. - verbose : bool - Print the optimized hyperparameters and - the object function value. 
Returns: - list : List of solution dictionaries with objective function value, - optimized hyperparameters, success statement, - and number of used evaluations. + dict: The hyperparameters in the log-space from + the model and kernel class if multiple models are not defined. + or + list: A list of dictionaries with the hyperparameters + in the log-space from the model and kernel class + if multiple models are defined. """ - raise NotImplementedError() + if len(self.models): + return [model.get_hyperparams() for model in self.models] + return self.model.get_hyperparams() def get_use_derivatives(self): "Get whether the derivatives of the targets are used." @@ -283,12 +323,113 @@ def get_use_fingerprint(self): "Get whether a fingerprint is used as the features." return self.model.get_use_fingerprint() + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + # Set the data type of the attributes + self.model.set_dtype(dtype=dtype, **kwargs) + self.copy_prior() + if len(self.models): + for model in self.models: + model.set_dtype(dtype=dtype, **kwargs) + return self + + def set_seed(self, seed, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + self.model.set_seed(seed) + if len(self.models): + for model in self.models: + model.set_seed(seed) + return self + + def set_use_derivatives(self, use_derivatives, **kwargs): + """ + Set whether to use derivatives/gradients for training and predictions. + + Parameters: + use_derivatives: bool + Use derivatives/gradients for training and predictions. + + Returns: + self: The updated object itself. 
+ """ + # Set whether to use derivatives for the kernel + self.model.set_use_derivatives(use_derivatives) + return self + + def set_use_fingerprint(self, use_fingerprint, **kwargs): + """ + Set whether to use a fingerprint as the features. + + Parameters: + use_fingerprint: bool + Use a fingerprint as the features. + + Returns: + self: The updated object itself. + """ + # Set whether to use a fingerprint for the features + self.model.set_use_fingerprint(use_fingerprint) + return self + + def save_model(self, filename="model.pkl", **kwargs): + """ + Save the model object to a file. + + Parameters: + filename: str + The name of the file where the object is saved. + + Returns: + self: The object itself. + """ + with open(filename, "wb") as file: + pickle.dump(self, file) + return self + + def load_model(self, filename="model.pkl", **kwargs): + """ + Load the model object from a file. + + Parameters: + filename: str + The name of the file where the object is saved. + + Returns: + model: The loaded model object. + """ + with open(filename, "rb") as file: + model = pickle.load(file) + return model + def update_arguments( self, model=None, use_variance_ensemble=None, use_softmax=None, use_same_prior_mean=None, + dtype=None, **kwargs, ): """ @@ -296,19 +437,21 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - use_variance_ensemble : bool + use_variance_ensemble: bool Whether to use the predicted inverse variances to weight the predictions. Else an average of the predictions is used. - use_softmax : bool + use_softmax: bool Whether to use the softmax of the predicted inverse variances as weights. It is only active if use_variance_ensemble=True, too. - use_same_prior_mean : bool + use_same_prior_mean: bool Whether to use the same prior mean for all models. + dtype: type + The data type of the arrays. 
Returns: self: The updated object itself. @@ -316,16 +459,29 @@ def update_arguments( if model is not None: self.model = model.copy() # Set descriptor of the ensemble model - self.n_models = 1 - self.models = [] - # Get the prior mean instance - self.prior = self.model.prior.copy() + self.reset_models() if use_variance_ensemble is not None: self.use_variance_ensemble = use_variance_ensemble if use_softmax is not None: self.use_softmax = use_softmax if use_same_prior_mean is not None: self.use_same_prior_mean = use_same_prior_mean + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype, **kwargs) + return self + + def copy_prior(self): + "Copy the prior of the model." + self.prior = self.model.prior.copy() + return self + + def reset_models(self, **kwargs): + "Reset the models." + # Set descriptor of the ensemble model + self.n_models = 1 + self.models = [] + # Get the prior mean instance + self.copy_prior() return self def model_training(self, model, features, targets, **kwargs): @@ -377,11 +533,17 @@ def model_prediction( ) def model_prediction_mean( - self, model, features, get_derivatives=False, **kwargs + self, + model, + features, + get_derivatives=False, + **kwargs, ): "Predict mean with the model." return model.predict_mean( - features, get_derivatives=get_derivatives, **kwargs + features, + get_derivatives=get_derivatives, + **kwargs, ) def model_prediction_variance( @@ -423,13 +585,13 @@ def ensemble( The variance weighted ensemble is used if variance_ensemble=True. 
""" # Transform the input to arrays - Y_preds = np.array(Y_preds) + Y_preds = asarray(Y_preds) if get_variance: - var_preds = np.array(var_preds) + var_preds = asarray(var_preds) else: var_preds = None if get_var_derivatives and var_derivs is not None: - var_derivs = np.array(var_derivs) + var_derivs = asarray(var_derivs) else: var_derivs = None # Perform ensemble of the predictions @@ -474,23 +636,22 @@ def ensemble_mean( var_predict = None var_deriv = None # Calculate the prediction mean - Y_predict = np.mean(Y_preds, axis=0) + Y_predict = Y_preds.mean(axis=0) # Calculate the predicted variance if get_variance: - var_predict = np.mean( - var_preds + ((Y_preds - Y_predict) ** 2), axis=0 + var_predict = (var_preds + ((Y_preds - Y_predict) ** 2)).mean( + axis=0 ) # Calculate the derivative of the predicted variance if get_var_derivatives: - var_deriv = np.mean( + var_deriv = ( var_derivs + ( 2.0 * (Y_preds[:, :, 0] - Y_predict[:, 0]) * (Y_preds[:, :, 1:] - Y_predict[:, 1:]) - ), - axis=0, - ) + ) + ).mean(axis=0) return Y_predict, var_predict, var_deriv def ensemble_variance( @@ -516,28 +677,26 @@ def ensemble_variance( var_preds, var_derivs, get_derivatives ) # Calculate the prediction mean - Y_predict = np.sum(weights * Y_preds, axis=0) + Y_predict = (weights * Y_preds).sum(axis=0) # Calculate the derivative of the prediction mean if get_derivatives: # Add extra contribution from weight derivatives - Y_predict[:, 1:] += np.sum( - Y_preds[:, :, 0:1] * weights_deriv, axis=0 + Y_predict[:, 1:] += (Y_preds[:, :, 0:1] * weights_deriv).sum( + axis=0 ) # Calculate the predicted variance if get_variance: - var_predict = np.sum( - weights * (var_preds + ((Y_preds - Y_predict) ** 2)), axis=0 - ) + var_predict = ( + weights * (var_preds + ((Y_preds - Y_predict) ** 2)) + ).sum(axis=0) if get_derivtives_var: - import warnings - warnings.warn( "Check if it is the right expression for" "the variance of the derivatives!" 
) # Calculate the derivative of the predicted variance if get_var_derivatives: - var_deriv = np.sum( + var_deriv = ( weights * ( var_derivs @@ -546,36 +705,39 @@ def ensemble_variance( * (Y_preds[:, :, 0:1] - Y_predict[:, 0:1]) * (Y_preds[:, :, 1:] - Y_predict[:, 1:]) ) - ), - axis=0, - ) - var_deriv += np.sum(var_preds[:, :, 0:1] * weights_deriv, axis=0) + ) + ).sum(axis=0) + var_deriv += (var_preds[:, :, 0:1] * weights_deriv).sum(axis=0) return Y_predict, var_predict, var_deriv def get_weights( - self, var_preds=None, var_derivs=None, get_derivatives=False, **kwargs + self, + var_preds=None, + var_derivs=None, + get_derivatives=False, + **kwargs, ): "Calculate the weights." weights_deriv = None if var_preds is None: - raise Exception("The predicted variance is missing!") + raise AttributeError("The predicted variance is missing!") # Use the predicted variance to weight predictions if self.use_softmax: - var_coef = np.exp(-var_preds[:, :, 0:1]) + var_coef = exp(-var_preds[:, :, 0:1]) else: var_coef = 1.0 / var_preds[:, :, 0:1] # Normalize the weights - weights = var_coef / np.sum(var_coef, axis=0) + weights = var_coef / var_coef.sum(axis=0) # Calculate the derivative of the prediction mean if get_derivatives: # Calculate the derivative of the weights if self.use_softmax: weights_deriv = weights * ( - np.sum(weights * var_derivs, axis=0) - var_derivs + (weights * var_derivs).sum(axis=0) - var_derivs ) else: weights_deriv = weights * ( - np.sum(weights * var_coef * var_derivs, axis=0) + (weights * var_coef * var_derivs).sum(axis=0) - (var_coef * var_derivs) ) return weights, weights_deriv @@ -600,6 +762,7 @@ def get_arguments(self): use_variance_ensemble=self.use_variance_ensemble, use_softmax=self.use_softmax, use_same_prior_mean=self.use_same_prior_mean, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict(n_models=self.n_models) diff --git a/catlearn/regression/gp/ensemble/ensemble_clustering.py 
b/catlearn/regression/gp/ensemble/ensemble_clustering.py index 3c8d41ca..f5a64302 100644 --- a/catlearn/regression/gp/ensemble/ensemble_clustering.py +++ b/catlearn/regression/gp/ensemble/ensemble_clustering.py @@ -1,8 +1,16 @@ -import numpy as np -from .ensemble import EnsembleModel +from numpy import array, ndarray +from .ensemble import EnsembleModel, get_default_model +from .clustering.k_means_number import K_means_number class EnsembleClustering(EnsembleModel): + """ + Ensemble model of machine learning models. + The ensemble model is used to combine the predictions + of multiple machine learning models. + The ensemble models are chosen by a clustering algorithm. + """ + def __init__( self, model=None, @@ -10,40 +18,38 @@ def __init__( use_variance_ensemble=True, use_softmax=False, use_same_prior_mean=True, + dtype=float, **kwargs, ): """ - Ensemble model of machine learning models with ensembles - from a clustering algorithm.. + Initialize the ensemble model. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - clustering : Clustering class object + clustering: Clustering class object The clustering method used to split the data to different models. - use_variance_ensemble : bool + use_variance_ensemble: bool Whether to use the predicted inverse variances to weight the predictions. Else an average of the predictions is used. - use_softmax : bool + use_softmax: bool Whether to use the softmax of the predicted inverse variances as weights. It is only active if use_variance_ensemble=True, too. - use_same_prior_mean : bool + use_same_prior_mean: bool Whether to use the same prior mean for all models. + dtype: type + The data type of the arrays. 
""" # Make default model if it is not given if model is None: - from ..calculator.mlmodel import get_default_model - - model = get_default_model() + model = get_default_model(dtype=dtype) # Make default clustering if it is not given if clustering is None: - from .clustering.k_means_number import K_means_number - - clustering = K_means_number() + clustering = K_means_number(dtype=dtype) # Set the arguments self.update_arguments( model=model, @@ -51,6 +57,7 @@ def __init__( use_variance_ensemble=use_variance_ensemble, use_softmax=use_softmax, use_same_prior_mean=use_same_prior_mean, + dtype=dtype, **kwargs, ) @@ -124,6 +131,18 @@ def optimize( self.models.append(model) return sols + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + # Set the data type of the clustering + self.clustering.set_dtype(dtype=dtype) + return self + + def set_seed(self, seed, **kwargs): + super().set_seed(seed, **kwargs) + # Set the random seed of the clustering + self.clustering.set_seed(seed=seed) + return self + def update_arguments( self, model=None, @@ -131,6 +150,7 @@ def update_arguments( use_variance_ensemble=None, use_softmax=None, use_same_prior_mean=None, + dtype=None, **kwargs, ): """ @@ -138,53 +158,53 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - clustering : Clustering class object + clustering: Clustering class object The clustering method used to split the data to different models. - use_variance_ensemble : bool + use_variance_ensemble: bool Whether to use the predicted inverse variances to weight the predictions. Else an average of the predictions is used. - use_softmax : bool + use_softmax: bool Whether to use the softmax of the predicted inverse variances as weights. It is only active if use_variance_ensemble=True, too. 
- use_same_prior_mean : bool + use_same_prior_mean: bool Whether to use the same prior mean for all models. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. """ - if model is not None: - self.model = model.copy() - # Set descriptor of the ensemble model - self.n_models = 1 - self.models = [] - # Get the prior mean instance - self.prior = self.model.prior.copy() if clustering is not None: self.clustering = clustering.copy() - if use_variance_ensemble is not None: - self.use_variance_ensemble = use_variance_ensemble - if use_softmax is not None: - self.use_softmax = use_softmax - if use_same_prior_mean is not None: - self.use_same_prior_mean = use_same_prior_mean + # Set the parameters for the parent class + super().update_arguments( + model=model, + use_variance_ensemble=use_variance_ensemble, + use_softmax=use_softmax, + use_same_prior_mean=use_same_prior_mean, + dtype=dtype, + ) return self def cluster(self, features, targets, **kwargs): "Cluster the data." 
- if isinstance(features[0], (np.ndarray, list)): - X = features.copy() + if isinstance(features[0], (ndarray, list)): + X = array(features, dtype=self.dtype) else: - X = np.array([feature.get_vector() for feature in features]) - cluster_indicies = self.clustering.cluster_fit_data(X) + X = array( + [feature.get_vector() for feature in features], + dtype=self.dtype, + ) + cluster_indices = self.clustering.cluster_fit_data(X) return [ - (features[indicies_ki], targets[indicies_ki]) - for indicies_ki in cluster_indicies + (features[indices_ki], targets[indices_ki]) + for indices_ki in cluster_indices ] def get_arguments(self): @@ -196,6 +216,7 @@ def get_arguments(self): use_variance_ensemble=self.use_variance_ensemble, use_softmax=self.use_softmax, use_same_prior_mean=self.use_same_prior_mean, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict(n_models=self.n_models) diff --git a/catlearn/regression/gp/fingerprint/__init__.py b/catlearn/regression/gp/fingerprint/__init__.py index bf916eb6..3b81bdc5 100644 --- a/catlearn/regression/gp/fingerprint/__init__.py +++ b/catlearn/regression/gp/fingerprint/__init__.py @@ -1,10 +1,10 @@ from .fingerprint import Fingerprint from .fingerprintobject import FingerprintObject -from .geometry import get_all_distances, get_inverse_distances, mic_distance from .cartesian import Cartesian +from .distances import Distances from .invdistances import InvDistances from .invdistances2 import InvDistances2 -from .sorteddistances import SortedDistances +from .sorteddistances import SortedInvDistances from .sumdistances import SumDistances from .sumdistancespower import SumDistancesPower from .meandistances import MeanDistances @@ -14,13 +14,11 @@ __all__ = [ "Fingerprint", "FingerprintObject", - "get_all_distances", - "get_inverse_distances", - "mic_distance", "Cartesian", + "Distances", "InvDistances", "InvDistances2", - "SortedDistances", + "SortedInvDistances", "SumDistances", "SumDistancesPower", 
"MeanDistances", diff --git a/catlearn/regression/gp/fingerprint/cartesian.py b/catlearn/regression/gp/fingerprint/cartesian.py index 4c398491..ff37f441 100644 --- a/catlearn/regression/gp/fingerprint/cartesian.py +++ b/catlearn/regression/gp/fingerprint/cartesian.py @@ -1,38 +1,30 @@ -import numpy as np +from numpy import asarray, identity from .fingerprint import Fingerprint +from .geometry import get_constraints class Cartesian(Fingerprint): - def __init__( - self, - reduce_dimensions=True, - use_derivatives=True, - **kwargs, - ): - """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The cartesian coordinate fingerprint is generated. + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The cartesian coordinate fingerprint is generated. + """ - Parameters: - reduce_dimensions : bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Calculate and store derivatives of the fingerprint wrt. - the cartesian coordinates. 
- """ - # Set the arguments - super().__init__( - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - **kwargs, - ) - - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): + def make_fingerprint(self, atoms, **kwargs): "The calculation of the cartesian coordinates fingerprint" - vector = (atoms.get_positions()[not_masked]).reshape(-1) + # Get the masked and not masked atoms + not_masked, _ = get_constraints( + atoms, + reduce_dimensions=self.reduce_dimensions, + ) + # Get the cartesian coordinates of the moved atoms + vector = asarray( + atoms.get_positions()[not_masked], + dtype=self.dtype, + ).reshape(-1) + # Get the derivatives if requested if self.use_derivatives: - derivative = np.identity(len(vector)) + derivative = identity(len(vector)) else: derivative = None return vector, derivative diff --git a/catlearn/regression/gp/fingerprint/distances.py b/catlearn/regression/gp/fingerprint/distances.py new file mode 100644 index 00000000..6a8841bb --- /dev/null +++ b/catlearn/regression/gp/fingerprint/distances.py @@ -0,0 +1,588 @@ +from numpy import arange, asarray, full, repeat, sqrt, zeros +from .geometry import ( + check_atoms, + get_all_distances, + get_constraints, + get_covalent_distances, + get_mask_indices, + get_periodic_softmax, + get_periodic_sum, +) +from .fingerprint import Fingerprint + + +class Distances(Fingerprint): + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The distances fingerprint is generated. + The distances are scaled with covalent radii. + """ + + def __init__( + self, + reduce_dimensions=True, + use_derivatives=True, + wrap=True, + include_ncells=False, + periodic_sum=False, + periodic_softmax=True, + mic=False, + all_ncells=True, + cell_cutoff=4.0, + dtype=float, + **kwargs, + ): + """ + Initialize the fingerprint constructor. 
+ + Parameters: + reduce_dimensions: bool + Whether to reduce the fingerprint space if constrains are used. + use_derivatives: bool + Calculate and store derivatives of the fingerprint wrt. + the cartesian coordinates. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells + when periodic boundary conditions are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic is faster than periodic_softmax, + but the derivatives are discontinuous. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
+ """ + # Set the arguments + self.update_arguments( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + wrap=wrap, + include_ncells=include_ncells, + periodic_sum=periodic_sum, + periodic_softmax=periodic_softmax, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + dtype=dtype, + **kwargs, + ) + + def make_fingerprint(self, atoms, **kwargs): + # Get the masked and not masked atoms + not_masked, masked = get_constraints( + atoms, + reduce_dimensions=self.reduce_dimensions, + ) + # Check if there are any not masked atoms + if len(not_masked) == 0: + fp = zeros((0), dtype=self.dtype) + if self.use_derivatives: + return fp, zeros((0, 0), dtype=self.dtype) + return fp, None + # Initialize the masking and indices + ( + not_masked, + masked, + nmi, + nmj, + nmi_ind, + nmj_ind, + ) = get_mask_indices(atoms, not_masked=not_masked, masked=masked) + # Get the periodicity + pbc = atoms.pbc + # Check what distance method should be used + ( + use_vector, + use_include_ncells, + use_periodic_softmax, + use_periodic_sum, + use_mic, + ) = self.use_dis_method(pbc=pbc, **kwargs) + # Check whether to calculate neighboring cells + use_ncells = ( + use_include_ncells or use_periodic_softmax or use_periodic_sum + ) + # Get all the distances and their vectors + dist, dist_vec = self.get_distances( + atoms=atoms, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + nmi_ind=nmi_ind, + nmj_ind=nmj_ind, + use_vector=use_vector, + include_ncells=use_ncells, + mic=use_mic, + ) + # Calculate the fingerprint and its derivatives + fp, g = self.calc_fp( + dist=dist, + dist_vec=dist_vec, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + nmi_ind=nmi_ind, + nmj_ind=nmj_ind, + atomic_numbers=atoms.get_atomic_numbers(), + tags=atoms.get_tags(), + use_include_ncells=use_include_ncells, + use_periodic_sum=use_periodic_sum, + use_periodic_softmax=use_periodic_softmax, + ) + return fp, g + + def calc_fp( + self, + dist, + dist_vec, + 
not_masked, + masked, + nmi, + nmj, + nmi_ind, + nmj_ind, + atomic_numbers, + tags=None, + use_include_ncells=False, + use_periodic_sum=False, + use_periodic_softmax=False, + **kwargs, + ): + "Calculate the fingerprint." + # Add small number to avoid division by zero to the distances + dist = sqrt(dist**2 + self.eps) + # Get the covalent distances + covdis = get_covalent_distances( + atomic_numbers=atomic_numbers, + not_masked=not_masked, + masked=masked, + nmi_ind=nmi_ind, + nmj_ind=nmj_ind, + dtype=self.dtype, + ) + # Set the correct shape of the covalent distances + if use_include_ncells or use_periodic_sum or use_periodic_softmax: + covdis = covdis[None, ...] + # Calculate the fingerprint + fp = dist / covdis + # Check what distance method should be used + if use_periodic_softmax: + # Calculate the fingerprint with the periodic softmax + fp, g = get_periodic_softmax( + dist_eps=dist, + dist_vec=dist_vec, + fpinner=fp, + covdis=covdis, + use_inv_dis=False, + use_derivatives=self.use_derivatives, + eps=self.eps, + **kwargs, + ) + elif use_periodic_sum: + # Calculate the fingerprint with the periodic sum + fp, g = get_periodic_sum( + dist_eps=dist, + dist_vec=dist_vec, + fpinner=fp, + use_inv_dis=False, + use_derivatives=self.use_derivatives, + **kwargs, + ) + else: + # Get the derivative of the fingerprint + if self.use_derivatives: + g = dist_vec * (-fp / (dist**2))[..., None] + else: + g = None + # Update the fingerprint with the modification + fp, g = self.modify_fp( + fp=fp, + g=g, + atomic_numbers=atomic_numbers, + tags=tags, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + nmi_ind=nmi_ind, + nmj_ind=nmj_ind, + use_include_ncells=use_include_ncells, + **kwargs, + ) + return fp, g + + def insert_to_deriv_matrix( + self, + g, + not_masked, + masked, + nmi, + nmj, + use_include_ncells=False, + **kwargs, + ): + """ + Insert the distance vectors into the derivative matrix. 
+ """ + # Get the length of the distance vector parts + len_nm_m, len_nm, _ = self.get_length_dist( + not_masked, + masked, + nmi, + ) + # Get the indices for the distances + i_m = arange(len_nm_m) + i_nm_r = len_nm_m // len(not_masked) + i_nm = repeat(arange(len(not_masked)), i_nm_r) + i_nm_nm = arange(len_nm) + len_nm_m + # Check if neighboring cells should be used + if use_include_ncells: + # Get the number of neighboring cells + c_dim = len(g) + # Make the derivative matrix + deriv_matrix = zeros( + (c_dim, len(g[0]), len(not_masked), 3), + dtype=self.dtype, + ) + else: + # Make the derivative matrix + deriv_matrix = zeros( + (len(g), len(not_masked), 3), + dtype=self.dtype, + ) + # Fill the derivative matrix for masked with not masked + deriv_matrix[..., i_m, i_nm, :] = g[..., i_m, :] + # Fill the derivative matrix for not masked with not masked + g_nm = g[..., i_nm_nm, :] + deriv_matrix[..., i_nm_nm, nmi, :] = g_nm + deriv_matrix[..., i_nm_nm, nmj, :] = -g_nm + # Reshape the derivative matrix + deriv_matrix = deriv_matrix.reshape(-1, len(not_masked) * 3) + return deriv_matrix + + def use_dis_method(self, pbc, **kwargs): + """ + Check what distance method should be used." + + Parameters: + pbc: bool + The periodic boundary conditions. + + Returns: + use_vector: bool + Whether to use the vector of the distances. + use_include_ncells: bool + Whether to include the neighboring cells when calculating + the distances. + use_periodic_softmax: bool + Whether to use the periodic softmax. + use_periodic_sum: bool + Whether to use the periodic sum. + use_mic: bool + Whether to use the minimum image convention. 
+ """ + if not pbc.any(): + return self.use_derivatives, False, False, False, False + if self.include_ncells: + return True, True, False, False, False + if self.periodic_softmax: + return True, False, True, False, False + if self.periodic_sum: + return True, False, False, True, False + if self.mic: + return True, False, False, False, True + return self.use_derivatives, False, False, False, False + + def modify_fp( + self, + fp, + g, + atomic_numbers, + tags, + not_masked, + masked, + nmi, + nmj, + nmi_ind, + nmj_ind, + use_include_ncells=False, + **kwargs, + ): + "Modify the fingerprint." + # Reshape the fingerprint + if use_include_ncells: + fp = fp.reshape(-1) + # Insert the derivatives into the derivative matrix + if g is not None: + g = self.insert_to_deriv_matrix( + g=g, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + use_include_ncells=use_include_ncells, + ) + return fp, g + + def element_setup( + self, + atomic_numbers, + tags, + not_masked, + masked, + use_include_ncells=False, + c_dim=None, + **kwargs, + ): + """ + Get all informations of the atom combinations and split them + into types. 
+ """ + # Check if the atomic setup is the same + if self.reuse_combinations: + if ( + self.atomic_numbers is not None + or self.not_masked is not None + or self.tags is not None + or self.split_indices is not None + ): + atoms_equal = check_atoms( + atomic_numbers=self.atomic_numbers, + atomic_numbers_test=atomic_numbers, + tags=self.tags, + tags_test=tags, + not_masked=self.not_masked, + not_masked_test=not_masked, + **kwargs, + ) + if atoms_equal: + return self.split_indices + # Save the atomic setup + self.atomic_numbers = atomic_numbers + self.not_masked = not_masked + self.tags = tags + # Merge element type and their tags + if not self.use_tags: + tags = zeros((len(atomic_numbers)), dtype=int) + if len(not_masked): + combis_nm = list(zip(atomic_numbers[not_masked], tags[not_masked])) + else: + combis_nm = [] + if len(masked): + combis_m = list(zip(atomic_numbers[masked], tags[masked])) + else: + combis_m = [] + split_indices = {} + t = 0 + for i, i_nm in enumerate(combis_nm): + i1 = i + 1 + for j_m in combis_m: + split_indices.setdefault(i_nm + j_m, []).append(t) + t += 1 + for j_nm in combis_nm[i1:]: + split_indices.setdefault(i_nm + j_nm, []).append(t) + t += 1 + # Include the neighboring cells + if use_include_ncells and c_dim is not None: + n_combi = full((c_dim, 1), t, dtype=int) + split_indices = { + k: (asarray(v) + n_combi).reshape(-1) + for k, v in split_indices.items() + } + else: + split_indices = {k: asarray(v) for k, v in split_indices.items()} + # Save the split indices + self.split_indices = split_indices + return split_indices + + def update_arguments( + self, + reduce_dimensions=None, + use_derivatives=None, + wrap=None, + include_ncells=None, + periodic_sum=None, + periodic_softmax=None, + mic=None, + all_ncells=None, + cell_cutoff=None, + dtype=None, + **kwargs, + ): + """ + Update the class with its arguments. + The existing arguments are used if they are not given. 
+ + Parameters: + reduce_dimensions: bool + Whether to reduce the fingerprint space if constrains are used. + use_derivatives: bool + Calculate and store derivatives of the fingerprint wrt. + the cartesian coordinates. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells + when periodic boundary conditions are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic is faster than periodic_softmax, + but the derivatives are discontinuous. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + + Returns: + self: The updated instance itself. 
+ """ + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + dtype=dtype, + ) + if wrap is not None: + self.wrap = wrap + if include_ncells is not None: + self.include_ncells = include_ncells + if periodic_sum is not None: + self.periodic_sum = periodic_sum + if periodic_softmax is not None: + self.periodic_softmax = periodic_softmax + if mic is not None: + self.mic = mic + if all_ncells is not None: + self.all_ncells = all_ncells + if cell_cutoff is not None: + self.cell_cutoff = abs(float(cell_cutoff)) + if not hasattr(self, "not_masked"): + self.not_masked = None + if not hasattr(self, "masked"): + self.masked = None + if not hasattr(self, "atomic_numbers"): + self.atomic_numbers = None + if not hasattr(self, "tags"): + self.tags = None + if not hasattr(self, "split_indices"): + self.split_indices = None + # Tags is not implemented + self.use_tags = False + self.reuse_combinations = False + return self + + def get_distances( + self, + atoms, + not_masked=None, + masked=None, + nmi=None, + nmj=None, + nmi_ind=None, + nmj_ind=None, + use_vector=False, + include_ncells=False, + mic=False, + **kwargs, + ): + """ + Get the distances and their vectors. + """ + return get_all_distances( + atoms=atoms, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + nmi_ind=nmi_ind, + nmj_ind=nmj_ind, + use_vector=use_vector, + wrap=self.wrap, + include_ncells=include_ncells, + mic=mic, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + dtype=self.dtype, + **kwargs, + ) + + def get_length_dist(self, not_masked, masked, nmi, **kwargs): + "Get the length of the distance vector parts." + # Get the length of the distance vector parts + len_nm_m = len(not_masked) * len(masked) + len_nm = len(nmi) + # Get the full length of the distance vector + len_all = len_nm_m + len_nm + return len_nm_m, len_nm, len_all + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + reduce_dimensions=self.reduce_dimensions, + use_derivatives=self.use_derivatives, + wrap=self.wrap, + include_ncells=self.include_ncells, + periodic_sum=self.periodic_sum, + periodic_softmax=self.periodic_softmax, + mic=self.mic, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + dtype=self.dtype, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/regression/gp/fingerprint/fingerprint.py b/catlearn/regression/gp/fingerprint/fingerprint.py index 37c5dc75..4115c187 100644 --- a/catlearn/regression/gp/fingerprint/fingerprint.py +++ b/catlearn/regression/gp/fingerprint/fingerprint.py @@ -1,30 +1,39 @@ -import numpy as np -from ase.constraints import FixAtoms +from numpy import array, finfo +from .geometry import get_constraints from .fingerprintobject import FingerprintObject class Fingerprint: + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + """ + def __init__( self, reduce_dimensions=True, use_derivatives=True, + dtype=float, **kwargs, ): """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. + Initialize the fingerprint constructor. Parameters: - reduce_dimensions : bool + reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" # Set the arguments self.update_arguments( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, + dtype=dtype, **kwargs, ) @@ -33,20 +42,16 @@ def __call__(self, atoms, **kwargs): Convert atoms to fingerprint and return the fingerprint object. Parameters: - atoms : ASE Atoms + atoms: ASE Atoms The ASE Atoms object that are converted to a fingerprint. Returns: FingerprintObject: Object with the fingerprint array and its derivatives if requested. """ - # Get the constraints from ASE Atoms - not_masked, masked = self.get_constraints(atoms) # Calculate the fingerprint and its derivatives if requested vector, derivative = self.make_fingerprint( atoms, - not_masked=not_masked, - masked=masked, **kwargs, ) # Make the fingerprint object and store the arrays within @@ -65,10 +70,58 @@ def get_reduce_dimensions(self): """ return self.reduce_dimensions + def set_use_derivatives(self, use_derivatives, **kwargs): + """ + Set whether to use derivatives/forces in the targets. + + Parameters: + use_derivatives: bool + Whether to use derivatives/forces in the targets. + + Returns: + self: The updated object itself. + """ + # Set the use derivatives + self.use_derivatives = use_derivatives + return self + + def set_reduce_dimensions(self, reduce_dimensions, **kwargs): + """ + Set whether to reduce the fingerprint space if constrains are used. + + Parameters: + reduce_dimensions: bool + Whether to reduce the fingerprint space if constrains are used. + + Returns: + self: The updated object itself. + """ + # Set the reduce dimensions + self.reduce_dimensions = reduce_dimensions + return self + + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. 
+ """ + # Set the data type + self.dtype = dtype + # Set a small number to avoid division by zero + self.eps = 1.1 * finfo(self.dtype).eps + return self + def update_arguments( self, reduce_dimensions=None, use_derivatives=None, + dtype=None, **kwargs, ): """ @@ -76,56 +129,78 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - reduce_dimensions : bool + reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. + dtype: type + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated instance itself. """ if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions + self.set_reduce_dimensions(reduce_dimensions) if use_derivatives is not None: - self.use_derivatives = use_derivatives + self.set_use_derivatives(use_derivatives) + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype) + if not hasattr(self, "not_masked"): + self.not_masked = None + if not hasattr(self, "masked"): + self.masked = None return self - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): + def make_fingerprint(self, atoms, **kwargs): "The calculation of the fingerprint" raise NotImplementedError() - def get_constraints(self, atoms, **kwargs): - """ - Get the indicies of the atoms that does not have fixed constraints. - - Parameters: - atoms : ASE Atoms - The ASE Atoms object with a calculator. - - Returns: - not_masked : list - A list of indicies for the moving atoms - if constraints are used. - masked : list - A list of indicies for the fixed atoms if constraints are used. 
- - """ - not_masked = list(range(len(atoms))) - if not self.reduce_dimensions: - return not_masked, [] - constraints = atoms.constraints - if len(constraints) > 0: - masked = np.concatenate( - [ - c.get_indices() - for c in constraints - if isinstance(c, FixAtoms) - ] + def get_not_masked(self, atoms, masked=None, recalc=False, **kwargs): + "Get the not masked atoms." + # Use the stored values if recalculation is not requested + if not recalc and self.not_masked is not None: + return self.not_masked + # Recalculate the not masked atoms + if masked is None: + not_masked, masked = get_constraints( + atoms, + reduce_dimensions=self.reduce_dimensions, + **kwargs, ) - masked = set(masked) - return list(set(not_masked).difference(masked)), list(masked) - return not_masked, [] + self.masked = array(masked, dtype=int) + else: + i_all = set(range(len(atoms))) + not_masked = list(i_all.difference(set(masked))) + not_masked = sorted(not_masked) + self.not_masked = array(not_masked, dtype=int) + return self.not_masked + + def get_masked(self, atoms, not_masked=None, recalc=False, **kwargs): + "Get the masked atoms." + # Use the stored values if recalculation is not requested + if not recalc and self.masked is not None: + return self.masked + if not_masked is None: + not_masked, masked = get_constraints( + atoms, + reduce_dimensions=self.reduce_dimensions, + **kwargs, + ) + self.not_masked = array(not_masked, dtype=int) + else: + i_all = set(range(len(atoms))) + masked = list(i_all.difference(set(not_masked))) + masked = sorted(masked) + self.masked = array(masked, dtype=int) + return self.masked + + def reset_masked(self): + "Reset the masked atoms." + self.masked = None + self.not_masked = None + return self def get_arguments(self): "Get the arguments of the class itself." 
@@ -133,6 +208,7 @@ def get_arguments(self): arg_kwargs = dict( reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/fingerprint/fingerprintobject.py b/catlearn/regression/gp/fingerprint/fingerprintobject.py index d03a2548..42dda8d0 100644 --- a/catlearn/regression/gp/fingerprint/fingerprintobject.py +++ b/catlearn/regression/gp/fingerprint/fingerprintobject.py @@ -1,9 +1,16 @@ +from numpy import asarray + + class FingerprintObject: + """ + Fingerprint object class that has the fingerprint vector for an + ASE Atoms instance. + The derivatives wrt. to the cartesian coordinates can also be saved. + """ + def __init__(self, vector, derivative=None, **kwargs): """ - Fingerprint object class that has the fingerprint vector - for an Atoms object. - The derivatives wrt. to the cartesian coordinates can also be saved. + Initialize the fingerprint object. Parameters: vector: (N) array @@ -11,23 +18,23 @@ def __init__(self, vector, derivative=None, **kwargs): derivative: (N,D) array (optional) Fingerprint derivative wrt. atoms cartesian coordinates. """ - self.vector = vector.copy() + self.vector = asarray(vector) if derivative is None: self.derivative = None else: - self.derivative = derivative.copy() + self.derivative = asarray(derivative) def get_vector(self, **kwargs): "Get the fingerprint vector." - return self.vector.copy() + return self.vector def get_derivatives(self, d=None, **kwargs): "Get the derivative of the fingerprint wrt. the cartesian coordinates." 
if self.derivative is None: return None if d is None: - return self.derivative.copy() - return self.derivative[:, d].copy() + return self.derivative + return self.derivative[:, d] def get_derivative_dimension(self, **kwargs): """ diff --git a/catlearn/regression/gp/fingerprint/fpwrapper.py b/catlearn/regression/gp/fingerprint/fpwrapper.py index 6e962c57..03cb6929 100644 --- a/catlearn/regression/gp/fingerprint/fpwrapper.py +++ b/catlearn/regression/gp/fingerprint/fpwrapper.py @@ -1,20 +1,27 @@ from .fingerprint import Fingerprint -import numpy as np +from .geometry import get_constraints +from numpy import asarray, concatenate, transpose, zeros class FingerprintWrapperGPAtom(Fingerprint): + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The fingerprint is generated by wrapping the fingerprint class + from gpatom. + (https://gitlab.com/gpatom/ase-gpatom) + """ + def __init__( self, fingerprint, reduce_dimensions=True, use_derivatives=True, + dtype=float, **kwargs, ): """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The fingerprint is generated by wrapping the fingerprint class - from gpatom. + Initialize the fingerprint constructor. Parameters: fingerprint: gpatom class. @@ -24,11 +31,15 @@ def __init__( use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ super().__init__( fingerprint=fingerprint, reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, + dtype=dtype, **kwargs, ) @@ -37,6 +48,7 @@ def update_arguments( fingerprint=None, reduce_dimensions=None, use_derivatives=None, + dtype=None, **kwargs, ): """ @@ -51,31 +63,52 @@ def update_arguments( use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. 
the cartesian coordinates. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated instance itself. """ + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + dtype=dtype, + ) if fingerprint is not None: self.fingerprint = fingerprint - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - if use_derivatives is not None: - self.use_derivatives = use_derivatives return self - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): + def make_fingerprint(self, atoms, **kwargs): "The calculation of the gp-atom fingerprint" + # Get the masked and not masked atoms + not_masked, _ = get_constraints( + atoms, + reduce_dimensions=self.reduce_dimensions, + ) + # Check if there are any not masked atoms + if len(not_masked) == 0: + fp = zeros((0), dtype=self.dtype) + if self.use_derivatives: + return fp, zeros((0, 0), dtype=self.dtype) + return fp, None + # Get the fingerprint fp = self.fingerprint( - atoms, calc_gradients=self.use_derivatives, **kwargs + atoms, + calc_gradients=self.use_derivatives, + **kwargs, ) - vector = fp.vector.copy() if self.use_derivatives: - derivative = fp.reduce_coord_gradients().copy() + derivative = fp.reduce_coord_gradients() # enforced not_masked since it is not possible in ASE-GPATOM - derivative = np.concatenate(derivative[not_masked], axis=1) + derivative = concatenate( + derivative[not_masked], + axis=1, + dtype=self.dtype, + ) else: derivative = None - return vector, derivative + return asarray(fp.vector, dtype=self.dtype), derivative def get_arguments(self): "Get the arguments of the class itself." 
@@ -84,6 +117,7 @@ def get_arguments(self): fingerprint=self.fingerprint, reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() @@ -93,19 +127,25 @@ def get_arguments(self): class FingerprintWrapperDScribe(Fingerprint): + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The fingerprint is generated by wrapping the fingerprint class + from dscribe (>=2.1). + (https://github.com/SINGROUP/dscribe) + """ + def __init__( self, fingerprint, reduce_dimensions=True, use_derivatives=True, fingerprint_kwargs={}, + dtype=float, **kwargs, ): """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The fingerprint is generated by wrapping the fingerprint class - from dscribe (>=2.1). + Initialize the fingerprint constructor. Parameters: fingerprint: dscribe class instance (>=2.1). @@ -117,12 +157,16 @@ def __init__( the cartesian coordinates. fingerprint_kwargs: dict Kwargs for the fingerprint function call. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ super().__init__( fingerprint=fingerprint, reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, fingerprint_kwargs=fingerprint_kwargs, + dtype=dtype, **kwargs, ) @@ -132,6 +176,7 @@ def update_arguments( reduce_dimensions=None, use_derivatives=None, fingerprint_kwargs=None, + dtype=None, **kwargs, ): """ @@ -148,22 +193,38 @@ def update_arguments( the cartesian coordinates. fingerprint_kwargs: dict Kwargs for the fingerprint function call. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated instance itself. 
""" + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + dtype=dtype, + ) if fingerprint is not None: self.fingerprint = fingerprint - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - if use_derivatives is not None: - self.use_derivatives = use_derivatives if fingerprint_kwargs is not None: self.fingerprint_kwargs = fingerprint_kwargs.copy() return self - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): + def make_fingerprint(self, atoms, **kwargs): "The calculation of the dscribe fingerprint" + # Get the masked and not masked atoms + not_masked, _ = get_constraints( + atoms, + reduce_dimensions=self.reduce_dimensions, + ) + # Check if there are any not masked atoms + if len(not_masked) == 0: + fp = zeros((0), dtype=self.dtype) + if self.use_derivatives: + return fp, zeros((0, 0), dtype=self.dtype) + return fp, None + # Get the fingerprint if self.use_derivatives: derivative, vector = self.fingerprint.derivatives( atoms, @@ -171,15 +232,16 @@ def make_fingerprint(self, atoms, not_masked, masked, **kwargs): return_descriptor=True, **self.fingerprint_kwargs, ) + derivative = asarray(derivative, dtype=self.dtype) if len(derivative.shape) == 4: - derivative = np.transpose(derivative, (0, 3, 1, 2)) + derivative = transpose(derivative, (0, 3, 1, 2)) else: - derivative = np.transpose(derivative, (2, 0, 1)) + derivative = transpose(derivative, (2, 0, 1)) derivative = derivative.reshape(-1, len(not_masked) * 3) else: vector = self.fingerprint.create(atoms, **self.fingerprint_kwargs) derivative = None - return vector.reshape(-1), derivative + return asarray(vector.reshape(-1), dtype=self.dtype), derivative def get_arguments(self): "Get the arguments of the class itself." 
@@ -189,6 +251,7 @@ def get_arguments(self): reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, fingerprint_kwargs=self.fingerprint_kwargs, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/fingerprint/geometry.py b/catlearn/regression/gp/fingerprint/geometry.py index f6a05656..86fb23b8 100644 --- a/catlearn/regression/gp/fingerprint/geometry.py +++ b/catlearn/regression/gp/fingerprint/geometry.py @@ -1,41 +1,336 @@ -import numpy as np +from numpy import ( + arange, + asarray, + ceil, + concatenate, + cos, + einsum, + exp, + matmul, + pi, + sin, + sqrt, + triu_indices, + where, +) +from numpy.linalg import pinv import itertools from scipy.spatial.distance import cdist from ase.data import covalent_radii +from ase.constraints import FixAtoms + + +def get_constraints(atoms, reduce_dimensions=True, **kwargs): + """ + Get the indices of the atoms that does not have fixed constraints. + + Parameters: + atoms: ASE Atoms + The ASE Atoms instance. + reduce_dimensions: bool + Whether to fix or mask some of the atoms. + + Returns: + not_masked: (Nnm) list + A list of indices for the moving atoms if constraints are used. + masked: (Nm) list + A list of indices for the fixed atoms if constraints are used. + + """ + not_masked = list(range(len(atoms))) + if reduce_dimensions and len(atoms.constraints): + masked = [ + c.get_indices() + for c in atoms.constraints + if isinstance(c, FixAtoms) + ] + if len(masked): + masked = set(concatenate(masked)) + not_masked = list(set(not_masked).difference(masked)) + not_masked = sorted(not_masked) + masked = list(masked) + else: + masked = [] + return asarray(not_masked), asarray(masked) + + +def get_mask_indices( + atoms, + not_masked=None, + masked=None, + nmi=None, + nmj=None, + nmi_ind=None, + nmj_ind=None, + **kwargs, +): + """ + Get the indices of the atoms that are masked and not masked. 
+ + Parameters: + atoms: ASE Atoms + The ASE Atoms instance. + not_masked: (Nnm) list (optional) + A list of indices for the moving atoms if constraints are used. + Else all atoms are treated to be moving. + masked: (Nn) list (optional) + A list of indices for the fixed atoms if constraints are used. + nmi: list (optional) + The upper triangle indices of the not masked atoms. + nmj: list (optional) + The upper triangle indices of the not masked atoms. + nmi_ind: list (optional) + The indices of the not masked atoms. + nmj_ind: list (optional) + The indices of the not masked atoms. + + Returns: + not_masked: (Nnm) list + A list of indices for the moving atoms if constraints are used. + masked: (Nm) list + A list of indices for the fixed atoms if constraints are used. + nmi: list + The upper triangle indices of the not masked atoms. + nmi_ind: list + The indices of the not masked atoms. + nmj_ind: list + The indices of the not masked atoms. + """ + # If a not masked list is given, all atoms is treated to be not masked + if not_masked is None: + not_masked = arange(len(atoms)) + # If a masked list is not given, it is calculated from the not masked + if masked is None: + masked = asarray( + list(set(range(len(atoms))).difference(set(not_masked))) + ) + # Make indices of not masked atoms with itself + if nmi is None or nmj is None or nmi_ind is None or nmj_ind is None: + nmi, nmj = triu_indices(len(not_masked), k=1, m=None) + nmi_ind = not_masked[nmi] + nmj_ind = not_masked[nmj] + return not_masked, masked, nmi, nmj, nmi_ind, nmj_ind + + +def check_atoms( + atomic_numbers, + atomic_numbers_test, + tags=None, + tags_test=None, + cell=None, + cell_test=None, + pbc=None, + pbc_test=None, + not_masked=None, + not_masked_test=None, + **kwargs, +): + """ + Check if the atoms instance is the same as the input. + + Parameters: + atomic_numbers: (N) list + The atomic numbers of the atoms. + atomic_numbers_test: (N) list + The atomic numbers of the tested atoms. 
+ tags: (N) list (optional) + The tags of the atoms. + tags_test: (N) list (optional) + The tags of the tested atoms. + cell: (3, 3) array (optional) + The cell vectors. + cell_test: (3, 3) array (optional) + The cell vectors of the tested atoms. + pbc: (3) list (optional) + The periodic boundary conditions. + pbc_test: (3) list (optional) + The periodic boundary conditions of the tested atoms. + not_masked: (Nnm) list (optional) + A list of indices for the moving atoms if constraints are used. + not_masked_test: (Nnm) list (optional) + A list of indices for the moving atoms if constraints + are used in the tested atoms. + + Returns: + bool: If the atoms are the same. + """ + if len(atomic_numbers_test) != len(atomic_numbers): + return False + if not_masked is not None and not_masked_test is not None: + if (not_masked_test != not_masked).any(): + return False + if (atomic_numbers_test != atomic_numbers).any(): + return False + if tags is not None and tags_test is not None: + if (tags_test != tags).any(): + return False + if cell is not None and cell_test is not None: + if (cell_test != cell).any(): + return False + if pbc is not None and pbc_test is not None: + if (pbc_test != pbc).any(): + return False + return True + + +def get_ncells( + cell, + pbc, + all_ncells=False, + cell_cutoff=4.0, + atomic_numbers=None, + remove0=False, + dtype=float, + **kwargs, +): + """ + Get all neighboring cells within the cutoff. + + Parameters: + cell: (3, 3) array + The cell vectors. + pbc: (3) list + The periodic boundary conditions. + all_ncells: bool + If all neighboring cells within a cutoff should be used. + cell_cutoff: float + The distance cutoff for neighboring cells. + atomic_numbers: list + The atomic numbers of the atoms. + It is only used when all_ncells is True. + remove0: bool + If the zero vector should + be removed from the neighboring cells. 
+ dtype: type + The data type of the arrays + + Returns: + cells_p: (Nc, 3) array + The displacements from all combinations of the neighboring cells. + """ + # Check if all neighboring cells should be used + if all_ncells: + # Get the inverse of the cell + cinv = pinv(cell) + # Get the maximum covalent distance + atomic_numbers_set = list(set(atomic_numbers)) + covrad = covalent_radii[atomic_numbers_set] + max_cov = 2.0 * covrad.max() + # Get the cutoff distance from the maximum covalent distance + cutoff = max_cov * cell_cutoff + # Get the coordinates to cutoff in lattice coordinates + ccut = cutoff * cinv + # Get the number of neighboring cells in each direction + ncells = ceil(abs(ccut).max(axis=0)).astype(int) + # Only use neighboring cells if the dimension is periodic + ncells = where(pbc, ncells, 0) + else: + # Only use neighboring cells if the dimension is periodic + ncells = where(pbc, 1, 0) + # Get all neighboring cells + b = [list(range(-i, i + 1)) for i in ncells] + # Make all periodic combinations + p_arrays = list(itertools.product(*b)) + # Remove the initial combination + p_arrays.remove((0, 0, 0)) + # Add the zero vector in the beginning + if not remove0: + p_arrays = [(0, 0, 0)] + p_arrays + # Calculate all displacement vector from the cell vectors + p_arrays = asarray(p_arrays, dtype=dtype) + cells_p = matmul(p_arrays, cell, dtype=dtype) + return cells_p def get_full_distance_matrix( atoms, not_masked=None, + use_vector=False, + wrap=True, + include_ncells=False, mic=False, - vector=False, - wrap=False, + all_ncells=False, + cell_cutoff=4.0, + dtype=float, **kwargs, ): """ Get the full cartesian distance matrix between the atomes and including - the vectors if vector=True. + the vectors if requested. + + Parameters: + atoms: ASE Atoms + The ASE Atoms instance. + not_masked: Nnm list (optional) + A list of indices for the moving atoms if constraints are used. + Else all atoms are treated to be moving. 
+ use_vector: bool + If the distance vectors should be returned. + wrap: bool + If the atoms should be wrapped to the cell. + include_ncells: bool + If neighboring cells should be included. + all_ncells: bool + If all neighboring cells within a cutoff should be used. + mic: bool + If the minimum image convention should be used. + cell_cutoff: float + The distance cutoff for neighboring cells. + dtype: type + The data type of the arrays + + Returns: + dist: (N, Nnm) or (Nc, N, Nnm) array + The full distance matrix. + dist_vec: (N, Nnm, 3) or (Nc, N, Nnm, 3) array + The full distance matrix with directions if use_vector=True. """ - # If a not masked list is given all atoms is treated to be not masked + # If a not masked list is not given all atoms is treated to be not masked if not_masked is None: - not_masked = np.arange(len(atoms)) + not_masked = arange(len(atoms)) # Get the atomic positions - pos = atoms.get_positions(wrap=wrap) - # Get distance vectors - if vector or mic: - dist_vec = pos - pos[not_masked, None] + pos = asarray(atoms.get_positions(wrap=wrap), dtype=dtype) # Get the periodic boundary conditions pbc = atoms.pbc.copy() - # Check if the minimum image convention is used and if there is any pbc - if not mic or sum(pbc) == 0: - # Get only the distances - if not vector: - return cdist(pos[not_masked], pos), None - return np.linalg.norm(dist_vec, axis=-1), dist_vec - # Get the cell vectors - cell = np.array(atoms.cell) - # Get the minimum image convention distances and distance vectors - return mic_distance(dist_vec, cell, pbc, vector=vector, **kwargs) + is_pbc = pbc.any() + # Check whether to calculate distance vectors + if use_vector or (is_pbc and (include_ncells or mic)): + # Get distance vectors + dist_vec = pos - pos[not_masked, None] + else: + dist_vec = None + # Return the distances + D = cdist(pos[not_masked], pos) + D = asarray(D, dtype=dtype) + return D, None + # Check if neighboring cells should be included + if include_ncells and is_pbc: + 
cells_p = get_ncells( + cell=atoms.get_cell(), + pbc=pbc, + atomic_numbers=atoms.get_atomic_numbers(), + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + dtype=dtype, + ) + # Calculate the distances to the atoms in all unit cell + dist_vec = dist_vec + cells_p[:, None, None, :] + dist = sqrt(einsum("ijlk,ijlk->ijl", dist_vec, dist_vec)) + return dist, dist_vec + elif mic and is_pbc: + # Get the distances with minimum image convention + dist, dist_vec = mic_distance( + dist_vec=dist_vec, + cell=atoms.get_cell(), + pbc=pbc, + use_vector=use_vector, + dtype=dtype, + **kwargs, + ) + return dist, dist_vec + # Calculate the distances and return + dist = sqrt(einsum("ijl,ijl->ij", dist_vec, dist_vec)) + return dist, dist_vec def get_all_distances( @@ -43,72 +338,280 @@ def get_all_distances( not_masked=None, masked=None, nmi=None, + nmj=None, nmi_ind=None, nmj_ind=None, + use_vector=False, + wrap=True, + include_ncells=False, mic=False, - vector=False, - wrap=False, + all_ncells=False, + cell_cutoff=4.0, + dtype=float, **kwargs, ): """ Get the unique cartesian distances between the atomes and including - the vectors if vector=True. + the vectors if use_vector=True. + + Parameters: + atoms: ASE Atoms + The ASE Atoms instance. + not_masked: Nnm list (optional) + A list of indices for the moving atoms if constraints are used. + Else all atoms are treated to be moving. + masked: Nm list (optional) + A list of indices for the fixed atoms if constraints are used. + nmi: list (optional) + The upper triangle indices of the not masked atoms. + nmi_ind: list (optional) + The indices of the not masked atoms. + nmj_ind: list (optional) + The indices of the not masked atoms. + use_vector: bool + If the distance vectors should be returned. + wrap: bool + If the atoms should be wrapped to the cell. + mic: bool + If the minimum image convention should be used. + include_ncells: bool + If neighboring cells should be included. 
+ all_ncells: bool + If all neighboring cells within a cutoff should be used. + cell_cutoff: float + The distance cutoff for neighboring cells. + dtype: type + The data type of the arrays + + Returns: + dist: (Nnm*Nm+(Nnm*(Nnm-1)/2)) or (Nc, Nnm*N+(Nnm*(Nnm-1)/2)) array + The unique distances. + dist_vec: (Nnm*Nm+(Nnm*(Nnm-1)/2), 3) or + (Nc, Nnm*N+(Nnm*(Nnm-1)/2), 3) array + The unique distances with directions if use_vector=True. """ - # If a not masked list is given all atoms is treated to be not masked - if not_masked is None: - not_masked = np.arange(len(atoms)) - if masked is None: - masked = np.array( - list(set(np.arange(len(atoms))).difference(set(not_masked))) - ) - # Make indicies - if nmi is None or nmi_ind is None or nmj_ind is None: - nmi, nmj = np.triu_indices(len(not_masked), k=1, m=None) - nmi_ind = not_masked[nmi] - nmj_ind = not_masked[nmj] + # Make indices + not_masked, masked, nmi, _, nmi_ind, nmj_ind = get_mask_indices( + atoms, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + nmi_ind=nmi_ind, + nmj_ind=nmj_ind, + ) # Get the atomic positions - pos = atoms.get_positions(wrap=wrap) - # Get distance vectors - if vector or mic: - if len(masked): - dist_vec = np.concatenate( - [ - (pos[masked] - pos[not_masked, None]).reshape(-1, 3), - pos[nmj_ind] - pos[nmi_ind], - ], - axis=0, - ) - else: - dist_vec = pos[nmj_ind] - pos[nmi_ind] + pos = asarray(atoms.get_positions(wrap=wrap), dtype=dtype) # Get the periodic boundary conditions pbc = atoms.pbc.copy() - # Check if the minimum image convention is used and if there is any pbc - if not mic or sum(pbc) == 0: - if not vector: - d = cdist(pos[not_masked], pos) - if len(masked): - return ( - np.concatenate( - [d[:, masked].reshape(-1), d[nmi, nmj_ind]], - axis=0, - ), - None, - ) - return d[nmi, nmj_ind], None - return np.linalg.norm(dist_vec, axis=-1), dist_vec - # Get the cell vectors - cell = np.array(atoms.cell) - # Get the minimum image convention distances and distance vectors - 
return mic_distance(dist_vec, cell, pbc, vector=vector, **kwargs) - - -def mic_distance(dist_vec, cell, pbc, vector=False, **kwargs): - "Get the minimum image convention of the distances." + is_pbc = pbc.any() + # Check whether to calculate distance vectors + if use_vector or (is_pbc and (include_ncells or mic)): + # Get distance vectors + dist_vec = get_distance_vectors( + pos, + not_masked, + masked, + nmi_ind, + nmj_ind, + ) + else: + # Get the distances + dist = get_distances( + pos, + not_masked, + masked, + nmi, + nmj_ind, + ) + return dist, None + # Check if neighboring cells should be included + if include_ncells and is_pbc: + cells_p = get_ncells( + cell=atoms.get_cell(), + pbc=pbc, + atomic_numbers=atoms.get_atomic_numbers(), + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + dtype=dtype, + ) + # Calculate the distances to the atoms in all unit cell + dist_vec = dist_vec + cells_p[:, None, :] + dist = sqrt(einsum("ijl,ijl->ij", dist_vec, dist_vec)) + return dist, dist_vec + elif mic and is_pbc: + # Get the distances with minimum image convention + dist, dist_vec = mic_distance( + dist_vec=dist_vec, + cell=atoms.get_cell(), + pbc=pbc, + use_vector=use_vector, + dtype=dtype, + **kwargs, + ) + return dist, dist_vec + # Calculate the distances and return + dist = sqrt(einsum("ij,ij->i", dist_vec, dist_vec)) + return dist, dist_vec + + +def get_distances( + pos, + not_masked, + masked, + nmi, + nmj_ind, + **kwargs, +): + """ + Get the unique distances. + + Parameters: + pos: (N, 3) array + The atomic positions. + not_masked: Nnm list + A list of indices for the moving atoms if constraints are used. + masked: Nm list + A list of indices for the fixed atoms if constraints are used. + nmi: list + The upper triangle indices of the not masked atoms. + nmj_ind: list + The indices of the not masked atoms. + + Returns: + dist: (Nnm*Nm+(Nnm*(Nnm-1)/2)) array + The unique distances. 
+ """ + # Get the distances matrix + d = cdist(pos[not_masked], pos) + d = asarray(d, dtype=pos.dtype) + # Get the distances of the not masked atoms + dist = d[nmi, nmj_ind] + if len(masked): + # Get the distances of the masked atoms + dist = concatenate( + [d[:, masked].reshape(-1), dist], + axis=0, + ) + return dist + + +def get_distance_vectors( + pos, + not_masked, + masked, + nmi_ind, + nmj_ind, + **kwargs, +): + """ + Get the unique distance vectors. + + Parameters: + pos: (N, 3) array + The atomic positions. + not_masked: Nnm list + A list of indices for the moving atoms if constraints are used. + masked: Nm list + A list of indices for the fixed atoms if constraints are used. + nmi_ind: list + The indices of the not masked atoms. + nmj_ind: list + The indices of the not masked atoms. + + Returns: + dist_vec: (Nnm*Nm+(Nnm*(Nnm-1)/2), 3) array + The unique distance vectors. + """ + # Calculate the distance vectors for the not masked atoms + dist_vec = pos[nmj_ind] - pos[nmi_ind] + # Check if masked atoms are used + if len(masked): + # Calculate the distance vectors for the masked atoms + dist_vec = concatenate( + [ + (pos[masked] - pos[not_masked, None]).reshape(-1, 3), + dist_vec, + ], + axis=0, + ) + return dist_vec + + +def get_covalent_distances( + atomic_numbers, + not_masked, + masked, + nmi_ind, + nmj_ind, + dtype=float, + **kwargs, +): + """ + Get the covalent distances. + + Parameters: + atomic_numbers: (N) list + The atomic numbers of the atoms. + not_masked: Nnm list + A list of indices for the moving atoms if constraints are used. + masked: Nm list + A list of indices for the fixed atoms if constraints are used. + nmi_ind: list + The indices of the not masked atoms. + nmj_ind: list + The indices of the not masked atoms. + dtype: type + The data type of the arrays. + + Returns: + covdis: (Nnm*Nm+(Nnm*(Nnm-1)/2)) array + The covalent distances. 
+ """ + # Get the covalent radii + covrad = asarray(covalent_radii[atomic_numbers], dtype=dtype) + # Calculate the covalent distances for the not masked atoms + covdis = covrad[nmj_ind] + covrad[nmi_ind] + # Check if masked atoms are used + if len(masked): + # Calculate the covalent distances for the masked atoms + covdis = concatenate( + [ + (covrad[masked] + covrad[not_masked, None]).reshape(-1), + covdis, + ], + axis=0, + ) + return covdis + + +def mic_distance(dist_vec, cell, pbc, use_vector=False, dtype=float, **kwargs): + """ + Get the minimum image convention of the distances. + + Parameters: + dist_vec: (N, Nnm, 3) or (Nnm*Nm+(Nnm*(Nnm-1)/2) , 3) array + The distance vectors. + cell: (3, 3) array + The cell vectors. + pbc: (3) list + The periodic boundary conditions. + use_vector: bool + If the distance vectors should be returned. + dtype: type + The data type of the arrays + + Returns: + dist: (N, Nnm) or ((Nnm*Nm+(Nnm*(Nnm-1)/2)) array + The shortest distances. + dist_vec: (N, Nnm, 3) or ((Nnm*Nm+(Nnm*(Nnm-1)/2), 3) array + The shortest distance vectors if requested. 
+ """ # Get the squared cell vectors cell2 = cell**2 # Save the shortest distances v2min = dist_vec**2 - if vector: + if use_vector: vmin = dist_vec.copy() else: vmin = None @@ -138,11 +641,11 @@ def mic_distance(dist_vec, cell, pbc, vector=False, **kwargs): vmin, d_c, cell, - vector=vector, + use_vector=use_vector, **kwargs, ) else: - v2min = np.sum(v2min, axis=-1) + v2min = v2min.sum(axis=-1) if sum(pbc_nc): # Do an extensive mic for the dimension that is not cubic v2min, vmin = mic_general_distance( @@ -151,10 +654,11 @@ def mic_distance(dist_vec, cell, pbc, vector=False, **kwargs): vmin, cell, pbc_nc, - vector=vector, + use_vector=use_vector, + dtype=dtype, **kwargs, ) - return np.sqrt(v2min), vmin + return sqrt(v2min), vmin def mic_cubic_distance( @@ -163,7 +667,7 @@ def mic_cubic_distance( vmin, d_c, cell, - vector=False, + use_vector=False, **kwargs, ): """ @@ -176,31 +680,33 @@ def mic_cubic_distance( dv_new = dist_vec[..., d] + cell[d, d] dv2_new = dv_new**2 # Save the new distances if they are shorter - i = np.where(dv2_new < v2min[..., d]) + i = where(dv2_new < v2min[..., d]) v2min[(*i, d)] = dv2_new[(*i,)] - if vector: + if use_vector: vmin[(*i, d)] = dv_new[(*i,)] # Calculate the distances to the atoms in the previous unit cell dv_new = dist_vec[..., d] - cell[d, d] dv2_new = dv_new**2 # Save the new distances if they are shorter - i = np.where(dv2_new < v2min[..., d]) + i = where(dv2_new < v2min[..., d]) v2min[(*i, d)] = dv2_new[(*i,)] - if vector: + if use_vector: vmin[(*i, d)] = dv_new[(*i,)] # Calculate the distances - if vector: - return np.sum(v2min, axis=-1), vmin - return np.sum(v2min, axis=-1), None + v2min = v2min.sum(axis=-1) + if use_vector: + return v2min, vmin + return v2min, None def mic_general_distance( dist_vec, - Dmin, + v2min, vmin, cell, pbc_nc, - vector=False, + use_vector=False, + dtype=float, **kwargs, ): """ @@ -208,154 +714,278 @@ def mic_general_distance( an extensive mic search. 
""" # Calculate all displacement vectors from the cell vectors - cells_p = get_periodicities(cell, pbc_nc) + cells_p = get_ncells( + cell=cell, + pbc=pbc_nc, + all_ncells=False, + remove0=True, + dtype=dtype, + ) # Iterate over all combinations for p_array in cells_p: # Calculate the distances to the atoms in the next unit cell dv_new = dist_vec + p_array - D_new = np.sum(dv_new**2, axis=-1) + D_new = (dv_new**2).sum(axis=-1) # Save the new distances if they are shorter - i = np.where(D_new < Dmin) - Dmin[(*i,)] = D_new[(*i,)] - if vector: + i = where(D_new < v2min) + v2min[(*i,)] = D_new[(*i,)] + if use_vector: vmin[(*i,)] = dv_new[(*i,)] # Calculate the distances - if vector: - return Dmin, vmin - return Dmin, None + if use_vector: + return v2min, vmin + return v2min, None -def get_periodicities(cell, pbc, remove0=True, **kwargs): - "Get all displacement vectors from the periodicity and cell vectors." - # Make all periodic combinations - b = [[-1, 0, 1] if p else [0] for p in pbc] - p_arrays = list(itertools.product(*b)) - # Remove the initial combination - if remove0: - p_arrays.remove((0, 0, 0)) - # Calculate all displacement vector from the cell vectors - p_arrays = np.array(p_arrays) - cells_p = np.matmul(p_arrays, cell) - return cells_p +def get_periodic_sum( + dist_eps, + dist_vec, + fpinner, + use_inv_dis=True, + use_derivatives=True, + **kwargs, +): + """ + Get the periodic sum of the distances. + Parameters: + dist_eps: (Nc, N, Nnm) or (Nc, Nnm*N+(Nnm*(Nnm-1)/2)) array + The distances with a small number added. + dist_vec: (Nc, N, Nnm, 3) or (Nc, Nnm*N+(Nnm*(Nnm-1)/2), 3) array + The distance vectors. + fpinner: (Nc, N, Nnm) or (Nc, Nnm*N+(Nnm*(Nnm-1)/2)) array + The inner fingerprint. + use_inv_dis: bool + Whether the inverse distance is used. + use_derivatives: bool + If the derivatives of the fingerprint should be returned. 
-def get_inverse_distances( - atoms, - not_masked=None, - masked=None, - nmi=None, - nmj=None, - nmi_ind=None, - nmj_ind=None, + Returns: + fp: (N, Nnm) or (Nnm*Nm+(Nnm*(Nnm-1)/2)) array + The fingerprint. + g: (N, Nnm, 3) or (Nnm*Nm+(Nnm*(Nnm-1)/2), 3) array + The derivatives of the fingerprint if requested. + """ + # Calculate the fingerprint + fp = fpinner.sum(axis=0) + # Calculate the derivatives of the fingerprint + if use_derivatives: + # Calculate the derivatives of the distances + if use_inv_dis: + inner_deriv = fpinner / (dist_eps**2) + else: + inner_deriv = -fpinner / (dist_eps**2) + # Calculate the derivatives of the fingerprint + g = einsum("c...d,c...->...d", dist_vec, inner_deriv) + else: + g = None + return fp, g + + +def get_periodic_softmax( + dist_eps, + dist_vec, + fpinner, + covdis, + use_inv_dis=True, use_derivatives=True, - use_covrad=True, - periodic_softmax=True, - mic=False, - wrap=True, eps=1e-16, **kwargs, ): """ - Get the inverse cartesian distances between the atomes. - The derivatives can also be obtained. + Get the periodic softmax of the distances. + + Parameters: + dist_eps: (Nc, N, Nnm) or (Nc, Nnm*N+(Nnm*(Nnm-1)/2)) array + The distances with a small number added. + dist_vec: (Nc, N, Nnm, 3) or (Nc, Nnm*N+(Nnm*(Nnm-1)/2), 3) array + The distance vectors. + fpinner: (Nc, N, Nnm) or (Nc, Nnm*N+(Nnm*(Nnm-1)/2)) array + The inner fingerprint. + If use_inv_dis is True, the fingerprint is the covalent distances + divided by distances. + Else, the fingerprint is distances divided by the covalent + distances. + covdis: (N, Nnm) or (Nnm*Nm+(Nnm*(Nnm-1)/2)) array + The covalent distances. + use_inv_dis: bool + Whether the inverse distance is used. + use_derivatives: bool + If the derivatives of the fingerprint should be returned. + eps: float + A small number to avoid division by zero. + + Returns: + fp: (N, Nnm) or (Nnm*Nm+(Nnm*(Nnm-1)/2)) array + The fingerprint. 
+ g: (N, Nnm, 3) or (Nnm*Nm+(Nnm*(Nnm-1)/2), 3) array + The derivatives of the fingerprint if requested. """ - # If a not masked list is given all atoms is treated to be not masked - if not_masked is None: - not_masked = np.arange(len(atoms)) - if masked is None: - masked = np.array( - list(set(np.arange(len(atoms))).difference(set(not_masked))) - ) - # Make indicies - if nmi is None or nmj is None or nmi_ind is None or nmj_ind is None: - nmi, nmj = np.triu_indices(len(not_masked), k=1, m=None) - nmi_ind = not_masked[nmi] - nmj_ind = not_masked[nmj] - # Get the covalent radii - if use_covrad: - covrad = covalent_radii[atoms.get_atomic_numbers()] - if len(masked): - covrad = np.concatenate( - [ - (covrad[masked] + covrad[not_masked, None]).reshape(-1), - covrad[nmj_ind] + covrad[nmi_ind], - ], - axis=0, - ) + # Calculate weights + if use_inv_dis: + w = exp(-((1.0 / fpinner) ** 2)) + else: + w = exp(-(fpinner**2)) + w = w / (w.sum(axis=0) + eps) + # Calculate the all the fingerprint elements with their weights + fp_w = fpinner * w + # Calculate the fingerprint + fp = fp_w.sum(axis=0) + # Calculate the derivatives of the fingerprint + if use_derivatives: + # Calculate the derivatives of the distances + if use_inv_dis: + inner_deriv = 1.0 / (dist_eps**2) else: - covrad = covrad[nmj_ind] + covrad[nmi_ind] + inner_deriv = -1.0 / (dist_eps**2) + # Calculate the derivatives of the weights + inner_deriv += (2.0 / (covdis**2)) * (1.0 - (fp / fpinner)) + # Calculate the inner derivative + inner_deriv = fp_w * inner_deriv + # Calculate the derivatives of the fingerprint + g = einsum("c...d,c...->...d", dist_vec, inner_deriv) else: - covrad = 1.0 - # Get inverse distances - if periodic_softmax and atoms.pbc.any(): - # Use a softmax function to weight the inverse distances - distances, vec_distances = get_all_distances( - atoms, - not_masked=not_masked, - masked=masked, - nmi=nmi, - nmj_ind=nmj_ind, - mic=False, - vector=True, - wrap=wrap, - **kwargs, - ) - # Calculate all 
displacement vectors from the cell vectors - cells_p = get_periodicities(atoms.get_cell(), atoms.pbc, remove0=False) - c_dim = len(cells_p) - # Calculate the distances to the atoms in all unit cell - d = vec_distances + cells_p.reshape(c_dim, 1, 3) - # Add small number to avoid division by zero to the distances - dnorm = np.linalg.norm(d, axis=-1) + eps - # Calculate weights - dcov = dnorm / covrad - w = np.exp(-(dcov**2)) - w = w / np.sum(w, axis=0) - # Calculate inverse distances - finner = w / dcov - f = np.sum(finner, axis=0) - # Calculate derivatives of inverse distances - if use_derivatives: - inner = (2.0 * (1.0 - (dcov * f))) / (covrad**2) - inner = inner + (1.0 / (dnorm**2)) - gij = np.sum(d * (finner * inner).reshape(c_dim, -1, 1), axis=0) + g = None + return fp, g + + +def cosine_cutoff( + x, + use_derivatives=False, + xs_cutoff=3.0, + xe_cutoff=4.0, + **kwargs, +): + """ + Cosine cutoff function. + Modification of eq. 24 in https://doi.org/10.1002/qua.24927. + + Parameters: + x: float or array of floats + The input values for the cutoff function. + use_derivatives: bool + If the derivatives of the cutoff function should be returned. + xs_cutoff: float + The start of the cutoff function. + xe_cutoff: float + The end of the cutoff function. + + Returns: + fc: float or array of floats + The cutoff function values. + The fingerprint. + gc: float or array of floats + The derivatives of the cutoff function. 
+ """ + # Calculate the scale of the cutoff function + x_scale = xe_cutoff - xs_cutoff + # Calculate the cutoff function + fc_inner = pi * (x - xs_cutoff) / x_scale + fc = 0.5 * (1.0 + cos(fc_inner)) + # Crop the cutoff function + fc_rs = x <= xs_cutoff + fc_re = x >= xe_cutoff + fc = where(fc_rs, 1.0, fc) + fc = where(fc_re, 0.0, fc) + # Calculate the derivative of the cutoff function + if use_derivatives: + gc = (-0.5 * pi / x_scale) * sin(fc_inner) + gc = where(fc_rs, 0.0, gc) + gc = where(fc_re, 0.0, gc) + return fc, gc + return fc, None + + +def sine_activation( + x, + use_derivatives=False, + xs_activation=3.0, + xe_activation=4.0, + **kwargs, +): + """ + Sine activation function. + + Parameters: + x: float or array of floats + The input values for the activation function. + use_derivatives: bool + If the derivatives of the activation function should be returned. + xs_activation: float + The start of the activation function. + xe_activation: float + The end of the activation function. + + Returns: + fc: float or array of floats + The activation function values. + The fingerprint. + gc: float or array of floats + The derivatives of the activation function. + """ + # Calculate the scale of the activation function + x_scale = xe_activation - xs_activation + # Calculate the activation function + fc_inner = pi * (x - xs_activation) / x_scale + fc = 0.5 * (1.0 - cos(fc_inner)) + # Crop the activation function + fc_rs = x <= xs_activation + fc_re = x >= xe_activation + fc = where(fc_rs, 0.0, fc) + fc = where(fc_re, 1.0, fc) + # Calculate the derivative of the activation function + if use_derivatives: + gc = (0.5 * pi / x_scale) * sin(fc_inner) + gc = where(fc_rs, 0.0, gc) + gc = where(fc_re, 0.0, gc) + return fc, gc + return fc, None + + +def fp_cosine_cutoff(fp, g, rs_cutoff=3.0, re_cutoff=4.0, eps=1e-16, **kwargs): + """ + Cosine cutoff function. + Modification of eq. 24 in https://doi.org/10.1002/qua.24927. 
+ A small value has been added to the inverse distance to avoid division + by zero. + + Parameters: + fp: (N, Nnm) or (Nnm*Nm+(Nnm*(Nnm-1)/2)) array + The fingerprint. + g: (N, Nnm, 3) or (Nnm*Nm+(Nnm*(Nnm-1)/2), 3) array + The derivatives of the fingerprint. + rs_cutoff: float + The start of the cutoff function. + re_cutoff: float + The end of the cutoff function. + eps: float + A small number to avoid division by zero. + + Returns: + fp: (N, Nnm) or (Nnm*Nm+(Nnm*(Nnm-1)/2)) array + The fingerprint. + g: (N, Nnm, 3) or (Nnm*Nm+(Nnm*(Nnm-1)/2), 3) array + The derivatives of the fingerprint. + """ + # Calculate the inverse fingerprint with small number added + fp_inv = 1.0 / (fp + eps) + # Check if the derivatives are requested + if g is not None: + use_derivatives = True else: - distances, vec_distances = get_all_distances( - atoms, - not_masked=not_masked, - masked=masked, - nmi=nmi, - nmj_ind=nmj_ind, - mic=mic, - vector=use_derivatives, - wrap=wrap, - **kwargs, - ) - # Add small number to avoid division by zero to the distances - distances = distances + eps - # Calculate inverse distances - f = covrad / distances - # Calculate derivatives of inverse distances - if use_derivatives: - gij = vec_distances * (covrad / (distances**3)).reshape(-1, 1) + use_derivatives = False + # Calculate the cutoff function + fc, gc = cosine_cutoff( + fp_inv, + use_derivatives=use_derivatives, + xs_cutoff=rs_cutoff, + xe_cutoff=re_cutoff, + **kwargs, + ) + # If the derivatives are requested, calculate them if use_derivatives: - # Convert derivatives to the right matrix form - n_total = len(f) - g = np.zeros((n_total, len(not_masked) * 3)) - # The derivative of not fixed (not masked) and fixed atoms - n_nm_m = len(not_masked) * len(masked) - if n_nm_m: - i_g = np.repeat(np.arange(n_nm_m), 3) - j_g = 3 * np.arange(len(not_masked)).reshape(-1, 1) - j_g = j_g + np.array([0, 1, 2]) - j_g = np.tile(j_g, (1, len(masked))).reshape(-1) - g[i_g, j_g] = gij[:n_nm_m].reshape(-1) - # The 
derivative of not fixed (not masked) and not fixed atoms - if len(nmi): - i_g = np.repeat(np.arange(n_nm_m, n_total), 3) - j_gi = (3 * nmi.reshape(-1, 1) + np.array([0, 1, 2])).reshape(-1) - j_gj = (3 * nmj.reshape(-1, 1) + np.array([0, 1, 2])).reshape(-1) - g[i_g, j_gi] = gij[n_nm_m:].reshape(-1) - g[i_g, j_gj] = -g[i_g, j_gi] - return f, g - return f, None + gc *= fp_inv**2 + g = g * (fc + fp * gc)[..., None] + # Multiply the fingerprint with the cutoff function + fp = fp * fc + return fp, g diff --git a/catlearn/regression/gp/fingerprint/invdistances.py b/catlearn/regression/gp/fingerprint/invdistances.py index 3ca6ee2e..ccf2191c 100644 --- a/catlearn/regression/gp/fingerprint/invdistances.py +++ b/catlearn/regression/gp/fingerprint/invdistances.py @@ -1,54 +1,107 @@ -import numpy as np -import itertools -from .fingerprint import Fingerprint -from .geometry import get_inverse_distances +from .geometry import ( + fp_cosine_cutoff, + get_covalent_distances, + get_periodic_softmax, + get_periodic_sum, +) +from .distances import Distances -class InvDistances(Fingerprint): +class InvDistances(Distances): + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The inverse distances are constructed as the fingerprint. + The inverse distances are scaled with covalent radii. + """ + def __init__( self, reduce_dimensions=True, use_derivatives=True, + wrap=True, + include_ncells=False, + periodic_sum=False, periodic_softmax=True, mic=False, - wrap=True, - eps=1e-16, + all_ncells=True, + cell_cutoff=4.0, + use_cutoff=False, + rs_cutoff=3.0, + re_cutoff=4.0, + dtype=float, **kwargs, ): """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The inverse distance fingerprint constructer class. - The inverse distances are scaled with covalent radii. + Initialize the fingerprint constructor. 
Parameters: - reduce_dimensions : bool + reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells when periodic boundary conditions are used. - mic : bool + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool Minimum Image Convention (Shortest distances when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. mic is faster than periodic_softmax, but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. 
+ rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ # Set the arguments super().__init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, + wrap=wrap, + include_ncells=include_ncells, + periodic_sum=periodic_sum, periodic_softmax=periodic_softmax, mic=mic, - wrap=wrap, - eps=eps, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, + dtype=dtype, **kwargs, ) @@ -56,10 +109,17 @@ def update_arguments( self, reduce_dimensions=None, use_derivatives=None, + wrap=None, + include_ncells=None, + periodic_sum=None, periodic_softmax=None, mic=None, - wrap=None, - eps=None, + all_ncells=None, + cell_cutoff=None, + use_cutoff=None, + rs_cutoff=None, + re_cutoff=None, + dtype=None, **kwargs, ): """ @@ -67,195 +127,173 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - reduce_dimensions : bool + reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells when periodic boundary conditions are used. 
- mic : bool + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool Minimum Image Convention (Shortest distances when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. mic is faster than periodic_softmax, but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated instance itself. """ - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - if use_derivatives is not None: - self.use_derivatives = use_derivatives - if periodic_softmax is not None: - self.periodic_softmax = periodic_softmax - if mic is not None: - self.mic = mic - if wrap is not None: - self.wrap = wrap - if eps is not None: - self.eps = abs(float(eps)) - return self - - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): - "Calculate the fingerprint and its derivative." 
- # Set parameters of array sizes - n_atoms = len(atoms) - n_nmasked = len(not_masked) - n_masked = n_atoms - n_nmasked - n_nm_m = n_nmasked * n_masked - n_nm_nm = int(0.5 * n_nmasked * (n_nmasked - 1)) - n_total = n_nm_m + n_nm_nm - # Make indicies arrays - not_masked = np.array(not_masked, dtype=int) - masked = np.array(masked, dtype=int) - i_nm = np.arange(n_nmasked) - # Calculate all the fingerprints and their derivatives - fij, gij, nmi, nmj = self.get_contributions( - atoms, - not_masked, - masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, - ) - # Return the fingerprints and their derivatives - return fij, gij - - def element_setup( - self, - atoms, - indicies, - not_masked=None, - masked=None, - i_nm=None, - i_m=None, - nm_bool=True, - **kwargs, - ): - "Get all informations of the atoms and split them into types." - # Merge element type and their tags - combis = list(zip(atoms.get_atomic_numbers(), atoms.get_tags())) - # Find all unique combinations - unique_combis = np.array(list(set(combis))) - n_unique = len(unique_combis) - # Get the Booleans for what combination it belongs to - bools = np.all( - np.array(combis).reshape(-1, 1, 2) == unique_combis, axis=2 + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + wrap=wrap, + include_ncells=include_ncells, + periodic_sum=periodic_sum, + periodic_softmax=periodic_softmax, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + dtype=dtype, ) - if not nm_bool: - split_indicies = [indicies[ind] for ind in bools.T] - return split_indicies, split_indicies.copy(), n_unique - # Classify all non-fixed atoms in their unique combination - nmasked_indicies = [i_nm[ind] for ind in bools[not_masked].T] - # Classify all fixed atoms in their unique combination - masked_indicies = [i_m[ind] for ind in bools[masked].T] - return nmasked_indicies, masked_indicies, n_unique + if use_cutoff is not None: + self.use_cutoff = use_cutoff + if rs_cutoff is not 
None: + self.rs_cutoff = abs(float(rs_cutoff)) + if re_cutoff is not None: + self.re_cutoff = abs(float(re_cutoff)) + return self - def get_contributions( + def calc_fp( self, - atoms, + dist, + dist_vec, not_masked, masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, + nmi, + nmj, + nmi_ind, + nmj_ind, + atomic_numbers, + tags=None, + use_include_ncells=False, + use_periodic_sum=False, + use_periodic_softmax=False, **kwargs, ): - # Get the indicies for not fixed and not fixed atoms interactions - nmi, nmj = np.triu_indices(n_nmasked, k=1, m=None) - nmi_ind = not_masked[nmi] - nmj_ind = not_masked[nmj] - f, g = get_inverse_distances( - atoms, + "Calculate the fingerprint." + # Add small number to avoid division by zero to the distances + dist += self.eps + # Get the covalent distances + covdis = get_covalent_distances( + atomic_numbers=atomic_numbers, + not_masked=not_masked, + masked=masked, + nmi_ind=nmi_ind, + nmj_ind=nmj_ind, + dtype=self.dtype, + ) + # Set the correct shape of the covalent distances + if use_include_ncells or use_periodic_sum or use_periodic_softmax: + covdis = covdis[None, ...] 
+ # Calculate the fingerprint + fp = covdis / dist + # Check what distance method should be used + if use_periodic_softmax: + # Calculate the fingerprint with the periodic softmax + fp, g = get_periodic_softmax( + dist_eps=dist, + dist_vec=dist_vec, + fpinner=fp, + covdis=covdis, + use_inv_dis=True, + use_derivatives=self.use_derivatives, + eps=self.eps, + **kwargs, + ) + elif use_periodic_sum: + # Calculate the fingerprint with the periodic sum + fp, g = get_periodic_sum( + dist_eps=dist, + dist_vec=dist_vec, + fpinner=fp, + use_inv_dis=True, + use_derivatives=self.use_derivatives, + **kwargs, + ) + else: + # Get the derivative of the fingerprint + if self.use_derivatives: + g = dist_vec * (fp / (dist**2))[..., None] + else: + g = None + # Apply the cutoff function + if self.use_cutoff: + fp, g = self.apply_cutoff(fp, g, **kwargs) + # Update the fingerprint with the modification + fp, g = self.modify_fp( + fp=fp, + g=g, + atomic_numbers=atomic_numbers, + tags=tags, not_masked=not_masked, masked=masked, nmi=nmi, nmj=nmj, nmi_ind=nmi_ind, nmj_ind=nmj_ind, - use_derivatives=self.use_derivatives, - use_covrad=True, - periodic_softmax=self.periodic_softmax, - mic=self.mic, - wrap=self.wrap, - eps=self.eps, + use_include_ncells=use_include_ncells, **kwargs, ) - return f, g, nmi, nmj + return fp, g - def get_indicies( - self, - n_nmasked, - n_masked, - n_total, - n_nm_m, - nmi, - nmj, - **kwargs, - ): - "Get all the indicies of the interactions." - # Make the indicies of not fixed and fixed atoms interactions - indicies_nm_m = np.arange(n_nm_m, dtype=int).reshape( - n_nmasked, n_masked - ) - # Make the indicies of not fixed and fixed atoms interactions - indicies_nm_nm = np.zeros((n_nmasked, n_nmasked), dtype=int) - indicies_nm_nm[nmi, nmj] = indicies_nm_nm[nmj, nmi] = np.arange( - n_nm_m, n_total, dtype=int + def apply_cutoff(self, fp, g, **kwargs): + "Get the cutoff function." 
+ return fp_cosine_cutoff( + fp, + g, + rs_cutoff=self.rs_cutoff, + re_cutoff=self.re_cutoff, + eps=self.eps, + **kwargs, ) - return indicies_nm_m, indicies_nm_nm - - def get_indicies_combination( - self, - ci, - cj, - nmasked_indicies, - masked_indicies, - indicies_nm_m, - indicies_nm_nm, - **kwargs, - ): - """ - Get all the indicies in the fingerprint for - the specific combination of atom types. - """ - indicies_comb = [] - i_nm_ci = nmasked_indicies[ci].reshape(-1, 1) - if ci == cj: - indicies_comb = list( - indicies_nm_m[i_nm_ci, masked_indicies[cj]].reshape(-1) - ) - ind_prod = np.array( - list(itertools.combinations(nmasked_indicies[ci], 2)) - ) - if len(ind_prod): - indicies_comb = indicies_comb + list( - indicies_nm_nm[ind_prod[:, 0], ind_prod[:, 1]] - ) - else: - indicies_comb = list( - indicies_nm_m[i_nm_ci, masked_indicies[cj]].reshape(-1) - ) - indicies_comb = indicies_comb + list( - indicies_nm_m[ - nmasked_indicies[cj].reshape(-1, 1), masked_indicies[ci] - ].reshape(-1) - ) - indicies_comb = indicies_comb + list( - indicies_nm_nm[i_nm_ci, nmasked_indicies[cj]].reshape(-1) - ) - return indicies_comb, len(indicies_comb) def get_arguments(self): "Get the arguments of the class itself." 
@@ -263,10 +301,17 @@ def get_arguments(self): arg_kwargs = dict( reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, + wrap=self.wrap, + include_ncells=self.include_ncells, + periodic_sum=self.periodic_sum, periodic_softmax=self.periodic_softmax, mic=self.mic, - wrap=self.wrap, - eps=self.eps, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + use_cutoff=self.use_cutoff, + rs_cutoff=self.rs_cutoff, + re_cutoff=self.re_cutoff, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/fingerprint/invdistances2.py b/catlearn/regression/gp/fingerprint/invdistances2.py index 7197bf1c..b5eed15d 100644 --- a/catlearn/regression/gp/fingerprint/invdistances2.py +++ b/catlearn/regression/gp/fingerprint/invdistances2.py @@ -2,79 +2,43 @@ class InvDistances2(InvDistances): - def __init__( - self, - reduce_dimensions=True, - use_derivatives=True, - periodic_softmax=True, - mic=False, - wrap=True, - eps=1e-16, - **kwargs, - ): - """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The inverse squared distance fingerprint constructer class. - The inverse squared distances are scaled with covalent radii. - - Parameters: - reduce_dimensions : bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Calculate and store derivatives of the fingerprint wrt. - the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances - when periodic boundary conditions are used. - mic : bool - Minimum Image Convention (Shortest distances when - periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. - mic is faster than periodic_softmax, - but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. 
- """ - # Set the arguments - super().__init__( - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - periodic_softmax=periodic_softmax, - mic=mic, - wrap=wrap, - eps=eps, - **kwargs, - ) + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The squared inverse distances are constructed as the fingerprint. + The squared inverse distances are scaled with covalent radii. + """ - def get_contributions( + def modify_fp( self, - atoms, + fp, + g, + atomic_numbers, + tags, not_masked, masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, + nmi, + nmj, + nmi_ind, + nmj_ind, + use_include_ncells=False, **kwargs, ): - # Get the fingerprint and indicies from InvDistances - f, g, nmi, nmj = super().get_contributions( - atoms, - not_masked, - masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, - **kwargs, - ) + "Modify the fingerprint." + # Adjust the derivatives so they are squared + if g is not None: + g = (2.0 * fp)[..., None] * g + g = self.insert_to_deriv_matrix( + g=g, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + use_include_ncells=use_include_ncells, + ) + # Reshape the fingerprint + if use_include_ncells: + fp = fp.reshape(-1) # Adjust the fingerprint so it is squared - if self.use_derivatives: - g = (2.0 * f).reshape(-1, 1) * g - f = f**2 - return f, g, nmi, nmj + fp = fp**2 + return fp, g diff --git a/catlearn/regression/gp/fingerprint/meandistances.py b/catlearn/regression/gp/fingerprint/meandistances.py index f7207117..35223f98 100644 --- a/catlearn/regression/gp/fingerprint/meandistances.py +++ b/catlearn/regression/gp/fingerprint/meandistances.py @@ -1,124 +1,95 @@ -import numpy as np -from .invdistances import InvDistances +from numpy import asarray, zeros +from .sumdistances import SumDistances -class MeanDistances(InvDistances): - def __init__( +class MeanDistances(SumDistances): + """ + Fingerprint constructor class that 
convert an atoms instance into + a fingerprint instance with vector and derivatives. + The mean of inverse distance fingerprint constructer class. + The inverse distances are scaled with covalent radii. + """ + + def modify_fp_pairs( self, - reduce_dimensions=True, - use_derivatives=True, - periodic_softmax=True, - mic=False, - wrap=True, - eps=1e-16, + fp, + g, + not_masked, + use_include_ncells, + split_indices_nm, + split_indices, **kwargs, ): - """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The mean of inverse distance fingerprint constructer class. - The inverse distances are scaled with covalent radii. - - Parameters: - reduce_dimensions : bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Calculate and store derivatives of the fingerprint wrt. - the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances - when periodic boundary conditions are used. - mic : bool - Minimum Image Convention (Shortest distances when - periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. - mic is faster than periodic_softmax, - but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. - """ - # Set the arguments - super().__init__( - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - periodic_softmax=periodic_softmax, - mic=mic, - wrap=wrap, - eps=eps, - **kwargs, - ) - - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): - "Calculate the fingerprint and its derivative." 
- # Set parameters of array sizes - n_atoms = len(atoms) - n_nmasked = len(not_masked) - n_masked = n_atoms - n_nmasked - n_nm_m = n_nmasked * n_masked - n_nm_nm = int(0.5 * n_nmasked * (n_nmasked - 1)) - n_total = n_nm_m + n_nm_nm - # Make indicies arrays - not_masked = np.array(not_masked, dtype=int) - masked = np.array(masked, dtype=int) - indicies = np.arange(n_atoms) - i_nm = np.arange(n_nmasked) - i_m = np.arange(n_masked) - # Calculate all the fingerprints and their derivatives - fij, gij, nmi, nmj = self.get_contributions( - atoms, - not_masked, - masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, - ) - # Get all the indicies of the interactions - indicies_nm_m, indicies_nm_nm = self.get_indicies( - n_nmasked, - n_masked, - n_total, - n_nm_m, - nmi, - nmj, + # Mean the fingerprints and derivatives if neighboring cells are used + if use_include_ncells: + fp = fp.mean(axis=0) + if g is not None: + g = g.mean(axis=0) + # Make the new fingerprint + fp_new = zeros( + (len(split_indices_nm), len(split_indices)), + dtype=self.dtype, ) - # Make the arrays of fingerprints and their derivatives - f = [] - g = [] - # Get all informations of the atoms and split them into types - nmasked_indicies, masked_indicies, n_unique = self.element_setup( - atoms, - indicies, - not_masked, - masked, - i_nm, - i_m, - nm_bool=True, - ) - # Get all combinations of the atom types - combinations = zip(*np.triu_indices(n_unique, k=0, m=None)) - # Run over all combinations - for ci, cj in combinations: - # Find the indicies in the fingerprints for the combinations - indicies_comb, len_i_comb = self.get_indicies_combination( - ci, - cj, - nmasked_indicies, - masked_indicies, - indicies_nm_m, - indicies_nm_nm, + # Calculate the new derivatives + if g is not None: + # Make the new derivatives + g_new = zeros( + ( + len(split_indices_nm), + len(split_indices), + len(not_masked), + 3, + ), + dtype=self.dtype, ) - if len_i_comb: - # Mean the fingerprints for the combinations - f, g 
= self.mean_fp(f, g, fij, gij, indicies_comb) - return np.array(f), np.array(g) + # Mean the fingerprint and derivatives + for i, i_v in enumerate(split_indices_nm.values()): + fp_i = fp[i_v] + g_i = g[i_v] + g_ij = g_i[:, not_masked].sum(axis=0) + for j, (comb, j_v) in enumerate(split_indices.items()): + fp_new[i, j] = fp_i[:, j_v].mean() + n_comb = len(i_v) * len(j_v) + g_new[i, j, i_v] = g_i[:, j_v].sum(axis=1) / n_comb + if comb in split_indices_nm: + ij_comb = split_indices_nm[comb] + g_new[i, j, ij_comb] -= g_ij[ij_comb] / n_comb + return fp_new.reshape(-1), g_new.reshape(-1, len(not_masked) * 3) + # Mean the fingerprints + for i, i_v in enumerate(split_indices_nm.values()): + fp_i = fp[i_v] + for j, j_v in enumerate(split_indices.values()): + fp_new[i, j] = fp_i[:, j_v].mean() + return fp_new.reshape(-1), None - def mean_fp(self, f, g, fij, gij, indicies_comb, **kwargs): - "Mean of the fingerprints." - f.append(np.mean(fij[indicies_comb])) - if self.use_derivatives: - g.append(np.mean(gij[indicies_comb], axis=0)) - return f, g + def modify_fp_elements( + self, + fp, + g, + not_masked, + use_include_ncells, + split_indices_nm, + **kwargs, + ): + # Mean the fingerprints and derivatives if neighboring cells are used + if use_include_ncells: + fp = fp.mean(axis=0) + if g is not None: + g = g.mean(axis=0) + # Mean the fingerprints + n_atoms = fp.shape[1] + fp = fp.mean(axis=1) + fp = asarray( + [fp[i_v].mean() for i_v in split_indices_nm.values()], + dtype=self.dtype, + ) + # Calculate the new derivatives + if g is not None: + g_new = zeros((len(split_indices_nm), len(not_masked), 3)) + for i, i_v in enumerate(split_indices_nm.values()): + g_new[i, i_v] = g[i_v].sum(axis=1) + g_new[i] -= g[i_v][:, not_masked].sum(axis=0) + g_new[i] /= len(i_v) * n_atoms + g_new = g_new.reshape(-1, len(not_masked) * 3) + return fp, g_new + return fp, None diff --git a/catlearn/regression/gp/fingerprint/meandistancespower.py 
b/catlearn/regression/gp/fingerprint/meandistancespower.py index c53e5e64..3bd34409 100644 --- a/catlearn/regression/gp/fingerprint/meandistancespower.py +++ b/catlearn/regression/gp/fingerprint/meandistancespower.py @@ -1,240 +1,96 @@ -import numpy as np -from .meandistances import MeanDistances +from numpy import asarray, zeros +from .sumdistancespower import SumDistancesPower -class MeanDistancesPower(MeanDistances): - def __init__( - self, - reduce_dimensions=True, - use_derivatives=True, - periodic_softmax=True, - mic=False, - wrap=True, - eps=1e-16, - power=2, - use_roots=True, - **kwargs, - ): - """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The mean of dfferent powers of - the inverse distances fingerprint constructer class. - The inverse distances are scaled with covalent radii. +class MeanDistancesPower(SumDistancesPower): + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The mean of multiple powers of the inverse distance fingerprint + constructer class. + The inverse distances are scaled with covalent radii. + """ - Parameters: - reduce_dimensions : bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Calculate and store derivatives of the fingerprint wrt. - the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances - when periodic boundary conditions are used. - mic : bool - Minimum Image Convention (Shortest distances when - periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. - mic is faster than periodic_softmax, - but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. - power: int - The power of the inverse distances. 
- use_roots: bool - Whether to use roots of the power elements. - """ - # Set the arguments - super().__init__( - reduce_dimensions=reduce_dimensions, - use_derivatives=use_derivatives, - periodic_softmax=periodic_softmax, - mic=mic, - wrap=wrap, - eps=eps, - power=power, - use_roots=use_roots, - **kwargs, - ) - - def update_arguments( + def modify_fp_pairs( self, - reduce_dimensions=None, - use_derivatives=None, - periodic_softmax=None, - mic=None, - wrap=None, - eps=None, - power=None, - use_roots=None, + fp, + g, + not_masked, + use_include_ncells, + split_indices_nm, + split_indices, **kwargs, ): - """ - Update the class with its arguments. - The existing arguments are used if they are not given. - - Parameters: - reduce_dimensions : bool - Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool - Calculate and store derivatives of the fingerprint wrt. - the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances - when periodic boundary conditions are used. - mic : bool - Minimum Image Convention (Shortest distances when - periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. - mic is faster than periodic_softmax, - but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. - power: int - The power of the inverse distances. - use_roots: bool - Whether to use roots of the power elements. - - Returns: - self: The updated instance itself. 
- """ - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - if use_derivatives is not None: - self.use_derivatives = use_derivatives - if periodic_softmax is not None: - self.periodic_softmax = periodic_softmax - if mic is not None: - self.mic = mic - if wrap is not None: - self.wrap = wrap - if eps is not None: - self.eps = abs(float(eps)) - if power is not None: - self.power = int(power) - if use_roots is not None: - self.use_roots = use_roots - return self - - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): - "Calculate the fingerprint and its derivative." - # Set parameters of array sizes - n_atoms = len(atoms) - n_nmasked = len(not_masked) - n_masked = n_atoms - n_nmasked - n_nm_m = n_nmasked * n_masked - n_nm_nm = int(0.5 * n_nmasked * (n_nmasked - 1)) - n_total = n_nm_m + n_nm_nm - # Make indicies arrays - not_masked = np.array(not_masked, dtype=int) - masked = np.array(masked, dtype=int) - indicies = np.arange(n_atoms) - i_nm = np.arange(n_nmasked) - i_m = np.arange(n_masked) - # Calculate all the fingerprints and their derivatives - fij, gij, nmi, nmj = self.get_contributions( - atoms, - not_masked, - masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, - ) - # Get all the indicies of the interactions - indicies_nm_m, indicies_nm_nm = self.get_indicies( - n_nmasked, - n_masked, - n_total, - n_nm_m, - nmi, - nmj, + # Mean the fingerprints and derivatives if neighboring cells are used + if use_include_ncells: + fp = fp.mean(axis=0) + if g is not None: + g = g.mean(axis=0) + # Make the new fingerprint + fp_new = zeros( + (len(split_indices_nm), len(split_indices)), + dtype=self.dtype, ) - # Make the arrays of fingerprints and their derivatives - f = [] - g = [] - # Get all informations of the atoms and split them into types - nmasked_indicies, masked_indicies, n_unique = self.element_setup( - atoms, - indicies, - not_masked, - masked, - i_nm, - i_m, - nm_bool=True, - ) - # Get all combinations of the atom 
types - combinations = zip(*np.triu_indices(n_unique, k=0, m=None)) - # Run over all combinations - for ci, cj in combinations: - # Find the indicies in the fingerprints for the combinations - indicies_comb, len_i_comb = self.get_indicies_combination( - ci, - cj, - nmasked_indicies, - masked_indicies, - indicies_nm_m, - indicies_nm_nm, + # Calculate the new derivatives + if g is not None: + # Make the new derivatives + g_new = zeros( + ( + len(split_indices_nm), + len(split_indices), + len(not_masked), + 3, + ), + dtype=self.dtype, ) - if len_i_comb: - # Mean the fingerprints for the combinations - f, g = self.mean_fp_power( - f, g, fij, gij, indicies_comb, len_i_comb - ) - return np.array(f), np.array(g) + # Mean the fingerprint and derivatives + for i, i_v in enumerate(split_indices_nm.values()): + fp_i = fp[i_v] + g_i = g[i_v] + g_ij = g_i[:, not_masked].sum(axis=0) + for j, (comb, j_v) in enumerate(split_indices.items()): + fp_new[i, j] = fp_i[:, j_v].mean() + n_comb = len(i_v) * len(j_v) + g_new[i, j, i_v] = g_i[:, j_v].sum(axis=1) / n_comb + if comb in split_indices_nm: + ij_comb = split_indices_nm[comb] + g_new[i, j, ij_comb] -= g_ij[ij_comb] / n_comb + return fp_new.reshape(-1), g_new.reshape(-1, len(not_masked) * 3) + # Mean the fingerprints + for i, i_v in enumerate(split_indices_nm.values()): + fp_i = fp[i_v] + for j, j_v in enumerate(split_indices.values()): + fp_new[i, j] = fp_i[:, j_v].mean() + return fp_new.reshape(-1), None - def mean_fp_power( + def modify_fp_elements( self, - f, + fp, g, - fij, - gij, - indicies_comb, - len_i_comb, + not_masked, + use_include_ncells, + split_indices_nm, **kwargs, ): - "Mean of the fingerprints." 
- powers = np.arange(1, self.power + 1) - fij_powers = fij[indicies_comb].reshape(-1, 1) ** powers - fij_means = np.mean(fij_powers, axis=0) - if self.use_roots: - f.extend(fij_means ** (1.0 / powers)) - else: - f.extend(fij_means) - if self.use_derivatives: - g.append(np.mean(gij[indicies_comb], axis=0)) - fg_prod = np.mean( - fij_powers[:, :-1].T.reshape(self.power - 1, len_i_comb, 1) - * gij[indicies_comb], - axis=1, - ) - if self.use_roots: - fpowers = (1.0 - powers[1:]) / powers[1:] - g.extend(fg_prod * (fij_means[1:] ** fpowers).reshape(-1, 1)) - else: - g.extend(powers[1:].reshape(-1, 1) * fg_prod) - return f, g - - def get_arguments(self): - "Get the arguments of the class itself." - # Get the arguments given to the class in the initialization - arg_kwargs = dict( - reduce_dimensions=self.reduce_dimensions, - use_derivatives=self.use_derivatives, - periodic_softmax=self.periodic_softmax, - mic=self.mic, - wrap=self.wrap, - eps=self.eps, - power=self.power, - use_roots=self.use_roots, + # Mean the fingerprints and derivatives if neighboring cells are used + if use_include_ncells: + fp = fp.mean(axis=0) + if g is not None: + g = g.mean(axis=0) + # Mean the fingerprints + n_atoms = fp.shape[1] + fp = fp.mean(axis=1) + fp = asarray( + [fp[i_v].mean() for i_v in split_indices_nm.values()], + dtype=self.dtype, ) - # Get the constants made within the class - constant_kwargs = dict() - # Get the objects made within the class - object_kwargs = dict() - return arg_kwargs, constant_kwargs, object_kwargs + # Calculate the new derivatives + if g is not None: + g_new = zeros((len(split_indices_nm), len(not_masked), 3)) + for i, i_v in enumerate(split_indices_nm.values()): + g_new[i, i_v] = g[i_v].sum(axis=1) + g_new[i] -= g[i_v][:, not_masked].sum(axis=0) + g_new[i] /= len(i_v) * n_atoms + g_new = g_new.reshape(-1, len(not_masked) * 3) + return fp, g_new + return fp, None diff --git a/catlearn/regression/gp/fingerprint/sorteddistances.py 
b/catlearn/regression/gp/fingerprint/sorteddistances.py index dc4cf544..659dca44 100644 --- a/catlearn/regression/gp/fingerprint/sorteddistances.py +++ b/catlearn/regression/gp/fingerprint/sorteddistances.py @@ -1,147 +1,344 @@ -import numpy as np +from numpy import argsort, concatenate from .invdistances import InvDistances -class SortedDistances(InvDistances): +class SortedInvDistances(InvDistances): + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The sorted inverse distance fingerprint constructer class. + The inverse distances are scaled with covalent radii. + """ + def __init__( self, reduce_dimensions=True, use_derivatives=True, + wrap=True, + include_ncells=False, + periodic_sum=False, periodic_softmax=True, mic=False, - wrap=True, - eps=1e-16, + all_ncells=True, + cell_cutoff=4.0, + use_cutoff=False, + rs_cutoff=3.0, + re_cutoff=4.0, + dtype=float, + use_tags=False, + use_sort_all=False, + reuse_combinations=True, **kwargs, ): """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The sorted inverse distance fingerprint constructer class. - The inverse distances are scaled with covalent radii. + Initialize the fingerprint class. Parameters: - reduce_dimensions : bool + reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. 
+ periodic_sum: bool + Use a sum of the distances to neighboring cells when periodic boundary conditions are used. - mic : bool + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool Minimum Image Convention (Shortest distances when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. mic is faster than periodic_softmax, but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + use_tags: bool + Use the tags of the atoms to identify the atoms as + another type. + use_sort_all: bool + Whether sort all the the combinations independently of the + pairs. + reuse_combinations: bool + Whether to reuse the combinations of the elements. + The change in the atomic numbers and tags will be checked + to see if they are unchanged. + If False, the combinations are calculated each time. 
""" # Set the arguments super().__init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, + wrap=wrap, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + include_ncells=include_ncells, + periodic_sum=periodic_sum, periodic_softmax=periodic_softmax, mic=mic, - wrap=wrap, - eps=eps, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, + dtype=dtype, + use_tags=use_tags, + use_sort_all=use_sort_all, + reuse_combinations=reuse_combinations, **kwargs, ) - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): - "Calculate the fingerprint and its derivative." - # Set parameters of array sizes - n_atoms = len(atoms) - n_nmasked = len(not_masked) - n_masked = n_atoms - n_nmasked - n_nm_m = n_nmasked * n_masked - n_nm_nm = int(0.5 * n_nmasked * (n_nmasked - 1)) - n_total = n_nm_m + n_nm_nm - # Make indicies arrays - not_masked = np.array(not_masked, dtype=int) - masked = np.array(masked, dtype=int) - indicies = np.arange(n_atoms) - i_nm = np.arange(n_nmasked) - i_m = np.arange(n_masked) - # Calculate all the fingerprints and their derivatives - fij, gij, nmi, nmj = self.get_contributions( - atoms, - not_masked, - masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, - ) - # Get all the indicies of the interactions - indicies_nm_m, indicies_nm_nm = self.get_indicies( - n_nmasked, - n_masked, - n_total, - n_nm_m, - nmi, - nmj, - ) - # Make the arrays of fingerprints and their derivatives - f = np.zeros(n_total) - g = np.zeros((n_total, int(n_nmasked * 3))) - # Get all informations of the atoms and split them into types - nmasked_indicies, masked_indicies, n_unique = self.element_setup( - atoms, - indicies, + def modify_fp( + self, + fp, + g, + atomic_numbers, + tags, + not_masked, + masked, + nmi, + nmj, + nmi_ind, + nmj_ind, + use_include_ncells=False, + **kwargs, + ): + "Modify the fingerprint." 
+ # Sort the fingerprint + if self.use_sort_all: + fp, indices = self.sort_fp_all( + fp, + use_include_ncells=use_include_ncells, + **kwargs, + ) + else: + fp, indices = self.sort_fp_pair( + fp, + atomic_numbers, + tags, + not_masked, + masked, + use_include_ncells=use_include_ncells, + **kwargs, + ) + # Sort the fingerprints and their derivatives + fp = fp[indices] + # Insert the derivatives into the derivative matrix + if g is not None: + g = self.insert_to_deriv_matrix( + g=g, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + use_include_ncells=use_include_ncells, + ) + g = g[indices] + return fp, g + + def sort_fp_all(self, fp, use_include_ncells=False, **kwargs): + "Get the indices for sorting the fingerprint." + # Reshape the fingerprint + if use_include_ncells: + fp = fp.reshape(-1) + # Get the sorted indices + indices = argsort(fp) + return fp, indices + + def sort_fp_pair( + self, + fp, + atomic_numbers, + tags, + not_masked, + masked, + use_include_ncells=False, + **kwargs, + ): + "Get the indices for sorting the fingerprint." 
+ # Get the indices of the atomic combinations + split_indices = self.element_setup( + atomic_numbers, + tags, not_masked, masked, - i_nm, - i_m, - nm_bool=True, + use_include_ncells=use_include_ncells, + c_dim=len(fp), + **kwargs, ) - # Get all combinations of the atom types - combinations = zip(*np.triu_indices(n_unique, k=0, m=None)) - temp_len = 0 - # Run over all combinations - for ci, cj in combinations: - # Find the indicies in the fingerprints for the combinations - indicies_comb, len_i_comb = self.get_indicies_combination( - ci, - cj, - nmasked_indicies, - masked_indicies, - indicies_nm_m, - indicies_nm_nm, - ) - if len_i_comb: - # Sort the fingerprints for the combinations - len_new = temp_len + len_i_comb - f, g = self.sort_fp( - f, - g, - fij, - gij, - indicies_comb, - temp_len, - len_new, - ) - temp_len = len_new - return f, g + # Reshape the fingerprint + if use_include_ncells: + fp = fp.reshape(-1) + # Sort the indices after inverse distance magnitude + indices = [indi[argsort(fp[indi])] for indi in split_indices.values()] + indices = concatenate(indices) + return fp, indices - def sort_fp( + def update_arguments( self, - f, - g, - fij, - gij, - indicies_comb, - temp_len, - len_new, + reduce_dimensions=None, + use_derivatives=None, + wrap=None, + include_ncells=None, + periodic_sum=None, + periodic_softmax=None, + mic=None, + all_ncells=None, + cell_cutoff=None, + use_cutoff=None, + rs_cutoff=None, + re_cutoff=None, + dtype=None, + use_tags=None, + use_sort_all=None, + reuse_combinations=None, **kwargs, ): - "Sort the fingerprints after inverse distance magnitude." - i_sort = np.argsort(fij[indicies_comb])[::-1] - i_sort = np.array(indicies_comb)[i_sort] - f[temp_len:len_new] = fij[i_sort] - if self.use_derivatives: - g[temp_len:len_new] = gij[i_sort] - return f, g + """ + Update the class with its arguments. + The existing arguments are used if they are not given. 
+ + Parameters: + reduce_dimensions: bool + Whether to reduce the fingerprint space if constrains are used. + use_derivatives: bool + Calculate and store derivatives of the fingerprint wrt. + the cartesian coordinates. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells + when periodic boundary conditions are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic is faster than periodic_softmax, + but the derivatives are discontinuous. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
+ use_tags: bool + Use the tags of the atoms to identify the atoms as + another type. + use_sort_all: bool + Whether sort all the the combinations independently of the + pairs. + reuse_combinations: bool + Whether to reuse the combinations of the elements. + The change in the atomic numbers and tags will be checked + to see if they are unchanged. + If False, the combinations are calculated each time. + + Returns: + self: The updated instance itself. + """ + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + wrap=wrap, + include_ncells=include_ncells, + periodic_sum=periodic_sum, + periodic_softmax=periodic_softmax, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, + dtype=dtype, + ) + if use_tags is not None: + self.use_tags = use_tags + if use_sort_all is not None: + self.use_sort_all = use_sort_all + if reuse_combinations is not None: + self.reuse_combinations = reuse_combinations + return self + + def get_arguments(self): + "Get the arguments of the class itself." 
+ # Get the arguments given to the class in the initialization + arg_kwargs = dict( + reduce_dimensions=self.reduce_dimensions, + use_derivatives=self.use_derivatives, + wrap=self.wrap, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + include_ncells=self.include_ncells, + periodic_sum=self.periodic_sum, + periodic_softmax=self.periodic_softmax, + mic=self.mic, + use_cutoff=self.use_cutoff, + rs_cutoff=self.rs_cutoff, + re_cutoff=self.re_cutoff, + dtype=self.dtype, + use_tags=self.use_tags, + use_sort_all=self.use_sort_all, + reuse_combinations=self.reuse_combinations, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/regression/gp/fingerprint/sumdistances.py b/catlearn/regression/gp/fingerprint/sumdistances.py index a3644291..ecf6c95f 100644 --- a/catlearn/regression/gp/fingerprint/sumdistances.py +++ b/catlearn/regression/gp/fingerprint/sumdistances.py @@ -1,124 +1,563 @@ -import numpy as np +from numpy import arange, asarray, zeros +from ase.data import covalent_radii from .invdistances import InvDistances +from ..fingerprint.geometry import ( + check_atoms, + get_full_distance_matrix, + get_periodic_softmax, + get_periodic_sum, +) class SumDistances(InvDistances): + """ + Fingerprint constructor class that convert an atoms instance into + a fingerprint instance with vector and derivatives. + The sum of inverse distance fingerprint constructer class. + The inverse distances are scaled with covalent radii. 
+ """ + def __init__( self, reduce_dimensions=True, use_derivatives=True, + wrap=True, + include_ncells=False, + periodic_sum=False, periodic_softmax=True, mic=False, - wrap=True, - eps=1e-16, + all_ncells=True, + cell_cutoff=4.0, + use_cutoff=False, + rs_cutoff=3.0, + re_cutoff=4.0, + dtype=float, + use_tags=False, + use_pairs=True, + reuse_combinations=True, **kwargs, ): """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The sum of inverse distance fingerprint constructer class. - The inverse distances are scaled with covalent radii. + Initialize the fingerprint constructor. Parameters: - reduce_dimensions : bool + reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells when periodic boundary conditions are used. - mic : bool + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool Minimum Image Convention (Shortest distances when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. 
mic is faster than periodic_softmax, but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + use_tags: bool + Use the tags of the atoms to identify the atoms as + another type. + use_pairs: bool + Whether to use pairs of elements or use all elements. + reuse_combinations: bool + Whether to reuse the combinations of the elements. + The change in the atomic numbers and tags will be checked + to see if they are unchanged. + If False, the combinations are calculated each time. """ # Set the arguments super().__init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, + wrap=wrap, + include_ncells=include_ncells, + periodic_sum=periodic_sum, periodic_softmax=periodic_softmax, mic=mic, - wrap=wrap, - eps=eps, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, + dtype=dtype, + use_tags=use_tags, + use_pairs=use_pairs, + reuse_combinations=reuse_combinations, **kwargs, ) - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): - "Calculate the fingerprint and its derivative." 
- # Set parameters of array sizes - n_atoms = len(atoms) - n_nmasked = len(not_masked) - n_masked = n_atoms - n_nmasked - n_nm_m = n_nmasked * n_masked - n_nm_nm = int(0.5 * n_nmasked * (n_nmasked - 1)) - n_total = n_nm_m + n_nm_nm - # Make indicies arrays - not_masked = np.array(not_masked, dtype=int) - masked = np.array(masked, dtype=int) - indicies = np.arange(n_atoms) - i_nm = np.arange(n_nmasked) - i_m = np.arange(n_masked) - # Calculate all the fingerprints and their derivatives - fij, gij, nmi, nmj = self.get_contributions( - atoms, + def modify_fp( + self, + fp, + g, + atomic_numbers, + tags, + not_masked, + masked, + nmi, + nmj, + nmi_ind, + nmj_ind, + use_include_ncells, + **kwargs, + ): + "Modify the fingerprint." + # Get the indices of the atomic combinations + split_indices_nm, split_indices = self.element_setup( + atomic_numbers, + tags, not_masked, - masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, + **kwargs, ) - # Get all the indicies of the interactions - indicies_nm_m, indicies_nm_nm = self.get_indicies( - n_nmasked, - n_masked, - n_total, - n_nm_m, - nmi, - nmj, + # Modify the fingerprint + if self.use_pairs: + # Use pairs of elements + fp, g = self.modify_fp_pairs( + fp=fp, + g=g, + not_masked=not_masked, + use_include_ncells=use_include_ncells, + split_indices_nm=split_indices_nm, + split_indices=split_indices, + **kwargs, + ) + else: + # Use all elements + fp, g = self.modify_fp_elements( + fp=fp, + g=g, + not_masked=not_masked, + use_include_ncells=use_include_ncells, + split_indices_nm=split_indices_nm, + **kwargs, + ) + return fp, g + + def modify_fp_pairs( + self, + fp, + g, + not_masked, + use_include_ncells, + split_indices_nm, + split_indices, + **kwargs, + ): + "Modify the fingerprint over pairs of elements." 
+ # Sum the fingerprints and derivatives if neighboring cells are used + if use_include_ncells: + fp = fp.sum(axis=0) + if g is not None: + g = g.sum(axis=0) + # Make the new fingerprint + fp_new = zeros( + (len(split_indices_nm), len(split_indices)), + dtype=self.dtype, ) - # Make the arrays of fingerprints and their derivatives - f = [] - g = [] - # Get all informations of the atoms and split them into types - nmasked_indicies, masked_indicies, n_unique = self.element_setup( - atoms, - indicies, - not_masked, - masked, - i_nm, - i_m, - nm_bool=True, + # Sum the fingerprints + for i, i_v in enumerate(split_indices_nm.values()): + fp_i = fp[i_v] + for j, j_v in enumerate(split_indices.values()): + fp_new[i, j] = fp_i[:, j_v].sum() + fp_new = fp_new.reshape(-1) + # Calculate the new derivatives + if g is not None: + # Make the new derivatives + g_new = zeros( + ( + len(split_indices_nm), + len(split_indices), + len(not_masked), + 3, + ), + dtype=self.dtype, + ) + # Sum the derivatives + for i, i_v in enumerate(split_indices_nm.values()): + g_i = g[i_v] + g_ij = g_i[:, not_masked].sum(axis=0) + for j, (comb, j_v) in enumerate(split_indices.items()): + g_new[i, j, i_v] = g_i[:, j_v].sum(axis=1) + if comb in split_indices_nm: + ij_comb = split_indices_nm[comb] + g_new[i, j, ij_comb] -= g_ij[ij_comb] + g_new = g_new.reshape(-1, len(not_masked) * 3) + return fp_new, g_new + return fp_new, None + + def modify_fp_elements( + self, + fp, + g, + not_masked, + use_include_ncells, + split_indices_nm, + **kwargs, + ): + "Modify the fingerprint over all elements." 
+ # Sum the fingerprints and derivatives if neighboring cells are used + if use_include_ncells: + fp = fp.sum(axis=0) + if g is not None: + g = g.sum(axis=0) + # Sum the fingerprints + fp = fp.sum(axis=1) + fp = asarray( + [fp[i_v].sum() for i_v in split_indices_nm.values()], + dtype=self.dtype, ) - # Get all combinations of the atom types - combinations = zip(*np.triu_indices(n_unique, k=0, m=None)) - # Run over all combinations - for ci, cj in combinations: - # Find the indicies in the fingerprints for the combinations - indicies_comb, len_i_comb = self.get_indicies_combination( - ci, - cj, - nmasked_indicies, - masked_indicies, - indicies_nm_m, - indicies_nm_nm, + # Calculate the new derivatives + if g is not None: + g_new = zeros((len(split_indices_nm), len(not_masked), 3)) + for i, i_v in enumerate(split_indices_nm.values()): + g_new[i, i_v] = g[i_v].sum(axis=1) + g_new[i] -= g[i_v][:, not_masked].sum(axis=0) + g_new = g_new.reshape(-1, len(not_masked) * 3) + return fp, g_new + return fp, None + + def update_arguments( + self, + reduce_dimensions=None, + use_derivatives=None, + wrap=None, + include_ncells=None, + periodic_sum=None, + periodic_softmax=None, + mic=None, + all_ncells=None, + cell_cutoff=None, + use_cutoff=None, + rs_cutoff=None, + re_cutoff=None, + dtype=None, + use_tags=None, + use_pairs=None, + reuse_combinations=None, + **kwargs, + ): + """ + Update the class with its arguments. + The existing arguments are used if they are not given. + + Parameters: + reduce_dimensions: bool + Whether to reduce the fingerprint space if constrains are used. + use_derivatives: bool + Calculate and store derivatives of the fingerprint wrt. + the cartesian coordinates. + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. 
+ Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells + when periodic boundary conditions are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic is faster than periodic_softmax, + but the derivatives are discontinuous. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + use_tags: bool + Use the tags of the atoms to identify the atoms as + another type. + use_pairs: bool + Whether to use pairs of elements or use all elements. + reuse_combinations: bool + Whether to reuse the combinations of the elements. + The change in the atomic numbers and tags will be checked + to see if they are unchanged. + If False, the combinations are calculated each time. + + Returns: + self: The updated instance itself. 
+ """ + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + wrap=wrap, + include_ncells=include_ncells, + periodic_sum=periodic_sum, + periodic_softmax=periodic_softmax, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, + dtype=dtype, + ) + if use_tags is not None: + self.use_tags = use_tags + if use_pairs is not None: + self.use_pairs = use_pairs + if reuse_combinations is not None: + self.reuse_combinations = reuse_combinations + if not hasattr(self, "split_indices_nm"): + self.split_indices_nm = None + return self + + def calc_fp( + self, + dist, + dist_vec, + not_masked, + masked, + nmi, + nmj, + nmi_ind, + nmj_ind, + atomic_numbers, + tags=None, + use_include_ncells=False, + use_periodic_sum=False, + use_periodic_softmax=False, + **kwargs, + ): + "Calculate the fingerprint." + # Add small number to avoid division by zero to the distances + dist += self.eps + # Get the covalent distances + covdis = self.get_covalent_distances( + atomic_numbers=atomic_numbers, + not_masked=not_masked, + ) + # Check if the distances include the neighboring cells + if use_include_ncells or use_periodic_sum or use_periodic_softmax: + covdis = covdis[None, ...] 
+ # Get the index of the not masked atoms + i_nm = arange(len(not_masked)) + # Calculate the inverse distances + fp = covdis / dist + # Check what distance method should be used + if use_periodic_softmax: + # Calculate the fingerprint with the periodic softmax + fp, g = get_periodic_softmax( + dist_eps=dist, + dist_vec=dist_vec, + fpinner=fp, + covdis=covdis, + use_inv_dis=True, + use_derivatives=self.use_derivatives, + eps=self.eps, + **kwargs, ) - if len_i_comb: - # Sum the fingerprints for the combinations - f, g = self.sum_fp(f, g, fij, gij, indicies_comb) - return np.array(f), np.array(g) + elif use_periodic_sum: + # Calculate the fingerprint with the periodic sum + fp, g = get_periodic_sum( + dist_eps=dist, + dist_vec=dist_vec, + fpinner=fp, + use_inv_dis=True, + use_derivatives=self.use_derivatives, + **kwargs, + ) + else: + # Get the derivative of the fingerprint + if self.use_derivatives: + g = dist_vec * (fp / (dist**2))[..., None] + else: + g = None + # Apply the cutoff function + if self.use_cutoff: + fp, g = self.apply_cutoff(fp, g, **kwargs) + # Remove self interaction + fp[..., i_nm, not_masked] = 0.0 + # Update the fingerprint with the modification + fp, g = self.modify_fp( + fp=fp, + g=g, + atomic_numbers=atomic_numbers, + tags=tags, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + nmi_ind=nmi_ind, + nmj_ind=nmj_ind, + use_include_ncells=use_include_ncells, + **kwargs, + ) + return fp, g + + def get_distances( + self, + atoms, + not_masked=None, + masked=None, + nmi=None, + nmj=None, + nmi_ind=None, + nmj_ind=None, + use_vector=False, + include_ncells=False, + mic=False, + **kwargs, + ): + """ + Get the distances and their vectors. 
+ """ + return get_full_distance_matrix( + atoms=atoms, + not_masked=not_masked, + use_vector=use_vector, + wrap=self.wrap, + include_ncells=include_ncells, + mic=mic, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + dtype=self.dtype, + ) + + def get_covalent_distances(self, atomic_numbers, not_masked): + "Get the covalent distances of the atoms." + cov_dis = covalent_radii[atomic_numbers] + return asarray(cov_dis + cov_dis[not_masked, None], dtype=self.dtype) - def sum_fp(self, f, g, fij, gij, indicies_comb, **kwargs): - "Sum of the fingerprints." - f.append(np.sum(fij[indicies_comb])) - if self.use_derivatives: - g.append(np.sum(gij[indicies_comb], axis=0)) - return f, g + def element_setup( + self, + atomic_numbers, + tags, + not_masked, + **kwargs, + ): + """ + Get all informations of the atom combinations and split them + into types. + """ + # Check if the atomic setup is the same + if self.reuse_combinations: + if ( + self.atomic_numbers is not None + or self.not_masked is not None + or self.tags is not None + or self.split_indices is not None + or self.split_indices_nm is not None + ): + atoms_equal = check_atoms( + atomic_numbers=self.atomic_numbers, + atomic_numbers_test=atomic_numbers, + tags=self.tags, + tags_test=tags, + not_masked=self.not_masked, + not_masked_test=not_masked, + **kwargs, + ) + if atoms_equal: + return self.split_indices_nm, self.split_indices + # Save the atomic numbers and tags + self.atomic_numbers = atomic_numbers + self.tags = tags + self.not_masked = not_masked + # Get the atomic types of the atoms + if not self.use_tags: + tags = zeros((len(atomic_numbers)), dtype=int) + combis = list(zip(atomic_numbers, tags)) + split_indices = {} + for i, combi in enumerate(combis): + split_indices.setdefault(combi, []).append(i) + self.split_indices = split_indices + # Get the atomic types of the not masked atoms + combis = list(zip(atomic_numbers[not_masked], tags[not_masked])) + split_indices_nm = {} + for i, combi in 
enumerate(combis): + split_indices_nm.setdefault(combi, []).append(i) + self.split_indices_nm = split_indices_nm + return split_indices_nm, split_indices + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + reduce_dimensions=self.reduce_dimensions, + use_derivatives=self.use_derivatives, + wrap=self.wrap, + include_ncells=self.include_ncells, + periodic_sum=self.periodic_sum, + periodic_softmax=self.periodic_softmax, + mic=self.mic, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + use_cutoff=self.use_cutoff, + rs_cutoff=self.rs_cutoff, + re_cutoff=self.re_cutoff, + dtype=self.dtype, + use_tags=self.use_tags, + use_pairs=self.use_pairs, + reuse_combinations=self.reuse_combinations, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/regression/gp/fingerprint/sumdistancespower.py b/catlearn/regression/gp/fingerprint/sumdistancespower.py index 73e37233..af3e2ccf 100644 --- a/catlearn/regression/gp/fingerprint/sumdistancespower.py +++ b/catlearn/regression/gp/fingerprint/sumdistancespower.py @@ -1,46 +1,102 @@ -import numpy as np +from numpy import zeros from .sumdistances import SumDistances class SumDistancesPower(SumDistances): + """ + Fingerprint constructor class that converts an atoms instance into + a fingerprint instance with vector and derivatives. + The sum of multiple powers of the inverse distance fingerprint + constructor class. + The inverse distances are scaled with covalent radii.
+ """ + def __init__( self, reduce_dimensions=True, use_derivatives=True, + wrap=True, + include_ncells=False, + periodic_sum=False, periodic_softmax=True, mic=False, - wrap=True, - eps=1e-16, - power=2, + all_ncells=True, + cell_cutoff=4.0, + use_cutoff=False, + rs_cutoff=3.0, + re_cutoff=4.0, + dtype=float, + use_tags=False, + use_pairs=True, + reuse_combinations=True, + power=4, use_roots=True, **kwargs, ): """ - Fingerprint constructer class that convert atoms object into - a fingerprint object with vector and derivatives. - The sum of dfferent powers of - the inverse distances fingerprint constructer class. - The inverse distances are scaled with covalent radii. + Initialize the fingerprint constructor. Parameters: - reduce_dimensions : bool + reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances + wrap: bool + Whether to wrap the atoms to the unit cell or not. + include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells when periodic boundary conditions are used. - mic : bool + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool Minimum Image Convention (Shortest distances when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. 
+ Either use mic, periodic_sum, periodic_softmax, or + include_ncells. mic is faster than periodic_softmax, but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + use_tags: bool + Use the tags of the atoms to identify the atoms as + another type. + use_pairs: bool + Whether to use pairs of elements or use all elements. + reuse_combinations: bool + Whether to reuse the combinations of the elements. + The change in the atomic numbers and tags will be checked + to see if they are unchanged. + If False, the combinations are calculated each time. power: int The power of the inverse distances. 
use_roots: bool @@ -50,23 +106,225 @@ def __init__( super().__init__( reduce_dimensions=reduce_dimensions, use_derivatives=use_derivatives, + wrap=wrap, + include_ncells=include_ncells, + periodic_sum=periodic_sum, periodic_softmax=periodic_softmax, mic=mic, - wrap=wrap, - eps=eps, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, + dtype=dtype, + use_tags=use_tags, + use_pairs=use_pairs, + reuse_combinations=reuse_combinations, power=power, use_roots=use_roots, **kwargs, ) + def modify_fp( + self, + fp, + g, + atomic_numbers, + tags, + not_masked, + masked, + nmi, + nmj, + nmi_ind, + nmj_ind, + use_include_ncells, + **kwargs, + ): + # Get the indices of the atomic combinations + split_indices_nm, split_indices = self.element_setup( + atomic_numbers, + tags, + not_masked, + **kwargs, + ) + # Get the number of atomic combinations + if self.use_pairs: + fp_len = len(split_indices_nm) * len(split_indices) + else: + fp_len = len(split_indices_nm) + # Create the new fingerprint and derivatives + fp_new = zeros( + (fp_len, self.power), + dtype=self.dtype, + ) + g_new = zeros( + ( + fp_len, + self.power, + 3 * len(not_masked), + ), + dtype=self.dtype, + ) + # Loop over the powers + for p in range(self.power): + power = p + 1 + if power > 1: + # Calculate the power of the inverse distances at power > 1 + fp_new[:, p], g_new[:, p] = self.modify_fp_powers( + fp=fp, + g=g, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + use_include_ncells=use_include_ncells, + split_indices_nm=split_indices_nm, + split_indices=split_indices, + power=power, + ) + else: + # Special case for power equal to 1 + fp_new[:, p], g_new[:, p] = self.modify_fp_power1( + fp=fp, + g=g, + not_masked=not_masked, + masked=masked, + nmi=nmi, + nmj=nmj, + use_include_ncells=use_include_ncells, + split_indices_nm=split_indices_nm, + split_indices=split_indices, + ) + # Reshape fingerprint and derivatives + fp_new = 
fp_new.reshape(-1) + # Return the new fingerprint and derivatives + if g is not None: + g_new = g_new.reshape(-1, 3 * len(not_masked)) + return fp_new, g_new + return fp_new, None + + def modify_fp_power1( + self, + fp, + g, + not_masked, + masked, + nmi, + nmj, + use_include_ncells, + split_indices_nm, + split_indices, + **kwargs, + ): + """ + Calculate the sum of the inverse distances at power = 1 + for each sets of atomic combinations. + """ + # Modify the fingerprint + if self.use_pairs: + # Use pairs of elements + fp_new, g_new = self.modify_fp_pairs( + fp=fp, + g=g, + not_masked=not_masked, + use_include_ncells=use_include_ncells, + split_indices_nm=split_indices_nm, + split_indices=split_indices, + **kwargs, + ) + else: + # Use all elements + fp_new, g_new = self.modify_fp_elements( + fp=fp, + g=g, + not_masked=not_masked, + use_include_ncells=use_include_ncells, + split_indices_nm=split_indices_nm, + **kwargs, + ) + # Add a small number to avoid division by zero + fp_new += self.eps + return fp_new, g_new + + def modify_fp_powers( + self, + fp, + g, + not_masked, + masked, + nmi, + nmj, + use_include_ncells, + split_indices_nm, + split_indices, + power, + **kwargs, + ): + """ + Calculate the sum of the inverse distances at power > 1 + for each sets of atomic combinations. 
+ """ + # Calculate the power of the inverse distances + fp_new = fp**power + # Calculate the derivatives + if g is not None: + g_new = (fp ** (power - 1))[..., None] * g + else: + g_new = None + # Modify the fingerprint + if self.use_pairs: + # Use pairs of elements + fp_new, g_new = self.modify_fp_pairs( + fp=fp_new, + g=g_new, + not_masked=not_masked, + use_include_ncells=use_include_ncells, + split_indices_nm=split_indices_nm, + split_indices=split_indices, + **kwargs, + ) + else: + # Use all elements + fp_new, g_new = self.modify_fp_elements( + fp=fp_new, + g=g_new, + not_masked=not_masked, + use_include_ncells=use_include_ncells, + split_indices_nm=split_indices_nm, + **kwargs, + ) + # Add a small number to avoid division by zero + fp_new += self.eps + # Calculate the root of the sum + if self.use_roots: + if g is not None: + mroot = (1.0 / power) - 1.0 + g_new = g_new * (fp_new**mroot)[..., None] + root = 1.0 / power + fp_new = fp_new**root + else: + if g is not None: + g_new *= power + return fp_new, g_new + def update_arguments( self, reduce_dimensions=None, use_derivatives=None, + wrap=None, + include_ncells=None, + periodic_sum=None, periodic_softmax=None, mic=None, - wrap=None, - eps=None, + all_ncells=None, + cell_cutoff=None, + use_cutoff=None, + rs_cutoff=None, + re_cutoff=None, + dtype=None, + use_tags=None, + use_pairs=None, + reuse_combinations=None, power=None, use_roots=None, **kwargs, @@ -76,24 +334,66 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - reduce_dimensions : bool + reduce_dimensions: bool Whether to reduce the fingerprint space if constrains are used. - use_derivatives : bool + use_derivatives: bool Calculate and store derivatives of the fingerprint wrt. the cartesian coordinates. - periodic_softmax : bool - Use a softmax weighting of the squared distances + wrap: bool + Whether to wrap the atoms to the unit cell or not. 
+ include_ncells: bool + Include the neighboring cells when calculating the distances. + The fingerprint will include the neighboring cells. + include_ncells will replace periodic_softmax and mic. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_sum: bool + Use a sum of the distances to neighboring cells when periodic boundary conditions are used. - mic : bool + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + periodic_softmax: bool + Use a softmax weighting on the distances to neighboring cells + from the squared distances when periodic boundary conditions + are used. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. + mic: bool Minimum Image Convention (Shortest distances when periodic boundary conditions are used). - Either use mic or periodic_softmax, not both. + Either use mic, periodic_sum, periodic_softmax, or + include_ncells. mic is faster than periodic_softmax, but the derivatives are discontinuous. - wrap: bool - Whether to wrap the atoms to the unit cell or not. - eps : float - Small number to avoid division by zero. + all_ncells: bool + Use all neighboring cells when calculating the distances. + cell_cutoff is used to check how many neighboring cells are + needed. + cell_cutoff: float + The cutoff distance for the neighboring cells. + It is the scaling of the maximum covalent distance. + use_cutoff: bool + Whether to use a cutoff function for the inverse distance + fingerprint. + The cutoff function is a cosine cutoff function. + rs_cutoff: float + The starting distance for the cutoff function being 1. + re_cutoff: float + The ending distance for the cutoff function being 0. + re_cutoff must be larger than rs_cutoff. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + use_tags: bool + Use the tags of the atoms to identify the atoms as + another type. + use_pairs: bool + Whether to use pairs of elements or use all elements. 
+ reuse_combinations: bool + Whether to reuse the combinations of the elements. + The change in the atomic numbers and tags will be checked + to see if they are unchanged. + If False, the combinations are calculated each time. power: int The power of the inverse distances. use_roots: bool @@ -102,139 +402,50 @@ def update_arguments( Returns: self: The updated instance itself. """ - if reduce_dimensions is not None: - self.reduce_dimensions = reduce_dimensions - if use_derivatives is not None: - self.use_derivatives = use_derivatives - if periodic_softmax is not None: - self.periodic_softmax = periodic_softmax - if mic is not None: - self.mic = mic - if wrap is not None: - self.wrap = wrap - if eps is not None: - self.eps = abs(float(eps)) + super().update_arguments( + reduce_dimensions=reduce_dimensions, + use_derivatives=use_derivatives, + wrap=wrap, + include_ncells=include_ncells, + periodic_sum=periodic_sum, + periodic_softmax=periodic_softmax, + mic=mic, + all_ncells=all_ncells, + cell_cutoff=cell_cutoff, + use_cutoff=use_cutoff, + rs_cutoff=rs_cutoff, + re_cutoff=re_cutoff, + dtype=dtype, + use_tags=use_tags, + use_pairs=use_pairs, + reuse_combinations=reuse_combinations, + ) if power is not None: self.power = int(power) if use_roots is not None: self.use_roots = use_roots return self - def make_fingerprint(self, atoms, not_masked, masked, **kwargs): - "Calculate the fingerprint and its derivative." 
- # Set parameters of array sizes - n_atoms = len(atoms) - n_nmasked = len(not_masked) - n_masked = n_atoms - n_nmasked - n_nm_m = n_nmasked * n_masked - n_nm_nm = int(0.5 * n_nmasked * (n_nmasked - 1)) - n_total = n_nm_m + n_nm_nm - # Make indicies arrays - not_masked = np.array(not_masked, dtype=int) - masked = np.array(masked, dtype=int) - indicies = np.arange(n_atoms) - i_nm = np.arange(n_nmasked) - i_m = np.arange(n_masked) - # Calculate all the fingerprints and their derivatives - fij, gij, nmi, nmj = self.get_contributions( - atoms, - not_masked, - masked, - i_nm, - n_total, - n_nmasked, - n_masked, - n_nm_m, - ) - # Get all the indicies of the interactions - indicies_nm_m, indicies_nm_nm = self.get_indicies( - n_nmasked, - n_masked, - n_total, - n_nm_m, - nmi, - nmj, - ) - # Make the arrays of fingerprints and their derivatives - f = [] - g = [] - # Get all informations of the atoms and split them into types - nmasked_indicies, masked_indicies, n_unique = self.element_setup( - atoms, - indicies, - not_masked, - masked, - i_nm, - i_m, - nm_bool=True, - ) - # Get all combinations of the atom types - combinations = zip(*np.triu_indices(n_unique, k=0, m=None)) - # Run over all combinations - for ci, cj in combinations: - # Find the indicies in the fingerprints for the combinations - indicies_comb, len_i_comb = self.get_indicies_combination( - ci, - cj, - nmasked_indicies, - masked_indicies, - indicies_nm_m, - indicies_nm_nm, - ) - if len_i_comb: - # Sum the fingerprints for the combinations - f, g = self.sum_fp_power( - f, - g, - fij, - gij, - indicies_comb, - len_i_comb, - ) - return np.array(f), np.array(g) - - def sum_fp_power( - self, - f, - g, - fij, - gij, - indicies_comb, - len_i_comb, - **kwargs, - ): - "Sum of the fingerprints." 
- powers = np.arange(1, self.power + 1) - fij_powers = fij[indicies_comb].reshape(-1, 1) ** powers - fij_sums = np.sum(fij_powers, axis=0) - if self.use_roots: - f.extend(fij_sums ** (1.0 / powers)) - else: - f.extend(fij_sums) - if self.use_derivatives: - g.append(np.sum(gij[indicies_comb], axis=0)) - fg_prod = np.sum( - fij_powers[:, :-1].T.reshape(self.power - 1, len_i_comb, 1) - * gij[indicies_comb], - axis=1, - ) - if self.use_roots: - fpowers = (1.0 - powers[1:]) / powers[1:] - g.extend(fg_prod * (fij_sums[1:] ** fpowers).reshape(-1, 1)) - else: - g.extend(powers[1:].reshape(-1, 1) * fg_prod) - return f, g - def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization arg_kwargs = dict( reduce_dimensions=self.reduce_dimensions, use_derivatives=self.use_derivatives, + wrap=self.wrap, + include_ncells=self.include_ncells, + periodic_sum=self.periodic_sum, periodic_softmax=self.periodic_softmax, mic=self.mic, - wrap=self.wrap, - eps=self.eps, + all_ncells=self.all_ncells, + cell_cutoff=self.cell_cutoff, + use_cutoff=self.use_cutoff, + rs_cutoff=self.rs_cutoff, + re_cutoff=self.re_cutoff, + dtype=self.dtype, + use_tags=self.use_tags, + use_pairs=self.use_pairs, + reuse_combinations=self.reuse_combinations, power=self.power, use_roots=self.use_roots, ) diff --git a/catlearn/regression/gp/hpboundary/boundary.py b/catlearn/regression/gp/hpboundary/boundary.py index a10aca91..41cd93d8 100644 --- a/catlearn/regression/gp/hpboundary/boundary.py +++ b/catlearn/regression/gp/hpboundary/boundary.py @@ -1,28 +1,59 @@ -import numpy as np +from numpy import ( + array, + concatenate, + exp, + finfo, + full, + linspace, + log, + sqrt, +) +from numpy.random import default_rng, Generator, RandomState class HPBoundaries: - def __init__(self, bounds_dict={}, scale=1.0, log=True, **kwargs): + """ + Boundary conditions for the hyperparameters. 
+ A dictionary with boundary conditions of the hyperparameters + can be given as an argument. + Machine precisions are used as boundary conditions for + the hyperparameters not given in the dictionary. + """ + + def __init__( + self, + bounds_dict={}, + scale=1.0, + use_log=True, + seed=None, + dtype=float, + **kwargs, + ): """ - Boundary conditions for the hyperparameters. - A dictionary with boundary conditions of the hyperparameters - can be given as an argument. - Machine precisions are used as boundary conditions for - the hyperparameters not given in the dictionary. + Initialize the boundary conditions for the hyperparameters. Parameters: - bounds_dict : dict + bounds_dict: dict A dictionary with boundary conditions as numpy (H,2) arrays with two columns for each type of hyperparameter. - scale : float + scale: float Scale the boundary conditions. - log : bool + use_log: bool Whether to use hyperparameters in log-scale or not. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ self.update_arguments( bounds_dict=bounds_dict, scale=scale, - log=log, + use_log=use_log, + seed=seed, + dtype=dtype, **kwargs, ) @@ -31,19 +62,19 @@ def update_bounds(self, model, X, Y, parameters, **kwargs): Create and update the boundary conditions for the hyperparameters. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - X : (N,D) array + X: (N,D) array Training features with N data points and D dimensions. - Y : (N,1) array or (N,D+1) array + Y: (N,1) array or (N,D+1) array Training targets with or without derivatives with N data points. - parameters : (H) list of strings + parameters: (H) list of strings A list of names of the hyperparameters. Returns: - self : The object itself. 
+ self: The object itself. """ # Update parameters self.make_parameters_set(parameters) @@ -57,72 +88,73 @@ def update_bounds(self, model, X, Y, parameters, **kwargs): ) return self - def get_bounds(self, parameters=None, array=False, **kwargs): + def get_bounds(self, parameters=None, use_array=False, **kwargs): """ Get the boundary conditions of the hyperparameters. Parameters : - parameters : list of str or None + parameters: list of str or None A list of the specific used hyperparameter names as strings. If parameters=None, then the stored hyperparameters are used. - array : bool + use_array: bool Whether to get an array or a dictionary as output. Returns: - (H,2) array : The boundary conditions as an array if array=True. + (H,2) array: The boundary conditions as an array if use_array=True. or - dict : A dictionary of the boundary conditions. + dict: A dictionary of the boundary conditions. """ # Make the sorted unique hyperparameters if they are given parameters_set = self.get_parameters_set(parameters=parameters) # Make the boundary conditions for the given hyperparameters - if array: - return np.concatenate( - [self.bounds_dict[para] for para in parameters_set], axis=0 + if use_array: + return concatenate( + [self.bounds_dict[para] for para in parameters_set], + axis=0, ) return {para: self.bounds_dict[para].copy() for para in parameters_set} - def get_hp(self, parameters=None, array=False, **kwargs): + def get_hp(self, parameters=None, use_array=False, **kwargs): """ Get the guess of the hyperparameters. The mean of the boundary conditions in log-space is used as the guess. Parameters: - parameters : list of str or None + parameters: list of str or None A list of the specific used hyperparameter names as strings. If parameters=None, then the stored hyperparameters are used. - array : bool + use_array: bool Whether to get an array or a dictionary as output. Returns: - (H) array : The guesses of the hyperparameters as an array - if array=True. 
+        (H) array: The guesses of the hyperparameters as an array
+            if use_array=True.
         or
-        dict : A dictionary of the guesses of the hyperparameters.
+        dict: A dictionary of the guesses of the hyperparameters.
         """
         # Make the sorted unique hyperparameters if they are given
         parameters_set = self.get_parameters_set(parameters=parameters)
-        if self.log:
-            if array:
-                return np.concatenate(
+        if self.use_log:
+            if use_array:
+                return concatenate(
                     [
-                        np.mean(self.bounds_dict[para], axis=1)
+                        self.bounds_dict[para].mean(axis=1)
                         for para in parameters_set
                     ]
                 )
             return {
-                para: np.mean(self.bounds_dict[para], axis=1)
+                para: self.bounds_dict[para].mean(axis=1)
                 for para in parameters_set
             }
-        if array:
-            return np.concatenate(
+        if use_array:
+            return concatenate(
                 [
-                    np.exp(np.mean(np.log(self.bounds_dict[para]), axis=1))
+                    exp(log(self.bounds_dict[para]).mean(axis=1))
                     for para in parameters_set
                 ]
             )
         return {
-            para: np.exp(np.mean(np.log(self.bounds_dict[para]), axis=1))
+            para: exp(log(self.bounds_dict[para]).mean(axis=1))
             for para in parameters_set
         }
""" - bounds = self.get_bounds(parameters=parameters, array=True) + bounds = self.get_bounds(parameters=parameters, use_array=True) if isinstance(ngrid, (int, float)): ngrid = [int(ngrid)] * len(bounds) return [ - np.linspace(bound[0], bound[1], ngrid[b]) + linspace(bound[0], bound[1], ngrid[b]) for b, bound in enumerate(bounds) ] @@ -156,49 +188,98 @@ def make_single_line(self, parameter, ngrid=80, i=0, **kwargs): the boundary conditions. Parameters: - parameters : str + parameters: str A string of the hyperparameter name. - ngrid : int + ngrid: int An integer with number of grid points in each dimension. - i : int + i: int The index of the hyperparameter used if multiple hyperparameters of the same type exist. Returns: - (ngrid) array : A grid of ngrid points for + (ngrid) array: A grid of ngrid points for the given hyperparameter. """ if not isinstance(ngrid, (int, float)): ngrid = ngrid[int(self.parameters.index(parameter) + i)] bound = self.bounds_dict[parameter][i] - return np.linspace(bound[0], bound[1], int(ngrid)) + return linspace(bound[0], bound[1], int(ngrid)) def sample_thetas(self, parameters=None, npoints=50, **kwargs): """ Sample hyperparameters from the boundary conditions. Parameters: - parameters : list of str or None + parameters: list of str or None A list of the specific used hyperparameter names as strings. If parameters=None, then the stored hyperparameters are used. - npoints : int + npoints: int Number of points to sample. Returns: - (npoints,H) array : An array with sampled hyperparameters. + (npoints,H) array: An array with sampled hyperparameters. """ - bounds = self.get_bounds(parameters=parameters, array=True) - return np.random.uniform( + bounds = self.get_bounds(parameters=parameters, use_array=True) + return self.rng.uniform( low=bounds[:, 0], high=bounds[:, 1], size=(int(npoints), len(bounds)), ) + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. 
+ + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + # Set a small number to avoid division by zero + self.eps = 1.1 * finfo(self.dtype).eps + # Update the data type of the boundary conditions + if hasattr(self, "bounds_dict"): + self.bounds_dict = { + key: array(value, dtype=self.dtype) + for key, value in self.bounds_dict.items() + } + return self + + def set_seed(self, seed=None, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + if seed is not None: + self.seed = seed + if isinstance(seed, int): + self.rng = default_rng(self.seed) + elif isinstance(seed, Generator) or isinstance(seed, RandomState): + self.rng = seed + else: + self.seed = None + self.rng = default_rng() + return self + def update_arguments( self, bounds_dict=None, scale=None, - log=None, + use_log=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -206,23 +287,36 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - bounds_dict : dict + bounds_dict: dict A dictionary with boundary conditions as numpy (H,2) arrays with two columns for each type of hyperparameter. - scale : float + scale: float Scale the boundary conditions. - log : bool + use_log: bool Whether to use hyperparameters in log-scale or not. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
""" + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype) if bounds_dict is not None: self.initiate_bounds_dict(bounds_dict) if scale is not None: self.scale = scale - if log is not None: - self.log = log + if use_log is not None: + self.use_log = use_log return self def make_bounds(self, model, X, Y, parameters, parameters_set, **kwargs): @@ -234,10 +328,12 @@ def make_bounds(self, model, X, Y, parameters, parameters_set, **kwargs): bounds = {} for para in parameters_set: if para in self.bounds_dict: - bounds[para] = self.bounds_dict[para].copy() + bounds[para] = array(self.bounds_dict[para], dtype=self.dtype) else: - bounds[para] = np.full( - (parameters.count(para), 2), [eps_lower, eps_upper] + bounds[para] = full( + (parameters.count(para), 2), + [eps_lower, eps_upper], + dtype=self.dtype, ) return bounds @@ -248,16 +344,18 @@ def initiate_bounds_dict(self, bounds_dict, **kwargs): """ # Copy the boundary condition values self.bounds_dict = { - key: np.array(value) for key, value in bounds_dict.items() + key: array(value, dtype=self.dtype) + for key, value in bounds_dict.items() } if "correction" in self.bounds_dict.keys(): self.bounds_dict.pop("correction") # Extract the hyperparameter names self.parameters_set = sorted(bounds_dict.keys()) - self.parameters = sum( - [[para] * len(bounds_dict[para]) for para in self.parameters_set], - [], - ) + self.parameters = [ + para + for para in self.parameters_set + for _ in range(len(bounds_dict[para])) + ] return self def make_parameters_set(self, parameters, **kwargs): @@ -288,9 +386,9 @@ def get_n_parameters(self, parameters=None, **kwargs): def get_boundary_limits(self, **kwargs): "Get the machine precision limits for the hyperparameters." 
-        eps_lower = 10 * np.sqrt(2.0 * np.finfo(float).eps) / self.scale
-        if self.log:
-            eps_lower = np.log(eps_lower)
+        eps_lower = 10 * sqrt(2.0 * self.eps) / self.scale
+        if self.use_log:
+            eps_lower = log(eps_lower)
             return eps_lower, -eps_lower
         return eps_lower, 1.0 / eps_lower

@@ -300,7 +398,9 @@ def get_arguments(self):
         arg_kwargs = dict(
             bounds_dict=self.bounds_dict,
             scale=self.scale,
-            log=self.log,
+            use_log=self.use_log,
+            seed=self.seed,
+            dtype=self.dtype,
         )
         # Get the constants made within the class
         constant_kwargs = dict()
diff --git a/catlearn/regression/gp/hpboundary/educated.py b/catlearn/regression/gp/hpboundary/educated.py
index 6bd929da..e43834f9 100644
--- a/catlearn/regression/gp/hpboundary/educated.py
+++ b/catlearn/regression/gp/hpboundary/educated.py
@@ -1,55 +1,70 @@
-import numpy as np
+from numpy import array, asarray, full, log, sqrt
 from scipy.spatial.distance import pdist
 from .restricted import RestrictedBoundaries


 class EducatedBoundaries(RestrictedBoundaries):
+    """
+    Boundary conditions for the hyperparameters with educated guess for
+    the length-scale, relative-noise, and prefactor hyperparameters.
+    Machine precisions are used as boundary conditions for
+    other hyperparameters not given in the dictionary.
+    """
+
     def __init__(
         self,
         bounds_dict={},
         scale=1.0,
-        log=True,
+        use_log=True,
         max_length=True,
         use_derivatives=False,
         use_prior_mean=True,
+        seed=None,
+        dtype=float,
         **kwargs,
     ):
         """
-        Boundary conditions for the hyperparameters with educated guess for
-        the length-scale, relative-noise, and prefactor hyperparameters.
-        Machine precisions are used as boundary conditions for
-        other hyperparameters not given in the dictionary.
+        Initialize the boundary conditions for the hyperparameters.

         Parameters:
-            bounds_dict : dict
+            bounds_dict: dict
                 A dictionary with boundary conditions as numpy (H,2) arrays
                 with two columns for each type of hyperparameter.
-            scale : float
+            scale: float
                 Scale the boundary conditions.
- log : bool + use_log: bool Whether to use hyperparameters in log-scale or not. - max_length : bool + max_length: bool Whether to use the maximum scaling for the length-scale or use a more reasonable scaling. - use_derivatives : bool + use_derivatives: bool Whether the derivatives of the target are used in the model. The boundary conditions of the length-scale hyperparameter(s) will change with the use_derivatives. The use_derivatives will be updated when update_bounds is called. - use_prior_mean : bool + use_prior_mean: bool Whether to use the prior mean to calculate the boundary of the prefactor hyperparameter. If use_prior_mean=False, the minimum and maximum target differences are used as the boundary conditions. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ self.update_arguments( bounds_dict=bounds_dict, scale=scale, - log=log, + use_log=use_log, max_length=max_length, use_derivatives=use_derivatives, use_prior_mean=use_prior_mean, + seed=seed, + dtype=dtype, **kwargs, ) @@ -57,10 +72,12 @@ def update_arguments( self, bounds_dict=None, scale=None, - log=None, + use_log=None, max_length=None, use_derivatives=None, use_prior_mean=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -68,41 +85,49 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - bounds_dict : dict + bounds_dict: dict A dictionary with boundary conditions as numpy (H,2) arrays with two columns for each type of hyperparameter. - scale : float + scale: float Scale the boundary conditions. - log : bool + use_log: bool Whether to use hyperparameters in log-scale or not. - max_length : bool + max_length: bool Whether to use the maximum scaling for the length-scale or use a more reasonable scaling. 
- use_derivatives : bool + use_derivatives: bool Whether the derivatives of the target are used in the model. The boundary conditions of the length-scale hyperparameter(s) will change with the use_derivatives. The use_derivatives will be updated when update_bounds is called. - use_prior_mean : bool + use_prior_mean: bool Whether to use the prior mean to calculate the boundary of the prefactor hyperparameter. If use_prior_mean=False, the minimum and maximum target differences are used as the boundary conditions. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if bounds_dict is not None: - self.initiate_bounds_dict(bounds_dict) - if scale is not None: - self.scale = scale - if log is not None: - self.log = log - if max_length is not None: - self.max_length = max_length - if use_derivatives is not None: - self.use_derivatives = use_derivatives + # Update the parameters of the parent class + super().update_arguments( + bounds_dict=bounds_dict, + scale=scale, + use_log=use_log, + max_length=max_length, + use_derivatives=use_derivatives, + seed=seed, + dtype=dtype, + ) + # Update the parameters of the class itself if use_prior_mean is not None: self.use_prior_mean = use_prior_mean return self @@ -117,7 +142,8 @@ def make_bounds(self, model, X, Y, parameters, parameters_set, **kwargs): elif para == "noise": if "noise_deriv" in parameters_set: bounds[para] = self.noise_bound( - Y[:, 0:1], eps_lower=eps_lower + Y[:, 0:1], + eps_lower=eps_lower, ) else: bounds[para] = self.noise_bound(Y, eps_lower=eps_lower) @@ -126,10 +152,12 @@ def make_bounds(self, model, X, Y, parameters, parameters_set, **kwargs): elif para == "prefactor": bounds[para] = self.prefactor_bound(X, Y, model) elif para in self.bounds_dict: - 
bounds[para] = self.bounds_dict[para].copy() + bounds[para] = array(self.bounds_dict[para], dtype=self.dtype) else: - bounds[para] = np.full( - (parameters.count(para), 2), [eps_lower, eps_upper] + bounds[para] = full( + (parameters.count(para), 2), + [eps_lower, eps_upper], + dtype=self.dtype, ) return bounds @@ -143,7 +171,7 @@ def prefactor_bound(self, X, Y, model, **kwargs): Y_mean = self.get_prior_mean(X, Y, model) Y_std = Y[:, 0:1] - Y_mean # Calculate the variance relative to the prior mean of the targets - a_mean = np.sqrt(np.mean(Y_std**2)) + a_mean = sqrt((Y_std**2).mean()) # Check that all the targets are not the same if a_mean == 0.0: a_mean = 1.00 @@ -153,16 +181,17 @@ def prefactor_bound(self, X, Y, model, **kwargs): else: # Calculate the differences in the target values dif = pdist(Y[:, 0:1]) + dif = asarray(dif, dtype=self.dtype) # Remove zero differences dif = dif[dif != 0.0] # Check that all the targets are not the same if len(dif) == 0: - dif = [1.0] - a_max = np.max(dif) * self.scale - a_min = np.min(dif) / self.scale - if self.log: - return np.array([[np.log(a_min), np.log(a_max)]]) - return np.array([[a_min, a_max]]) + dif = asarray([1.0], dtype=self.dtype) + a_max = dif.max() * self.scale + a_min = dif.min() / self.scale + if self.use_log: + return asarray([[log(a_min), log(a_max)]], dtype=self.dtype) + return asarray([[a_min, a_max]], dtype=self.dtype) def get_prior_mean(self, X, Y, model, **kwargs): "Get the prior mean value for the target only (without derivatives)." 
@@ -176,10 +205,12 @@ def get_arguments(self): arg_kwargs = dict( bounds_dict=self.bounds_dict, scale=self.scale, - log=self.log, + use_log=self.use_log, max_length=self.max_length, use_derivatives=self.use_derivatives, use_prior_mean=self.use_prior_mean, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/hpboundary/hptrans.py b/catlearn/regression/gp/hpboundary/hptrans.py index c2c400b1..cf000410 100644 --- a/catlearn/regression/gp/hpboundary/hptrans.py +++ b/catlearn/regression/gp/hpboundary/hptrans.py @@ -1,60 +1,80 @@ -import numpy as np +from numpy import ( + abs as abs_, + array, + concatenate, + exp, + finfo, + full, + linspace, + log, + where, +) from .boundary import HPBoundaries +from .strict import StrictBoundaries class VariableTransformation(HPBoundaries): + """ + Make variable transformation of hyperparameters into + an interval of (0,1). + A dictionary of mean and standard deviation values are used + to make Logistic transformations. + Boundary conditions can be used to calculate + the variable transformation parameters. + """ + def __init__( self, var_dict={}, bounds=None, s=0.14, - eps=np.finfo(float).eps, + seed=None, + dtype=float, **kwargs, ): """ - Make variable transformation of hyperparameters into - an interval of (0,1). - A dictionary of mean and standard deviation values are used - to make Logistic transformations. - Boundary conditions can be used to calculate - the variable transformation parameters. + Initialize the variable transformation of hyperparameters. Parameters: - var_dict : dict + var_dict: dict A dictionary with the variable transformation parameters (mean,std) for each hyperparameter. - bounds : Boundary condition class + bounds: Boundary condition class A Boundary condition class that make the boundaries of the hyperparameters. The boundaries are used to calculate the variable transformation parameters. 
- s : float + s: float The scale parameter in a Logistic distribution. It determines how large part of the distribution that is within the boundaries. s=0.5*p/(ln(p)-ln(1-p)) with p being the quantile that the boundaries constitute. - eps : float - The first value of a grid in the variable transformed - hyperparameter space. - The last value of a grid is 1.0-eps. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ # Set the default boundary conditions if bounds is None: - from .strict import StrictBoundaries - bounds = StrictBoundaries( bounds_dict={}, scale=1.0, - log=True, + use_log=True, use_prior_mean=True, + seed=seed, + dtype=dtype, ) # Set all the arguments self.update_arguments( var_dict=var_dict, bounds=bounds, s=s, - eps=eps, + seed=seed, + dtype=dtype, **kwargs, ) @@ -64,87 +84,88 @@ def update_bounds(self, model, X, Y, parameters, **kwargs): Therefore, the variable transformation parameters are also updated. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - X : (N,D) array + X: (N,D) array Training features with N data points and D dimensions. - Y : (N,1) array or (N,D+1) array + Y: (N,1) array or (N,D+1) array Training targets with or without derivatives with N data points. - parameters : (H) list of strings + parameters: (H) list of strings A list of names of the hyperparameters. Returns: - self : The object itself. + self: The object itself. 
""" # Update the parameters used self.make_parameters_set(parameters) # Update the boundary conditions and get them self.bounds.update_bounds(model, X, Y, parameters) - self.bounds_dict = self.bounds.get_bounds(array=False) + self.bounds_dict = self.bounds.get_bounds(use_array=False) # Update the variable transformation parameters for para, bound in self.bounds_dict.items(): self.var_dict[para] = { - "mean": np.mean(bound, axis=1), - "std": self.s * np.abs(bound[:, 1] - bound[:, 0]), + "mean": bound.mean(axis=1), + "std": self.s * abs_(bound[:, 1] - bound[:, 0]), } return self def get_variable_transformation_parameters( self, parameters=None, - array=False, + use_array=False, **kwargs, ): """ Get the variable transformation parameters. Parameters: - parameters : list of str or None + parameters: list of str or None A list of the specific used hyperparameter names as strings. If parameters=None, then the stored hyperparameters are used. - array : bool + use_array: bool Whether to get an array for the mean and std or a dictionary as output. Returns: - dict : A dictionary of the variable transformation parameters. - If array=True, a dictionary with mean and std is given instead. + dict: A dictionary of the variable transformation parameters. + If use_array=True, a dictionary with mean and std is given instead. 
""" # Make the sorted unique hyperparameters if they are given parameters_set = self.get_parameters_set(parameters=parameters) - if array: + if use_array: var_dict_array = {} - var_dict_array["mean"] = np.concatenate( + var_dict_array["mean"] = concatenate( [self.var_dict[para]["mean"] for para in parameters_set], axis=0, ) - var_dict_array["std"] = np.concatenate( - [self.var_dict[para]["std"] for para in parameters_set], axis=0 + var_dict_array["std"] = concatenate( + [self.var_dict[para]["std"] for para in parameters_set], + axis=0, ) return var_dict_array return {para: self.var_dict[para].copy() for para in parameters_set} - def transformation(self, hp, array=False, **kwargs): + def transformation(self, hp, use_array=False, **kwargs): """ Transform the hyperparameters with the variable transformation to get a dictionary. Parameters: - hp : dict + hp: dict The dictionary of the hyperparameters - array : bool + use_array: bool Whether to get an array or a dictionary as output. Returns: - (H) array : The variable transformed hyperparameters as an array - if array=True. + (H) array: The variable transformed hyperparameters as an array + if use_array=True. or - dict : A dictionary of the variable transformed hyperparameters. + dict: A dictionary of the variable transformed hyperparameters. """ - if array: - return np.concatenate( + if use_array: + return concatenate( [ self.transform( theta, @@ -152,7 +173,7 @@ def transformation(self, hp, array=False, **kwargs): self.var_dict[para]["std"], ) for para, theta in hp.items() - ] + ], ) return { para: self.transform( @@ -163,25 +184,25 @@ def transformation(self, hp, array=False, **kwargs): for para, theta in hp.items() } - def reverse_trasformation(self, t, array=False, **kwargs): + def reverse_trasformation(self, t, use_array=False, **kwargs): """ Transform the variable transformed hyperparameters back to the hyperparameters dictionary. 
Parameters: - t : dict + t: dict The dictionary of the variable transformed hyperparameters - array : bool + use_array: bool Whether to get an array or a dictionary as output. Returns: - (H) array : The retransformed hyperparameters as an array - if array=True. + (H) array: The retransformed hyperparameters as an array + if use_array=True. or - dict : A dictionary of the retransformed hyperparameters. + dict: A dictionary of the retransformed hyperparameters. """ - if array: - return np.concatenate( + if use_array: + return concatenate( [ self.retransform( ti, @@ -189,7 +210,7 @@ def reverse_trasformation(self, t, array=False, **kwargs): self.var_dict[para]["std"], ) for para, ti in t.items() - ] + ], ) return { para: self.retransform( @@ -203,7 +224,7 @@ def reverse_trasformation(self, t, array=False, **kwargs): def get_bounds( self, parameters=None, - array=False, + use_array=False, transformed=False, **kwargs, ): @@ -211,43 +232,51 @@ def get_bounds( Get the boundary conditions of hyperparameters. Parameters : - parameters : list of str or None + parameters: list of str or None A list of the specific used hyperparameter names as strings. If parameters=None, then the stored hyperparameters are used. - array : bool + use_array: bool Whether to get an array or a dictionary as output. - transformed : bool + transformed: bool If transformed=True, the boundaries is in variable transformed space. If transformed=False, the boundaries is transformed back to hyperparameter space. Returns: - (H,2) array : The boundary conditions as an array if array=True. + (H,2) array: The boundary conditions as an array if use_array=True. or - dict : A dictionary of the boundary conditions. + dict: A dictionary of the boundary conditions. 
""" # Get the bounds in the variable transformed space if transformed: - if array: + if use_array: n_parameters = self.get_n_parameters(parameters=parameters) - return np.full((n_parameters, 2), [self.eps, 1.00 - self.eps]) + return full( + (n_parameters, 2), + [self.eps, 1.00 - self.eps], + dtype=self.dtype, + ) # Make the sorted unique hyperparameters if they are given parameters_set = self.get_parameters_set(parameters=parameters) return { - para: np.full( + para: full( (len(self.bounds_dict[para]), 2), [self.eps, 1.00 - self.eps], + dtype=self.dtype, ) for para in parameters_set } # Get the bounds in the hyperparameter space - return self.bounds.get_bounds(parameters=parameters, array=array) + return self.bounds.get_bounds( + parameters=parameters, + use_array=use_array, + ) def get_hp( self, parameters=None, - array=False, + use_array=False, transformed=False, **kwargs, ): @@ -256,36 +285,40 @@ def get_hp( The mean of the boundary conditions in log-space is used as the guess. Parameters: - parameters : list of str or None + parameters: list of str or None A list of the specific used hyperparameter names as strings. If parameters=None, then the stored hyperparameters are used. - array : bool + use_array: bool Whether to get an array or a dictionary as output. - transformed : bool + transformed: bool If transformed=True, the boundaries is in variable transformed space. If transformed=False, the boundaries is transformed back to hyperparameter space. Returns: - (H) array : The guesses of the hyperparameters as an array - if array=True. + (H) array: The guesses of the hyperparameters as an array + if use_array=True. or - dict : A dictionary of the guesses of the hyperparameters. + dict: A dictionary of the guesses of the hyperparameters. 
""" # Get the hyperparameter guess in the variable transformed space (0.5) if transformed: - if array: + if use_array: n_parameters = self.get_n_parameters(parameters=parameters) - return np.full((n_parameters), 0.50) + return full((n_parameters), 0.50, dtype=self.dtype) # Make the sorted unique hyperparameters if they are given parameters_set = self.get_parameters_set(parameters=parameters) return { - para: np.full((len(self.bounds_dict[para])), 0.50) + para: full( + (len(self.bounds_dict[para])), + 0.50, + dtype=self.dtype, + ) for para in parameters_set } # Get the hyperparameter guess in the hyperparameter space - return self.bounds.get_hp(parameters=parameters, array=array) + return self.bounds.get_hp(parameters=parameters, use_array=use_array) def make_lines( self, @@ -299,16 +332,16 @@ def make_lines( the boundary conditions. Parameters: - ngrid : int or (H) list + ngrid: int or (H) list An integer or a list with number of grid points in each dimension. - transformed : bool + transformed: bool If transformed=True, the grid is in variable transformed space. If transformed=False, the grid is transformed back to hyperparameter space. Returns: - (H,) list : A list with grid points for each (H) hyperparameters. + (H,) list: A list with grid points for each (H) hyperparameters. 
""" # Get the number of hyperparameters n_parameters = self.get_n_parameters(parameters=parameters) @@ -318,13 +351,13 @@ def make_lines( # The grid is made within the variable transformed hyperparameters if transformed: return [ - np.linspace(self.eps, 1.00 - self.eps, ngrid[i]) + linspace(self.eps, 1.00 - self.eps, ngrid[i], dtype=self.dtype) for i in range(n_parameters) ] # The grid is within the transformed space and it is then retransformed var_dict_array = self.get_variable_transformation_parameters( parameters=parameters, - array=True, + use_array=True, ) lines = [] for i, (vt_mean, vt_std) in enumerate( @@ -333,39 +366,49 @@ def make_lines( var_dict_array["std"], ) ): - t_line = np.linspace(self.eps, 1.00 - self.eps, ngrid[i]) + t_line = linspace( + self.eps, + 1.00 - self.eps, + ngrid[i], + dtype=self.dtype, + ) lines.append(self.retransform(t_line, vt_mean, vt_std)) return lines def make_single_line( - self, parameter, ngrid=80, i=0, transformed=False, **kwargs + self, + parameter, + ngrid=80, + i=0, + transformed=False, + **kwargs, ): """ Make grid in each dimension of the hyperparameters from the boundary conditions. Parameters: - parameters : str + parameters: str A string of the hyperparameter name. - ngrid : int + ngrid: int An integer with number of grid points in each dimension. - i : int + i: int The index of the hyperparameter used if multiple hyperparameters of the same type exist. - transformed : bool + transformed: bool If transformed=True, the grid is in variable transformed space. If transformed=False, the grid is transformed back to hyperparameter space. Returns: - (ngrid) array : A grid of ngrid points for + (ngrid) array: A grid of ngrid points for the given hyperparameter. 
""" # Make sure that a int of number grid points is used if not isinstance(ngrid, (int, float)): ngrid = ngrid[int(self.parameters.index(parameter) + i)] # The grid is made within the variable transformed hyperparameters - t_line = np.linspace(self.eps, 1.00 - self.eps, ngrid) + t_line = linspace(self.eps, 1.00 - self.eps, ngrid, dtype=self.dtype) if transformed: return t_line # The grid is transformed back to hyperparameter space @@ -376,7 +419,11 @@ def make_single_line( ) def sample_thetas( - self, parameters=None, npoints=50, transformed=False, **kwargs + self, + parameters=None, + npoints=50, + transformed=False, + **kwargs, ): """ Sample hyperparameters from the transformed hyperparameter space. @@ -384,23 +431,23 @@ def sample_thetas( then transformed back to the hyperparameter space. Parameters: - parameters : list of str or None + parameters: list of str or None A list of the specific used hyperparameter names as strings. If parameters=None, then the stored hyperparameters are used. - npoints : int + npoints: int Number of points to sample. - transformed : bool + transformed: bool If transformed=True, the grid is in variable transformed space. If transformed=False, the grid is transformed back to hyperparameter space. Returns: - (npoints,H) array : An array with sampled hyperparameters. + (npoints,H) array: An array with sampled hyperparameters. 
""" # Get the number of hyperparameters n_parameters = self.get_n_parameters(parameters=parameters) # Sample the hyperparameters from the transformed hyperparameter space - samples = np.random.uniform( + samples = self.rng.uniform( low=self.eps, high=1.00 - self.eps, size=(npoints, n_parameters), @@ -410,7 +457,8 @@ def sample_thetas( return samples # The samples are transformed back to hyperparameter space var_dict_array = self.get_variable_transformation_parameters( - parameters=parameters, array=True + parameters=parameters, + use_array=True, ) for i, (vt_mean, vt_std) in enumerate( zip(var_dict_array["mean"], var_dict_array["std"]) @@ -418,12 +466,34 @@ def sample_thetas( samples[:, i] = self.retransform(samples[:, i], vt_mean, vt_std) return samples + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + # Set the data type of the bounds + self.bounds.set_dtype(dtype, **kwargs) + # Set the data type of the variable transformation parameters + if hasattr(self, "var_dict"): + self.var_dict = { + key: { + "mean": array(value["mean"], dtype=self.dtype), + "std": array(value["std"], dtype=self.dtype), + } + for key, value in self.var_dict.items() + } + return self + + def set_seed(self, seed=None, **kwargs): + super().set_seed(seed, **kwargs) + # Set the seed of the bounds + self.bounds.set_seed(seed, **kwargs) + return self + def update_arguments( self, var_dict=None, bounds=None, s=None, - eps=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -431,55 +501,65 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - var_dict : dict + var_dict: dict A dictionary with the variable transformation parameters (mean,std) for each hyperparameter. - bounds : Boundary condition class + bounds: Boundary condition class A Boundary condition class that make the boundaries of the hyperparameters. The boundaries are used to calculate the variable transformation parameters. 
- s : float + s: float The scale parameter in a Logistic distribution. It determines how large part of the distribution that is within the boundaries. s=0.5*p/(ln(p)-ln(1-p)) with p being the quantile that the boundaries constitute. - eps : float - The first value of a grid in the variable transformed - hyperparameter space. - The last value of a grid is 1.0-eps. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if var_dict is not None: - self.initiate_var_dict(var_dict) + # Set the boundary condition instance if bounds is not None: self.initiate_bounds_dict(bounds) + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype) + if var_dict is not None: + self.initiate_var_dict(var_dict) if s is not None: self.s = s - if eps is not None: - self.eps = eps return self def transform(self, theta, vt_mean, vt_std, **kwargs): "Transform the hyperparameters with the variable transformation." - return 1.0 / (1.0 + np.exp(-(theta - vt_mean) / vt_std)) + return 1.0 / (1.0 + exp(-(theta - vt_mean) / vt_std)) def retransform(self, ti, vt_mean, vt_std, **kwargs): """ Transform the variable transformed hyperparameters back to the hyperparameters. """ - return self.numeric_limits(vt_std * np.log(ti / (1.00 - ti)) + vt_mean) + return self.numeric_limits(vt_std * log(ti / (1.00 - ti)) + vt_mean) - def numeric_limits(self, value, dh=0.1 * np.log(np.finfo(float).max)): + def numeric_limits(self, value, dh=None): """ Replace hyperparameters if they are outside of the numeric limits in log-space. 
""" - return np.where(-dh < value, np.where(value < dh, value, dh), -dh) + if dh is None: + dh = 0.1 * log(finfo(self.dtype).max) + return where(-dh < value, where(value < dh, value, dh), -dh) def initiate_var_dict(self, var_dict, **kwargs): """ @@ -489,8 +569,8 @@ def initiate_var_dict(self, var_dict, **kwargs): # Copy the variable transformation parameters self.var_dict = { key: { - "mean": np.array(value["mean"]), - "std": np.array(value["std"]), + "mean": array(value["mean"], dtype=self.dtype), + "std": array(value["std"], dtype=self.dtype), } for key, value in var_dict.items() } @@ -498,19 +578,21 @@ def initiate_var_dict(self, var_dict, **kwargs): self.var_dict.pop("correction") # Extract the hyperparameters self.parameters_set = sorted(var_dict.keys()) - self.parameters = sum( - [[para] * len(var_dict[para]) for para in self.parameters_set], [] - ) + self.parameters = [ + para + for para in self.parameters_set + for _ in range(len(var_dict[para])) + ] return self def initiate_bounds_dict(self, bounds, **kwargs): "Make and store the hyperparameter bounds." # Copy the boundary condition object self.bounds = bounds.copy() - self.bounds_dict = self.bounds.get_bounds(array=False) + self.bounds_dict = self.bounds.get_bounds(use_array=False) # Make sure log-scale of the hyperparameters are used - if self.bounds.log is False: - raise Exception( + if self.bounds.use_log is False: + raise ValueError( "The Variable Transformation need to " "use boundary conditions in the log-scale!" 
) @@ -523,7 +605,8 @@ def get_arguments(self): var_dict=self.var_dict, bounds=self.bounds, s=self.s, - eps=self.eps, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/hpboundary/length.py b/catlearn/regression/gp/hpboundary/length.py index 6fccbf97..558fa42d 100644 --- a/catlearn/regression/gp/hpboundary/length.py +++ b/catlearn/regression/gp/hpboundary/length.py @@ -1,58 +1,100 @@ -import numpy as np +from numpy import ( + array, + asarray, + fill_diagonal, + full, + inf, + log, + median, + ndarray, + sqrt, + zeros, +) from scipy.spatial.distance import pdist, squareform from .boundary import HPBoundaries class LengthBoundaries(HPBoundaries): + """ + Boundary conditions for the hyperparameters with educated guess for + the length-scale hyperparameter. + Machine precisions are used as default boundary conditions for + the rest of the hyperparameters not given in the dictionary. + """ + def __init__( self, bounds_dict={}, scale=1.0, - log=True, + use_log=True, max_length=True, use_derivatives=False, + seed=None, + dtype=float, **kwargs, ): """ - Boundary conditions for the hyperparameters with educated guess for - the length-scale hyperparameter. - Machine precisions are used as default boundary conditions for - the rest of the hyperparameters not given in the dictionary. + Initialize the boundary conditions for the hyperparameters. Parameters: - bounds_dict : dict + bounds_dict: dict A dictionary with boundary conditions as numpy (H,2) arrays with two columns for each type of hyperparameter. - scale : float + scale: float Scale the boundary conditions. - log : bool + use_log: bool Whether to use hyperparameters in log-scale or not. - max_length : bool + max_length: bool Whether to use the maximum scaling for the length-scale or use a more reasonable scaling. - use_derivatives : bool + use_derivatives: bool Whether the derivatives of the target are used in the model. 
The boundary conditions of the length-scale hyperparameter(s) will change with the use_derivatives. The use_derivatives will be updated when update_bounds is called. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ self.update_arguments( bounds_dict=bounds_dict, scale=scale, - log=log, + use_log=use_log, max_length=max_length, use_derivatives=use_derivatives, + seed=seed, + dtype=dtype, **kwargs, ) + def set_use_derivatives(self, use_derivatives, **kwargs): + """ + Set whether to use derivatives for the targets. + + Parameters: + use_derivatives: bool + Use derivatives/gradients for targets. + + Returns: + self: The updated object itself. + """ + self.use_derivatives = use_derivatives + return self + def update_arguments( self, bounds_dict=None, scale=None, - log=None, + use_log=None, max_length=None, use_derivatives=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -60,36 +102,46 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - bounds_dict : dict + bounds_dict: dict A dictionary with boundary conditions as numpy (H,2) arrays with two columns for each type of hyperparameter. - scale : float + scale: float Scale the boundary conditions. - log : bool + use_log: bool Whether to use hyperparameters in log-scale or not. - max_length : bool + max_length: bool Whether to use the maximum scaling for the length-scale or use a more reasonable scaling. - use_derivatives : bool + use_derivatives: bool Whether the derivatives of the target are used in the model. The boundary conditions of the length-scale hyperparameter(s) will change with the use_derivatives. The use_derivatives will be updated when update_bounds is called. + seed: int (optional) + The random seed. 
+ The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if bounds_dict is not None: - self.initiate_bounds_dict(bounds_dict) - if scale is not None: - self.scale = scale - if log is not None: - self.log = log + # Update the parameters of the parent class + super().update_arguments( + bounds_dict=bounds_dict, + scale=scale, + use_log=use_log, + seed=seed, + dtype=dtype, + ) + # Update the parameters of the class itself if max_length is not None: self.max_length = max_length if use_derivatives is not None: - self.use_derivatives = use_derivatives + self.set_use_derivatives(use_derivatives) return self def make_bounds(self, model, X, Y, parameters, parameters_set, **kwargs): @@ -100,10 +152,12 @@ def make_bounds(self, model, X, Y, parameters, parameters_set, **kwargs): if para == "length": bounds[para] = self.length_bound(X, parameters.count(para)) elif para in self.bounds_dict: - bounds[para] = self.bounds_dict[para].copy() + bounds[para] = array(self.bounds_dict[para], dtype=self.dtype) else: - bounds[para] = np.full( - (parameters.count(para), 2), [eps_lower, eps_upper] + bounds[para] = full( + (parameters.count(para), 2), + [eps_lower, eps_upper], + dtype=self.dtype, ) return bounds @@ -113,51 +167,53 @@ def length_bound(self, X, l_dim, **kwargs): in the educated guess regime within a scale. 
""" # Get the minimum and maximum machine precision for exponential terms - exp_lower = np.sqrt(-1 / np.log(np.finfo(float).eps)) / self.scale - exp_max = np.sqrt(-1 / np.log(1 - np.finfo(float).eps)) * self.scale + exp_lower = sqrt(-1.0 / log(self.eps)) / self.scale + exp_max = sqrt(-1.0 / log(1 - self.eps)) * self.scale # Use a smaller maximum boundary if only one length-scale is used if not self.max_length or l_dim == 1: exp_max = 2.0 * self.scale # Scale the convergence if derivatives of targets are used if self.use_derivatives: exp_lower = exp_lower * 0.05 - lengths = np.zeros((l_dim, 2)) + lengths = zeros((l_dim, 2), dtype=self.dtype) # If only one features is given then end if len(X) == 1: lengths[:, 0] = exp_lower lengths[:, 1] = exp_max - if self.log: - return np.log(lengths) + if self.use_log: + return log(lengths) return lengths # Ensure that the features are a matrix - if not isinstance(X[0], (list, np.ndarray)): - X = np.array([fp.get_vector() for fp in X]) + if not isinstance(X[0], (list, ndarray)): + X = asarray([fp.get_vector() for fp in X], dtype=self.dtype) for d in range(l_dim): # Calculate distances if l_dim == 1: dis = pdist(X) else: - dis = pdist(X[:, d : d + 1]) + d1 = d + 1 + dis = pdist(X[:, d:d1]) + dis = asarray(dis, dtype=self.dtype) # Calculate the maximum length-scale - dis_max = exp_max * np.max(dis) + dis_max = exp_max * dis.max() if dis_max == 0.0: dis_min, dis_max = exp_lower, exp_max else: # The minimum length-scale from the nearest neighbor distance - dis_min = exp_lower * np.median(self.nearest_neighbors(dis)) + dis_min = exp_lower * median(self.nearest_neighbors(dis)) if dis_min == 0.0: dis_min = exp_lower # Transform into log-scale if specified lengths[d, 0], lengths[d, 1] = dis_min, dis_max - if self.log: - return np.log(lengths) + if self.use_log: + return log(lengths) return lengths def nearest_neighbors(self, dis, **kwargs): "Nearest neighbor distance." 
dis_matrix = squareform(dis) - np.fill_diagonal(dis_matrix, np.inf) - return np.min(dis_matrix, axis=1) + fill_diagonal(dis_matrix, inf) + return dis_matrix.min(axis=1) def get_use_derivatives(self, model, **kwargs): "Get whether the derivatives of targets are used in the model." @@ -170,9 +226,11 @@ def get_arguments(self): arg_kwargs = dict( bounds_dict=self.bounds_dict, scale=self.scale, - log=self.log, + use_log=self.use_log, max_length=self.max_length, use_derivatives=self.use_derivatives, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/hpboundary/restricted.py b/catlearn/regression/gp/hpboundary/restricted.py index 3ceac3ae..742f6e3f 100644 --- a/catlearn/regression/gp/hpboundary/restricted.py +++ b/catlearn/regression/gp/hpboundary/restricted.py @@ -1,49 +1,14 @@ -import numpy as np +from numpy import array, asarray, finfo, full, log, sqrt from .length import LengthBoundaries class RestrictedBoundaries(LengthBoundaries): - def __init__( - self, - bounds_dict={}, - scale=1.0, - log=True, - max_length=True, - use_derivatives=False, - **kwargs, - ): - """ - Boundary conditions for the hyperparameters with educated guess for - the length-scale and relative-noise hyperparameters. - Machine precisions are used as default boundary conditions for - the rest of the hyperparameters not given in the dictionary. - - Parameters: - bounds_dict : dict - A dictionary with boundary conditions as numpy (H,2) arrays - with two columns for each type of hyperparameter. - scale : float - Scale the boundary conditions. - log : bool - Whether to use hyperparameters in log-scale or not. - max_length : bool - Whether to use the maximum scaling for the length-scale or - use a more reasonable scaling. - use_derivatives : bool - Whether the derivatives of the target are used in the model. - The boundary conditions of the length-scale hyperparameter(s) - will change with the use_derivatives. 
- The use_derivatives will be updated when - update_bounds is called. - """ - super().__init__( - bounds_dict=bounds_dict, - scale=scale, - log=log, - max_length=max_length, - use_derivatives=use_derivatives, - **kwargs, - ) + """ + Boundary conditions for the hyperparameters with educated guess for + the length-scale and relative-noise hyperparameters. + Machine precisions are used as default boundary conditions for + the rest of the hyperparameters not given in the dictionary. + """ def make_bounds(self, model, X, Y, parameters, parameters_set, **kwargs): eps_lower, eps_upper = self.get_boundary_limits() @@ -55,31 +20,36 @@ def make_bounds(self, model, X, Y, parameters, parameters_set, **kwargs): elif para == "noise": if "noise_deriv" in parameters_set: bounds[para] = self.noise_bound( - Y[:, 0:1], eps_lower=eps_lower + Y[:, 0:1], + eps_lower=eps_lower, ) else: bounds[para] = self.noise_bound(Y, eps_lower=eps_lower) elif para == "noise_deriv": bounds[para] = self.noise_bound(Y[:, 1:], eps_lower=eps_lower) elif para in self.bounds_dict: - bounds[para] = self.bounds_dict[para].copy() + bounds[para] = array(self.bounds_dict[para], dtype=self.dtype) else: - bounds[para] = np.full( - (parameters.count(para), 2), [eps_lower, eps_upper] + bounds[para] = full( + (parameters.count(para), 2), + [eps_lower, eps_upper], + dtype=self.dtype, ) return bounds def noise_bound( self, Y, - eps_lower=10 * np.sqrt(2.0 * np.finfo(float).eps), + eps_lower=None, **kwargs, ): """ Get the minimum and maximum ranges of the noise in the educated guess regime within a scale. 
""" + if eps_lower is None: + eps_lower = 10.0 * sqrt(2.0 * finfo(self.dtype).eps) n_max = len(Y.reshape(-1)) * self.scale - if self.log: - return np.array([[eps_lower, np.log(n_max)]]) - return np.array([[eps_lower, n_max]]) + if self.use_log: + return asarray([[eps_lower, log(n_max)]], dtype=self.dtype) + return asarray([[eps_lower, n_max]], dtype=self.dtype) diff --git a/catlearn/regression/gp/hpboundary/strict.py b/catlearn/regression/gp/hpboundary/strict.py index 0a9c7aac..555099b8 100644 --- a/catlearn/regression/gp/hpboundary/strict.py +++ b/catlearn/regression/gp/hpboundary/strict.py @@ -1,57 +1,16 @@ -import numpy as np +from numpy import asarray, log, median, ndarray, zeros from scipy.spatial.distance import pdist from .educated import EducatedBoundaries class StrictBoundaries(EducatedBoundaries): - def __init__( - self, - bounds_dict={}, - scale=1.0, - log=True, - use_derivatives=False, - use_prior_mean=True, - **kwargs - ): - """ - Boundary conditions for the hyperparameters with educated guess for - the length-scale, relative-noise, and prefactor hyperparameters. - Stricter boundary conditions are used for - the length-scale hyperparameter. - Machine precisions are used as boundary conditions for - other hyperparameters not given in the dictionary. - - Parameters: - bounds_dict : dict - A dictionary with boundary conditions as numpy (H,2) arrays - with two columns for each type of hyperparameter. - scale : float - Scale the boundary conditions. - log : bool - Whether to use hyperparameters in log-scale or not. - max_length : bool - Whether to use the maximum scaling for the length-scale or - use a more reasonable scaling. - use_derivatives : bool - Whether the derivatives of the target are used in the model. - The boundary conditions of the length-scale hyperparameter(s) - will change with the use_derivatives. - The use_derivatives will be updated when - update_bounds is called. 
- use_prior_mean : bool - Whether to use the prior mean to calculate the boundary of - the prefactor hyperparameter. - If use_prior_mean=False, the minimum and maximum target - differences are used as the boundary conditions. - """ - super().__init__( - bounds_dict=bounds_dict, - scale=scale, - log=log, - use_derivatives=use_derivatives, - use_prior_mean=use_prior_mean, - **kwargs, - ) + """ + Boundary conditions for the hyperparameters with educated guess for + the length-scale, relative-noise, and prefactor hyperparameters. + Stricter boundary conditions are used for the length-scale hyperparameter. + Machine precisions are used as boundary conditions for + other hyperparameters not given in the dictionary. + """ def length_bound(self, X, l_dim, **kwargs): """ @@ -64,34 +23,36 @@ def length_bound(self, X, l_dim, **kwargs): # Scale the convergence if derivatives of targets are used if self.use_derivatives: exp_lower = exp_lower * 0.05 - lengths = np.zeros((l_dim, 2)) + lengths = zeros((l_dim, 2), dtype=self.dtype) # If only one features is given then end if len(X) == 1: lengths[:, 0] = exp_lower lengths[:, 1] = exp_max - if self.log: - return np.log(lengths) + if self.use_log: + return log(lengths) return lengths # Ensure that the features are a matrix - if not isinstance(X[0], (list, np.ndarray)): - X = np.array([fp.get_vector() for fp in X]) + if not isinstance(X[0], (list, ndarray)): + X = asarray([fp.get_vector() for fp in X], dtype=self.dtype) for d in range(l_dim): # Calculate distances if l_dim == 1: dis = pdist(X) else: - dis = pdist(X[:, d : d + 1]) + d1 = d + 1 + dis = pdist(X[:, d:d1]) + dis = asarray(dis, dtype=self.dtype) # Calculate the maximum length-scale - dis_max = exp_max * np.median(dis) + dis_max = exp_max * median(dis) if dis_max == 0.0: dis_min, dis_max = exp_lower, exp_max else: # The minimum length-scale from the nearest neighbor distance - dis_min = exp_lower * np.median(self.nearest_neighbors(dis)) + dis_min = exp_lower * 
median(self.nearest_neighbors(dis)) if dis_min == 0.0: dis_min = exp_lower # Transform into log-scale if specified lengths[d, 0], lengths[d, 1] = dis_min, dis_max - if self.log: - return np.log(lengths) + if self.use_log: + return log(lengths) return lengths diff --git a/catlearn/regression/gp/hpboundary/updatebounds.py b/catlearn/regression/gp/hpboundary/updatebounds.py index 1fbab81f..02cbdca6 100644 --- a/catlearn/regression/gp/hpboundary/updatebounds.py +++ b/catlearn/regression/gp/hpboundary/updatebounds.py @@ -1,8 +1,17 @@ -import numpy as np +from numpy import array, asarray, sum as sum_, sqrt from .boundary import HPBoundaries class UpdatingBoundaries(HPBoundaries): + """ + An updating boundary conditions for the hyperparameters. + Previous solutions to the hyperparameters can be used to + updating the boundary conditions. + The bounds and the solutions are treated as Normal distributions. + A Normal distribution of a mixture model is then treated as + the updated boundary conditions. + """ + def __init__( self, bounds=None, @@ -10,36 +19,45 @@ def __init__( sol_var=0.5, bound_weight=4, min_solutions=4, + seed=None, + dtype=float, **kwargs, ): """ - An updating boundary conditions for the hyperparameters. - Previous solutions to the hyperparameters can be used to - updating the boundary conditions. - The bounds and the solutions are treated as Normal distributions. - A Normal distribution of a mixture model is then treated as - the updated boundary conditions. + Initialize the boundary conditions for the hyperparameters. Parameters: - bounds : Boundary condition class + bounds: Boundary condition class A Boundary condition class that make the boundaries of the hyperparameters. - sols : list of dict + sols: list of dict The solutions of the hyperparameters from previous optimizations. - sol_var : float + sol_var: float The known variance of the Normal distribution used for the solutions. 
- bound_weight : int + bound_weight: int The weight of the given boundary conditions in terms of number of solution samples. - min_solutions : int + min_solutions: int The minimum number of solutions before the boundary conditions are updated. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ # Set the default boundary conditions if bounds is None: - bounds = HPBoundaries(bounds_dict={}, log=True) + bounds = HPBoundaries( + bounds_dict={}, + use_log=True, + seed=seed, + dtype=dtype, + ) # Set all the arguments self.update_arguments( bounds=bounds, @@ -47,6 +65,8 @@ def __init__( sol_var=sol_var, bound_weight=bound_weight, min_solutions=min_solutions, + seed=seed, + dtype=dtype, **kwargs, ) @@ -56,25 +76,25 @@ def update_bounds(self, model, X, Y, parameters, **kwargs): Therefore the variable transformation parameters are also updated. Parameters: - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - X : (N,D) array + X: (N,D) array Training features with N data points and D dimensions. - Y : (N,1) array or (N,D+1) array + Y: (N,1) array or (N,D+1) array Training targets with or without derivatives with N data points. - parameters : (H) list of strings + parameters: (H) list of strings A list of names of the hyperparameters. Returns: - self : The object itself. + self: The object itself. 
""" # Update the parameters used self.make_parameters_set(parameters) # Update the boundary conditions and get them self.bounds.update_bounds(model, X, Y, parameters) - bounds_dict = self.bounds.get_bounds(array=False) + bounds_dict = self.bounds.get_bounds(use_array=False) # Get length of the solution sol_len = len(self.sols) # If not enough solutions are given, then use given bounds @@ -87,16 +107,19 @@ def update_bounds(self, model, X, Y, parameters, **kwargs): self.bounds_dict = {} for para in bounds_dict.keys(): # Get the solutions - sol_means = np.array([sol["hp"][para] for sol in self.sols]) + sol_means = array( + [sol["hp"][para] for sol in self.sols], + dtype=self.dtype, + ) # The mean and variance from the boundary conditions - bound_mean = np.sum(bounds_dict[para], axis=-1) + bound_mean = sum_(bounds_dict[para], axis=-1) bound_var = (0.5 * (bounds_dict[para][:, 1] - bound_mean)) ** 2 # Calculate the middle of the boundary conditions mean = ( - np.sum(sol_means, axis=0) + (self.bound_weight * bound_mean) + sum_(sol_means, axis=0) + (self.bound_weight * bound_mean) ) / n_eff # Calculate the variance of the solutions - var_sols = np.sum((sol_means - mean) ** 2, axis=0) + ( + var_sols = sum_((sol_means - mean) ** 2, axis=0) + ( self.sol_var * sol_len ) # Calculate the variance of the boundary conditions @@ -104,13 +127,24 @@ def update_bounds(self, model, X, Y, parameters, **kwargs): self.bound_weight * bound_var ) # Calculate the distance to the boundaries from the middle - bound_dist = 2.0 * np.sqrt((var_sols + var_bound) / n_eff) + bound_dist = 2.0 * sqrt((var_sols + var_bound) / n_eff) # Store the boundary conditions - self.bounds_dict[para] = np.array( - [mean - bound_dist, mean + bound_dist] + self.bounds_dict[para] = asarray( + [mean - bound_dist, mean + bound_dist], + dtype=self.dtype, ).T return self + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + self.bounds.set_dtype(dtype, **kwargs) + return self + + def 
set_seed(self, seed=None, **kwargs): + super().set_seed(seed, **kwargs) + self.bounds.set_seed(seed, **kwargs) + return self + def update_arguments( self, bounds=None, @@ -118,6 +152,8 @@ def update_arguments( sol_var=None, bound_weight=None, min_solutions=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -125,25 +161,38 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - bounds : Boundary condition class + bounds: Boundary condition class A Boundary condition class that make the boundaries of the hyperparameters. - sols : list of dict + sols: list of dict The solutions of the hyperparameters from previous optimizations. - sol_var : float + sol_var: float The known variance of the Normal distribution used for the solutions. - bound_weight : int + bound_weight: int The weight of the given boundary conditions in terms of number of solution samples. - min_solutions : int + min_solutions: int The minimum number of solutions before the boundary conditions are updated. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype) if bounds is not None: self.initiate_bounds_dict(bounds) if sols is not None: @@ -160,19 +209,17 @@ def initiate_bounds_dict(self, bounds, **kwargs): "Make and store the hyperparameter bounds." 
# Copy the boundary condition object self.bounds = bounds.copy() - self.bounds_dict = self.bounds.get_bounds(array=False) + self.bounds_dict = self.bounds.get_bounds(use_array=False) # Extract the hyperparameter names self.parameters_set = sorted(self.bounds_dict.keys()) - self.parameters = sum( - [ - [para] * len(self.bounds_dict[para]) - for para in self.parameters_set - ], - [], - ) + self.parameters = [ + para + for para in self.parameters_set + for _ in range(len(self.bounds_dict[para])) + ] # Make sure log-scale of the hyperparameters are used - if self.bounds.log is False: - raise Exception( + if self.bounds.use_log is False: + raise ValueError( "The Updating Boundaries need to " "use boundary conditions in the log-scale!" ) @@ -187,6 +234,8 @@ def get_arguments(self): sol_var=self.sol_var, bound_weight=self.bound_weight, min_solutions=self.min_solutions, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/hpfitter/fbpmgp.py b/catlearn/regression/gp/hpfitter/fbpmgp.py index 3c673a28..090e92cb 100644 --- a/catlearn/regression/gp/hpfitter/fbpmgp.py +++ b/catlearn/regression/gp/hpfitter/fbpmgp.py @@ -1,24 +1,54 @@ -import numpy as np -from numpy.linalg import eigh +from numpy import ( + asarray, + append, + argsort, + array, + diag, + einsum, + empty, + exp, + finfo, + full, + inf, + log, + matmul, + nanargmin, + nanmax, + pi, + triu_indices, + where, + zeros, +) +from numpy.linalg import eigh, LinAlgError +from numpy.random import default_rng, Generator, RandomState +from scipy.linalg import eigh as scipy_eigh from scipy.spatial.distance import pdist from scipy.optimize import OptimizeResult -from .hpfitter import HyperparameterFitter +import warnings +from .hpfitter import HyperparameterFitter, VariableTransformation class FBPMGP(HyperparameterFitter): + """ + This class is used to find the best Gaussian Process + that mimics the Full-Bayesian predictive distribution. 
+ It only works with a Gaussian Process. + """ + def __init__( self, Q=None, n_test=50, ngrid=80, - bounds=None, + bounds=VariableTransformation(), get_prior_mean=False, + round_hp=None, + seed=None, + dtype=float, **kwargs, ): """ - Get the best Gaussian Process that mimics - the Full-Bayesian predictive distribution. - It only works with a Gaussian Process. + Initialize the class with its arguments. Parameters: Q: (M,D) array @@ -34,14 +64,19 @@ def __init__( of the hyperparameter. get_prior_mean: bool Whether to get the prior arguments in the solution. + round_hp: int (optional) + The number of decimals to round the hyperparameters to. + If None, the hyperparameters are not rounded. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ # Set the default test points self.Q = None - # Set the default boundary conditions - if bounds is None: - from ..hpboundary.hptrans import VariableTransformation - - self.bounds = VariableTransformation(bounds=None) # Set the solution form self.update_arguments( Q=Q, @@ -49,12 +84,15 @@ def __init__( ngrid=ngrid, bounds=bounds, get_prior_mean=get_prior_mean, + round_hp=round_hp, + seed=seed, + dtype=dtype, **kwargs, ) - def fit(self, X, Y, model, hp=None, pdis=None, **kwargs): + def fit(self, X, Y, model, hp=None, pdis=None, retrain=True, **kwargs): # Copy the model so it is not changed outside of the optimization - model = self.copy_model(model) + model = self.copy_model(model, retrain=retrain) # Get hyperparameters hp, theta, parameters = self.get_hyperparams(hp, model) # Find FBMGP solution @@ -71,6 +109,49 @@ def fit(self, X, Y, model, hp=None, pdis=None, **kwargs): sol = self.get_full_hp(sol, model) return sol + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. 
+ + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + # Set the data type in the bounds + self.bounds.set_dtype(dtype, **kwargs) + return self + + def set_seed(self, seed=None, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + if seed is not None: + self.seed = seed + if isinstance(seed, int): + self.rng = default_rng(self.seed) + elif isinstance(seed, Generator) or isinstance(seed, RandomState): + self.rng = seed + else: + self.seed = None + self.rng = default_rng() + # Set the seed in the bounds + self.bounds.set_seed(seed, **kwargs) + return self + def update_arguments( self, Q=None, @@ -78,6 +159,9 @@ def update_arguments( ngrid=None, bounds=None, get_prior_mean=None, + round_hp=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -98,6 +182,16 @@ def update_arguments( of the hyperparameter. get_prior_mean: bool Whether to get the prior arguments in the solution. + round_hp: int (optional) + The number of decimals to round the hyperparameters to. + If None, the hyperparameters are not rounded. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
@@ -112,11 +206,20 @@ def update_arguments(
         self.bounds = bounds.copy()
         if get_prior_mean is not None:
             self.get_prior_mean = get_prior_mean
+        if round_hp is not None or not hasattr(self, "round_hp"):
+            self.round_hp = round_hp
+        # Set the seed
+        if seed is not None or not hasattr(self, "seed"):
+            self.set_seed(seed)
+        # Set the data type
+        if dtype is not None or not hasattr(self, "dtype"):
+            self.set_dtype(dtype)
         return self

     def get_hp(self, theta, parameters, **kwargs):
         "Make hyperparameter dictionary from lists."
-        theta, parameters = np.array(theta), np.array(parameters)
+        theta = asarray(theta)
+        parameters = asarray(parameters)
         parameters_set = sorted(set(parameters))
         hp = {
             para_s: self.numeric_limits(theta[parameters == para_s])
@@ -124,12 +227,14 @@
         }
         return hp, parameters_set

-    def numeric_limits(self, array, dh=0.4 * np.log(np.finfo(float).max)):
+    def numeric_limits(self, theta, dh=None):
         """
         Replace hyperparameters if they are outside
         of the numeric limits in log-space.
         """
-        return np.where(-dh < array, np.where(array < dh, array, dh), -dh)
+        if dh is None:
+            dh = 0.4 * log(finfo(self.dtype).max)
+        return where(-dh < theta, where(theta < dh, theta, dh), -dh)

     def update_model(self, model, hp, **kwargs):
         "Update model."
@@ -146,24 +251,25 @@ def kxx_corr(self, model, X, **kwargs):

     def add_correction(self, model, KXX, n_data, **kwargs):
         "Add noise correction to covariance matrix."
-        corr = model.get_correction(np.diag(KXX))
+        corr = model.get_correction(diag(KXX))
         if corr != 0.0:
             KXX[range(n_data), range(n_data)] += corr
         return KXX

     def y_prior(self, X, Y, model, L=None, low=None, **kwargs):
-        "Update prior and subtract target."
-        Y_p = Y.copy()
+        "Update the prior and subtract it from the target."
+ Y_p = array(Y, dtype=self.dtype) model.update_priormean(X, Y_p, L=L, low=low, **kwargs) - use_derivatives = model.use_derivatives + get_derivatives = model.get_use_derivatives() pmean = model.get_priormean( X, Y_p, - get_derivatives=use_derivatives, + get_derivatives=get_derivatives, ) - if use_derivatives: - return (Y_p - pmean).T.reshape(-1, 1) - return (Y_p - pmean)[:, 0:1] + Y_p -= pmean + if get_derivatives: + return Y_p.T.reshape(-1, 1) + return Y_p[:, 0:1] def get_eig(self, model, X, Y, **kwargs): "Calculate the eigenvalues." @@ -172,33 +278,31 @@ def get_eig(self, model, X, Y, **kwargs): # Eigendecomposition try: D, U = eigh(KXX) - except Exception as e: - import logging - import scipy.linalg - - logging.error("An error occurred: %s", str(e)) + except LinAlgError: + warnings.warn( + "Eigendecomposition failed, using scipy.eigh instead." + ) # More robust but slower eigendecomposition - D, U = scipy.linalg.eigh(KXX, driver="ev") + D, U = scipy_eigh(KXX, driver="ev") # Subtract the prior mean to the training target Y_p = self.y_prior(X, Y, model, D=D, U=U) - UTY = (np.matmul(U.T, Y_p)).reshape(-1) ** 2 + UTY = (matmul(U.T, Y_p)).reshape(-1) ** 2 return D, U, Y_p, UTY, KXX, n_data def get_eig_without_Yp(self, model, X, Y_p, n_data, **kwargs): "Calculate the eigenvalues without using the prior mean." # Calculate the kernel with and without noise - KXX, n_data = self.kxx_corr(model, X) + KXX, _ = self.kxx_corr(model, X) # Eigendecomposition try: D, U = eigh(KXX) - except Exception as e: - import logging - import scipy.linalg - - logging.error("An error occurred: %s", str(e)) + except LinAlgError: + warnings.warn( + "Eigendecomposition failed, using scipy.eigh instead." 
+ ) # More robust but slower eigendecomposition - D, U = scipy.linalg.eigh(KXX, driver="ev") - UTY = np.matmul(U.T, Y_p) + D, U = scipy_eigh(KXX, driver="ev") + UTY = matmul(U.T, Y_p) UTY2 = UTY.reshape(-1) ** 2 return D, U, UTY, UTY2, Y_p, KXX @@ -223,7 +327,7 @@ def get_grids( if para_bool[para]: grids[para] = lines[p].copy() else: - grids[para] = np.array([model_hp[para][0]]) + grids[para] = asarray([model_hp[para][0]], dtype=self.dtype) return grids def trapz_coef(self, grids, para_bool, **kwargs): @@ -231,16 +335,16 @@ def trapz_coef(self, grids, para_bool, **kwargs): cs = {} for para, pbool in para_bool.items(): if pbool: - cs[para] = np.log(self.trapz_append(grids[para])) + cs[para] = log(self.trapz_append(grids[para])) else: - cs[para] = np.array([0.0]) + cs[para] = asarray([0.0], dtype=self.dtype) return cs def prior_grid(self, grids, pdis=None, i=0, **kwargs): "Get prior distribution of hyperparameters on the grid." if pdis is None: return { - para: np.array([0.0] * len(grid)) + para: zeros((len(grid)), dtype=self.dtype) for para, grid in grids.items() } pr_grid = {} @@ -248,7 +352,7 @@ def prior_grid(self, grids, pdis=None, i=0, **kwargs): if para in pdis.keys(): pr_grid[para] = pdis[para].ln_pdf(grid) else: - pr_grid[para] = np.array([0.0] * len(grid)) + pr_grid[para] = zeros((len(grid)), dtype=self.dtype) return pr_grid def get_all_grids( @@ -285,23 +389,24 @@ def get_all_grids( def trapz_append(self, grid, **kwargs): "Get the weights in linear space from the trapezoidal rule." g1 = [grid[1] - grid[0]] - g2 = np.append(grid[2:] - grid[:-2], grid[-1] - grid[-2]) - return 0.5 * np.append(g1, g2) + g2 = append(grid[2:] - grid[:-2], grid[-1] - grid[-2]) + return 0.5 * append(g1, g2) def get_test_points(self, Q, X_tr, **kwargs): "Get the test point if they are not given." 
if Q is not None: return Q - i_sort = np.argsort(pdist(X_tr))[: self.n_test] - i_list, j_list = np.triu_indices(len(X_tr), k=1, m=None) + i_sort = argsort(pdist(X_tr))[: self.n_test] + i_list, j_list = triu_indices(len(X_tr), k=1, m=None) i_list, j_list = i_list[i_sort], j_list[i_sort] - r = np.random.uniform(low=0.01, high=0.99, size=(2, len(i_list))) - r = r / np.sum(r, axis=0) - Q = np.array( + r = self.rng.uniform(low=0.01, high=0.99, size=(2, len(i_list))) + r = r / r.sum(axis=0) + Q = asarray( [ X_tr[i] * r[0, k] + X_tr[j] * r[1, k] for k, (i, j) in enumerate(zip(i_list, j_list)) - ] + ], + dtype=self.dtype, ) return Q @@ -320,7 +425,7 @@ def get_test_KQ(self, model, Q, X_tr, use_derivatives=False, **kwargs): def get_prefactors(self, grids, n_data, **kwargs): "Get the prefactor values for log-likelihood." - prefactors = np.exp(2 * grids["prefactor"]).reshape(-1, 1) + prefactors = exp(2.0 * grids["prefactor"]).reshape(-1, 1) ln_prefactor = (n_data * grids["prefactor"]).reshape(-1, 1) return prefactors, ln_prefactor @@ -366,7 +471,7 @@ def get_all_eig_matrices( include_noise=False, ) KQX = model.get_kernel(Q, X, get_derivatives=get_derivatives) - UKQX = np.matmul(KQX, U) + UKQX = matmul(KQX, U) return D, UTY, UTY2, KQQ, UKQX def posterior_value( @@ -384,20 +489,20 @@ def posterior_value( **kwargs, ): "Get the posterior distribution value and add it to the existing sum." 
- nlp1 = 0.5 * np.sum(UTY2 / D_n, axis=1) - nlp2 = 0.5 * np.sum(np.log(D_n), axis=1) + nlp1 = 0.5 * (UTY2 / D_n).sum(axis=1) + nlp2 = 0.5 * log(D_n).sum(axis=1) like = -( (nlp1 / prefactors + ln_prefactor) + (nlp2 + ln2pi) ) + self.get_grid_sum(pr_grid, l_index) - like_max = np.nanmax(like) + like_max = nanmax(like) if like_max > lp_max: - ll_scale = np.exp(lp_max - like_max) + ll_scale = exp(lp_max - like_max) lp_max = like_max else: ll_scale = 1.0 like = like - lp_max - like = np.exp(like + self.get_grid_sum(cs, l_index)) - like_sum = like_sum * ll_scale + np.sum(like) + like = exp(like + self.get_grid_sum(cs, l_index)) + like_sum = like_sum * ll_scale + like.sum() return like_sum, like, lp_max, ll_scale def get_grid_sum(self, the_grids, l_index): @@ -410,8 +515,8 @@ def get_grid_sum(self, the_grids, l_index): def pred_unc(self, UKQX, UTY, D_n, KQQ, yp, **kwargs): "Make prediction mean and uncertainty from eigendecomposition." UKQXD = UKQX / D_n[:, None, :] - pred = yp + np.einsum("dij,ji->di", UKQXD, UTY, optimize=True) - var = KQQ - np.einsum("dij,ji->di", UKQXD, UKQX.T) + pred = yp + einsum("dij,ji->di", UKQXD, UTY, optimize=True) + var = KQQ - einsum("dij,ji->di", UKQXD, UKQX.T) return pred, var def update_df_ybar( @@ -429,19 +534,19 @@ def update_df_ybar( **kwargs, ): "Update the dict and add values to ybar and y2bar_ubar." 
- ybar = (ybar * ll_scale) + np.einsum("nj,pn->j", pred, like) + ybar = (ybar * ll_scale) + einsum("nj,pn->j", pred, like) y2bar_ubar = (y2bar_ubar * ll_scale) + ( - np.einsum("nj,pn->j", pred**2, like) - + np.einsum("nj,pn->j", var, prefactors * like) + einsum("nj,pn->j", pred**2, like) + + einsum("nj,pn->j", var, prefactors * like) ) # Store the hyperparameters and prediction mean and variance - df["length"] = np.append( + df["length"] = append( df["length"], - np.full(np.shape(noises), length), + full(noises.shape, length, dtype=self.dtype), ) - df["noise"] = np.append(df["noise"], noises) - df["pred"] = np.append(df["pred"], pred, axis=0) - df["var"] = np.append(df["var"], var, axis=0) + df["noise"] = append(df["noise"], noises) + df["pred"] = append(df["pred"], pred, axis=0) + df["var"] = append(df["var"], var, axis=0) return df, ybar, y2bar_ubar def evaluate_for_noise( @@ -471,7 +576,7 @@ def evaluate_for_noise( Evaluate log-posterior and update the data frame for all noise hyperparameter in grid simulatenously. 
""" - D_n = D + np.exp(2 * grids["noise"]).reshape(-1, 1) + D_n = D + exp(2.0 * grids["noise"]).reshape(-1, 1) # Calculate log-posterior like_sum, like, lp_max, ll_scale = self.posterior_value( like_sum, @@ -521,26 +626,25 @@ def get_solution( ybar = ybar / like_sum y2bar_ubar = y2bar_ubar / like_sum # Get the analytic solution to the prefactor - prefactor = np.mean( - (y2bar_ubar + (df["pred"] ** 2) - (2 * df["pred"] * ybar)) - / df["var"], - axis=1, - ) + prefactor = ( + (y2bar_ubar + (df["pred"] ** 2) - (2.0 * df["pred"] * ybar)) + / df["var"] + ).mean(axis=1) # Calculate all Kullback-Leibler divergences kl = 0.5 * ( - n_test * (1 + np.log(2 * np.pi)) - + (np.sum(np.log(df["var"]), axis=1) + n_test * np.log(prefactor)) + n_test * (1 + log(2.0 * pi)) + + (log(df["var"]).sum(axis=1) + n_test * log(prefactor)) ) # Find the best solution - i_min = np.nanargmin(kl) + i_min = nanargmin(kl) kl_min = kl[i_min] / n_test hp_best = dict( - length=np.array([df["length"][i_min]]), - noise=np.array([df["noise"][i_min]]), - prefactor=np.array([0.5 * np.log(prefactor[i_min])]), + length=asarray([df["length"][i_min]], dtype=self.dtype), + noise=asarray([df["noise"][i_min]], dtype=self.dtype), + prefactor=asarray([0.5 * log(prefactor[i_min])], dtype=self.dtype), ) theta = [hp_best[para] for para in hp_best.keys()] - theta = np.array(theta).reshape(-1) + theta = asarray(theta, dtype=self.dtype).reshape(-1) sol = { "fun": kl_min, "hp": hp_best, @@ -565,7 +669,6 @@ def fbpmgp( **kwargs, ): "Only works with the FBPMGP object function." 
- np.random.seed(12) # Update hyperparameters hp, parameters_set = self.get_hp(theta, parameters) model = self.update_model(model, hp) @@ -586,25 +689,25 @@ def fbpmgp( use_derivatives = model.use_derivatives yp = model.get_priormean( Q, - np.zeros((len(Q), len(Y[0]))), + zeros((len(Q), len(Y[0])), dtype=self.dtype), get_derivatives=use_derivatives, ) yp = yp.reshape(-1) n_data = len(Y_p) # Initialize fb df = { - key: np.array([]) for key in ["ll", "length", "noise", "prefactor"] + key: asarray([]) for key in ["ll", "length", "noise", "prefactor"] } if model.use_derivatives: - df["pred"] = np.empty((0, len(Q) * len(Y[0]))) - df["var"] = np.empty((0, len(Q) * len(Y[0]))) + df["pred"] = empty((0, len(Q) * len(Y[0])), dtype=self.dtype) + df["var"] = empty((0, len(Q) * len(Y[0])), dtype=self.dtype) else: - df["pred"] = np.empty((0, len(Q))) - df["var"] = np.empty((0, len(Q))) + df["pred"] = empty((0, len(Q)), dtype=self.dtype) + df["var"] = empty((0, len(Q)), dtype=self.dtype) like_sum, ybar, y2bar_ubar = 0.0, 0.0, 0.0 - lp_max = -np.inf + lp_max = -inf prefactors, ln_prefactor = self.get_prefactors(grids, n_data) - ln2pi = 0.5 * n_data * np.log(2 * np.pi) + ln2pi = 0.5 * n_data * log(2.0 * pi) for l_index, length in enumerate(grids["length"]): D, UTY, UTY2, KQQ, UKQX = self.get_all_eig_matrices( length, @@ -656,6 +759,8 @@ def get_arguments(self): ngrid=self.ngrid, bounds=self.bounds, get_prior_mean=self.get_prior_mean, + round_hp=self.round_hp, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/hpfitter/hpfitter.py b/catlearn/regression/gp/hpfitter/hpfitter.py index ba032edc..1e5c7bd7 100644 --- a/catlearn/regression/gp/hpfitter/hpfitter.py +++ b/catlearn/regression/gp/hpfitter/hpfitter.py @@ -1,51 +1,56 @@ -import numpy as np +from numpy import asarray, round as round_ +from ..optimizers.optimizer import FunctionEvaluation +from ..hpboundary.hptrans import VariableTransformation +from 
..pdistributions.update_pdis import update_pdis class HyperparameterFitter: + """ + Hyperparameter fitter class for optimizing the hyperparameters + of a given objective function with a given optimizer. + """ + def __init__( self, func, - optimizer=None, - bounds=None, + optimizer=FunctionEvaluation(jac=False), + bounds=VariableTransformation(), use_update_pdis=False, get_prior_mean=False, use_stored_sols=False, + round_hp=None, + dtype=float, **kwargs, ): """ - Hyperparameter fitter object with an optimizer for optimizing - the hyperparameters on different given objective functions. + Initialize the hyperparameter fitter class. Parameters: - func : ObjectiveFunction class + func: ObjectiveFunction class A class with the objective function used to optimize the hyperparameters. - optimizer : Optimizer class + optimizer: Optimizer class A class with the used optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. Most of the global optimizers are using boundary conditions. The bounds in this class will be used for the optimizer and func. - use_update_pdis : bool + use_update_pdis: bool Whether to update the prior distributions of the hyperparameters with the given boundary conditions. - get_prior_mean : bool + get_prior_mean: bool Whether to get the parameters of the prior mean in the solution. - use_stored_sols : bool + use_stored_sols: bool Whether to store the solutions. + round_hp: int (optional) + The number of decimals to round the hyperparameters to. + If None, the hyperparameters are not rounded. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" - # Set the default optimizer - if optimizer is None: - from ..optimizers.optimizer import FunctionEvaluation - - optimizer = FunctionEvaluation(jac=False) - # Set the default boundary conditions - if bounds is None: - from ..hpboundary.hptrans import VariableTransformation - - self.bounds = VariableTransformation(bounds=None) # Set all the arguments self.update_arguments( func=func, @@ -54,35 +59,40 @@ def __init__( use_update_pdis=use_update_pdis, get_prior_mean=get_prior_mean, use_stored_sols=use_stored_sols, + round_hp=round_hp, + dtype=dtype, **kwargs, ) - def fit(self, X, Y, model, hp=None, pdis=None, **kwargs): + def fit(self, X, Y, model, hp=None, pdis=None, retrain=True, **kwargs): """ Optimize the hyperparameters. Parameters: - X : (N,D) array + X: (N,D) array Training features with N data points and D dimensions. - Y : (N,1) array or (N,D+1) array + Y: (N,1) array or (N,D+1) array Training targets with or without derivatives with N data points. - model : Model + model: Model The Machine Learning Model with kernel and prior that are optimized. - hp : dict + hp: dict Use a set of hyperparameters to optimize from else the current set is used. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. + retrain: bool + Whether to retrain the model after the optimization. + The model is not copied if retrain is True. Returns: - dict : A solution dictionary with objective function value, + dict: A solution dictionary with objective function value, optimized hyperparameters, success statement, and number of used evaluations. 
""" # Copy the model so it is not changed outside of the optimization - model = self.copy_model(model) + model = self.copy_model(model, retrain=retrain) # Always reset the solution in the objective function self.reset_func() # Get hyperparameters @@ -109,6 +119,41 @@ def fit(self, X, Y, model, hp=None, pdis=None, **kwargs): self.store_sol(sol) return sol + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + self.dtype = dtype + self.func.set_dtype(dtype, **kwargs) + self.bounds.set_dtype(dtype, **kwargs) + self.optimizer.set_dtype(dtype, **kwargs) + return self + + def set_seed(self, seed, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + self.func.set_seed(seed, **kwargs) + self.bounds.set_seed(seed, **kwargs) + self.optimizer.set_seed(seed, **kwargs) + return self + def update_arguments( self, func=None, @@ -117,6 +162,8 @@ def update_arguments( use_update_pdis=None, get_prior_mean=None, use_stored_sols=None, + round_hp=None, + dtype=None, **kwargs, ): """ @@ -124,24 +171,30 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - func : ObjectiveFunction class + func: ObjectiveFunction class A class with the objective function used to optimize the hyperparameters. - optimizer : Optimizer class + optimizer: Optimizer class A class with the used optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. Most of the global optimizers are using boundary conditions. The bounds in this class will be used for the optimizer and func. 
- use_update_pdis : bool + use_update_pdis: bool Whether to update the prior distributions of the hyperparameters with the given boundary conditions. - get_prior_mean : bool + get_prior_mean: bool Whether to get the parameters of the prior mean in the solution. - use_stored_sols : bool + use_stored_sols: bool Whether to store the solutions. + round_hp: int (optional) + The number of decimals to round the hyperparameters to. + If None, the hyperparameters are not rounded. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. @@ -158,14 +211,22 @@ def update_arguments( self.get_prior_mean = get_prior_mean if use_stored_sols is not None: self.use_stored_sols = use_stored_sols + if round_hp is not None or not hasattr(self, "round_hp"): + self.round_hp = round_hp + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype) # Empty the stored solutions self.sols = [] # Make sure that the objective function gets the prior mean parameters self.func.update_arguments(get_prior_mean=self.get_prior_mean) return self - def copy_model(self, model, **kwargs): + def copy_model(self, model, retrain=True, **kwargs): "Make a copy of the model, so it is not overwritten." + # Do not copy the model if retrain is True + if retrain: + return model + # Copy the model if retrain is False return model.copy() def reset_func(self, **kwargs): @@ -201,11 +262,11 @@ def hp_to_theta(self, hp): a list of hyperparameter names. 
""" parameters_set = sorted(hp.keys()) - theta = sum([list(hp[para]) for para in parameters_set], []) - parameters = sum( - [[para] * len(hp[para]) for para in parameters_set], [] - ) - return np.array(theta), parameters + theta = [hp_v for para in parameters_set for hp_v in hp[para]] + parameters = [ + para for para in parameters_set for _ in range(len(hp[para])) + ] + return asarray(theta), parameters def update_bounds(self, model, X, Y, parameters, **kwargs): "Update the boundary condition class with the data." @@ -225,17 +286,21 @@ def update_pdis(self, pdis, model, X, Y, parameters, **kwargs): Update the prior distributions of the hyperparameters with the boundary conditions. """ - if self.use_update_pdis and pdis is not None: - from ..pdistributions.update_pdis import update_pdis - - pdis = update_pdis( - model, - parameters, - X, - Y, - bounds=self.bounds, - pdis=pdis, - ) + if pdis is not None: + pdis = { + para: pdis_p.set_dtype(self.dtype) + for para, pdis_p in pdis.items() + } + if self.use_update_pdis: + pdis = update_pdis( + model, + parameters, + X, + Y, + bounds=self.bounds, + pdis=pdis, + dtype=self.dtype, + ) return pdis def get_full_hp(self, sol, model, **kwargs): @@ -244,6 +309,11 @@ def get_full_hp(self, sol, model, **kwargs): that are optimized and within the model. 
""" sol["full hp"] = model.get_hyperparams() + # Round the hyperparameters if needed + if self.round_hp is not None: + for key, value in sol["hp"].items(): + sol["hp"][key] = round_(value, self.round_hp) + # Update the optimized hyperparameters sol["full hp"].update(sol["hp"]) return sol @@ -267,6 +337,8 @@ def get_arguments(self): use_update_pdis=self.use_update_pdis, get_prior_mean=self.get_prior_mean, use_stored_sols=self.use_stored_sols, + round_hp=self.round_hp, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/hpfitter/redhpfitter.py b/catlearn/regression/gp/hpfitter/redhpfitter.py index bfc2d451..9141b362 100644 --- a/catlearn/regression/gp/hpfitter/redhpfitter.py +++ b/catlearn/regression/gp/hpfitter/redhpfitter.py @@ -1,48 +1,64 @@ -import numpy as np +from numpy import inf from scipy.optimize import OptimizeResult -from .hpfitter import HyperparameterFitter +from .hpfitter import ( + FunctionEvaluation, + HyperparameterFitter, + VariableTransformation, +) class ReducedHyperparameterFitter(HyperparameterFitter): + """ + Hyperparameter fitter class for optimizing the hyperparameters + of a given objective function with a given optimizer. + The hyperparameters are only optimized when the training set size + is below a given number. + """ + def __init__( self, func, - optimizer=None, - bounds=None, + optimizer=FunctionEvaluation(jac=False), + bounds=VariableTransformation(), use_update_pdis=False, get_prior_mean=False, use_stored_sols=False, + round_hp=None, opt_tr_size=50, + dtype=float, **kwargs, ): """ - Hyperparameter fitter object with an optimizer for optimizing - the hyperparameters on different given objective functions. - The optimization of the hyperparameters are only performed when - the training set size is below a number. + Initialize the hyperparameter fitter class. 
Parameters: - func : ObjectiveFunction class + func: ObjectiveFunction class A class with the objective function used to optimize the hyperparameters. - optimizer : Optimizer class + optimizer: Optimizer class A class with the used optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. Most of the global optimizers are using boundary conditions. The bounds in this class will be used for the optimizer and func. - use_update_pdis : bool + use_update_pdis: bool Whether to update the prior distributions of the hyperparameters with the given boundary conditions. - get_prior_mean : bool + get_prior_mean: bool Whether to get the parameters of the prior mean in the solution. - use_stored_sols : bool + use_stored_sols: bool Whether to store the solutions. + round_hp: int (optional) + The number of decimals to round the hyperparameters to. + If None, the hyperparameters are not rounded. opt_tr_size: int The maximum size of the training set before the hyperparameters are not optimized. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" super().__init__( func, @@ -51,20 +67,30 @@ def __init__( use_update_pdis=use_update_pdis, get_prior_mean=get_prior_mean, use_stored_sols=use_stored_sols, + round_hp=round_hp, opt_tr_size=opt_tr_size, + dtype=dtype, **kwargs, ) - def fit(self, X, Y, model, hp=None, pdis=None, **kwargs): + def fit(self, X, Y, model, hp=None, pdis=None, retrain=True, **kwargs): # Check if optimization is needed if len(X) <= self.opt_tr_size: # Optimize the hyperparameters - return super().fit(X, Y, model, hp=hp, pdis=pdis, **kwargs) + return super().fit( + X, + Y, + model, + hp=hp, + pdis=pdis, + retrain=retrain, + **kwargs, + ) # Use existing hyperparameters hp, theta, parameters = self.get_hyperparams(hp, model) # Do not optimize hyperparameters sol = { - "fun": np.inf, + "fun": inf, "x": theta, "hp": hp, "success": False, @@ -85,7 +111,9 @@ def update_arguments( use_update_pdis=None, get_prior_mean=None, use_stored_sols=None, + round_hp=None, opt_tr_size=None, + dtype=None, **kwargs, ): """ @@ -93,49 +121,49 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - func : ObjectiveFunction class + func: ObjectiveFunction class A class with the objective function used to optimize the hyperparameters. - optimizer : Optimizer class + optimizer: Optimizer class A class with the used optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. Most of the global optimizers are using boundary conditions. The bounds in this class will be used for the optimizer and func. - use_update_pdis : bool + use_update_pdis: bool Whether to update the prior distributions of the hyperparameters with the given boundary conditions. - get_prior_mean : bool + get_prior_mean: bool Whether to get the parameters of the prior mean in the solution. - use_stored_sols : bool + use_stored_sols: bool Whether to store the solutions. 
+ round_hp: int (optional) + The number of decimals to round the hyperparameters to. + If None, the hyperparameters are not rounded. opt_tr_size: int The maximum size of the training set before the hyperparameters are not optimized. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if func is not None: - self.func = func.copy() - if optimizer is not None: - self.optimizer = optimizer.copy() - if bounds is not None: - self.bounds = bounds.copy() - if use_update_pdis is not None: - self.use_update_pdis = use_update_pdis - if get_prior_mean is not None: - self.get_prior_mean = get_prior_mean - if use_stored_sols is not None: - self.use_stored_sols = use_stored_sols + super().update_arguments( + func=func, + optimizer=optimizer, + bounds=bounds, + use_update_pdis=use_update_pdis, + get_prior_mean=get_prior_mean, + use_stored_sols=use_stored_sols, + round_hp=round_hp, + dtype=dtype, + ) if opt_tr_size is not None: self.opt_tr_size = opt_tr_size - # Empty the stored solutions - self.sols = [] - # Make sure that the objective function gets the prior mean parameters - self.func.update_arguments(get_prior_mean=self.get_prior_mean) return self def get_arguments(self): @@ -148,7 +176,9 @@ def get_arguments(self): use_update_pdis=self.use_update_pdis, get_prior_mean=self.get_prior_mean, use_stored_sols=self.use_stored_sols, + round_hp=self.round_hp, opt_tr_size=self.opt_tr_size, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/kernel/kernel.py b/catlearn/regression/gp/kernel/kernel.py index 01fd9363..4904bf78 100644 --- a/catlearn/regression/gp/kernel/kernel.py +++ b/catlearn/regression/gp/kernel/kernel.py @@ -1,17 +1,22 @@ -import numpy as np +from numpy import asarray, array, finfo from scipy.spatial.distance import pdist, cdist class Kernel: + """ + The kernel class with hyperparameters. 
+ """ + def __init__( self, use_derivatives=False, use_fingerprint=False, hp={}, + dtype=float, **kwargs, ): """ - The Kernel class with hyperparameters. + Initialize the kernel class. Parameters: use_derivatives: bool @@ -22,14 +27,17 @@ def __init__( A dictionary of the hyperparameters in the log-space. The hyperparameters should be given as flatten arrays, like hp=dict(length=np.array([-0.7])). + dtype: type + The data type of the arrays. """ # Set the default hyperparameters - self.hp = dict(length=np.array([-0.7])) + self.hp = dict(length=asarray([-0.7], dtype=dtype)) # Set all the arguments self.update_arguments( use_derivatives=use_derivatives, use_fingerprint=use_fingerprint, hp=hp, + dtype=dtype, **kwargs, ) @@ -44,23 +52,23 @@ def __call__( Make the kernel matrix. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Features with N data points. - features2 : (M,D) array or (M) list of fingerprint objects + features2: (M,D) array or (M) list of fingerprint objects Features with M data points and D dimensions. If it is not given a squared kernel from features is generated. get_derivatives: bool Whether to predict derivatives of target. Returns: - KXX : array + KXX: array The symmetric kernel matrix if features2=None. The number of rows in the array is N, or N*(D+1) if get_derivatives=True. The number of columns in the array is N, or N*(D+1) if use_derivatives=True. or - KQX : array + KQX: array The kernel matrix if features2 is not None. The number of rows in the array is N, or N*(D+1) if get_derivatives=True. @@ -81,7 +89,7 @@ def diag(self, features, get_derivatives=True, **kwargs): Get the diagonal kernel vector. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Features with N data points. get_derivatives: bool Whether to predict derivatives of target. 
@@ -97,7 +105,7 @@ def diag_deriv(self, features, **kwargs): Get the derivative of the diagonal kernel vector wrt. the features. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Features with N data points. Returns: @@ -110,14 +118,14 @@ def get_gradients(self, features, hp, KXX, correction=True, **kwargs): Get the gradients of the kernel matrix wrt. to the hyperparameters. Parameters: - features : (N,D) array + features: (N,D) array Features with N data points and D dimensions. - hp : list + hp: list A list of the string names of the hyperparameters that are optimized. - KXX : (N,N) array + KXX: (N,N) array The kernel matrix of training data. - correction : bool + correction: bool Whether the noise correction is used. Returns: @@ -139,9 +147,9 @@ def set_hyperparams(self, new_params, **kwargs): self: The updated object itself. """ if "length" in new_params: - self.hp["length"] = np.array( + self.hp["length"] = array( new_params["length"], - dtype=float, + dtype=self.dtype, ).reshape(-1) return self @@ -159,7 +167,7 @@ def get_hp_dimension(self, features=None, **kwargs): Get the dimension of the length-scale hyperparameter. Parameters: - features : (N,D) array or (N) list of fingerprint objects or None + features: (N,D) array or (N) list of fingerprint objects or None Features with N data points. Returns: @@ -175,11 +183,59 @@ def get_use_fingerprint(self): "Get whether a fingerprint is used as the features." return self.use_fingerprint + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. 
+ """ + self.dtype = dtype + # Set the machine precision + self.eps = 1.1 * finfo(self.dtype).eps + # Set the data type of the hyperparameters + self.set_hyperparams(self.hp) + return self + + def set_use_derivatives(self, use_derivatives, **kwargs): + """ + Set whether to use the derivatives of the targets. + + Parameters: + use_derivatives: bool + Use derivatives/gradients for training and predictions. + + Returns: + self: The updated object itself. + """ + # Set whether to use derivatives for the target + self.use_derivatives = use_derivatives + return self + + def set_use_fingerprint(self, use_fingerprint, **kwargs): + """ + Set whether to use the fingerprint instance. + + Parameters: + use_fingerprint: bool + Use fingerprint instance as features. + + Returns: + self: The updated object itself. + """ + self.use_fingerprint = use_fingerprint + return self + def update_arguments( self, use_derivatives=None, use_fingerprint=None, hp=None, + dtype=None, **kwargs, ): """ @@ -195,14 +251,18 @@ def update_arguments( A dictionary of the hyperparameters in the log-space. The hyperparameters should be given as flatten arrays, like hp=dict(length=np.array([-0.7])). + dtype: type + The data type of the arrays. Returns: self: The updated object itself. """ if use_derivatives is not None: - self.use_derivatives = use_derivatives + self.set_use_derivatives(use_derivatives) if use_fingerprint is not None: - self.use_fingerprint = use_fingerprint + self.set_use_fingerprint(use_fingerprint) + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype) if hp is not None: self.set_hyperparams(hp) return self @@ -212,11 +272,11 @@ def get_KXX(self, features, **kwargs): Make the symmetric kernel matrix. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Features with N data points. Returns: - KXX : array + KXX: array The symmetric kernel matrix if features2=None. 
The number of rows in the array is N, or N*(D+1) if get_derivatives=True. @@ -230,16 +290,16 @@ def get_KQX(self, features, features2, get_derivatives=True, **kwargs): Make the kernel matrix. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Features with N data points. - features2 : (M,D) array or (M) list of fingerprint objects + features2: (M,D) array or (M) list of fingerprint objects Features with M data points and D dimensions. If it is not given a squared kernel from features is generated. get_derivatives: bool Whether to predict derivatives of target. Returns: - KQX : array + KQX: array The kernel matrix if features2 is not None. The number of rows in the array is N, or N*(D+1) if get_derivatives=True. @@ -250,10 +310,22 @@ def get_KQX(self, features, features2, get_derivatives=True, **kwargs): def get_arrays(self, features, features2=None, **kwargs): "Get the feature matrix from the fingerprint." - X = np.array([feature.get_vector() for feature in features]) + if self.use_fingerprint: + X = asarray( + [feature.get_vector() for feature in features], + dtype=self.dtype, + ) + else: + X = array(features, dtype=self.dtype) if features2 is None: return X - Q = np.array([feature.get_vector() for feature in features2]) + if self.use_fingerprint: + Q = asarray( + [feature.get_vector() for feature in features2], + dtype=self.dtype, + ) + else: + Q = array(features2, dtype=self.dtype) return X, Q def get_symmetric_absolute_distances( @@ -266,7 +338,8 @@ def get_symmetric_absolute_distances( Calculate the symmetric absolute distance matrix in (scaled) feature space. """ - return pdist(features, metric=metric) + D = pdist(features, metric=metric) + return asarray(D, dtype=self.dtype) def get_absolute_distances( self, @@ -276,7 +349,8 @@ def get_absolute_distances( **kwargs, ): "Calculate the absolute distance matrix in (scaled) feature space." 
- return cdist(features, features2, metric=metric) + D = cdist(features, features2, metric=metric) + return asarray(D, dtype=self.dtype) def get_feature_dimension(self, features, **kwargs): "Get the dimension of the features." @@ -287,10 +361,14 @@ def get_feature_dimension(self, features, **kwargs): def get_fp_deriv(self, features, dim=None, **kwargs): "Get the derivatives of all the fingerprints." if dim is None: - return np.array( - [fp.get_derivatives() for fp in features] + return asarray( + [fp.get_derivatives() for fp in features], + dtype=self.dtype, ).transpose((2, 0, 1)) - return np.array([fp.get_derivatives(dim) for fp in features]) + return asarray( + [fp.get_derivatives(dim) for fp in features], + dtype=self.dtype, + ) def get_derivative_dimension(self, features, **kwargs): "Get the dimension of the features." @@ -305,6 +383,7 @@ def get_arguments(self): use_derivatives=self.use_derivatives, use_fingerprint=self.use_fingerprint, hp=self.hp, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/kernel/se.py b/catlearn/regression/gp/kernel/se.py index 3e55ed06..714083cb 100644 --- a/catlearn/regression/gp/kernel/se.py +++ b/catlearn/regression/gp/kernel/se.py @@ -1,48 +1,35 @@ -import numpy as np +from numpy import ( + append, + asarray, + einsum, + exp, + diag, + diagonal, + fill_diagonal, + ones, + tile, + transpose, + zeros, +) from scipy.spatial.distance import squareform from .kernel import Kernel class SE(Kernel): - def __init__( - self, - use_derivatives=False, - use_fingerprint=False, - hp={}, - **kwargs, - ): - """ - The Kernel class with hyperparameters. - Squared exponential or radial basis kernel class. - - Parameters: - use_derivatives: bool - Whether to use the derivatives of the targets. - use_fingerprint: bool - Whether fingerprint objects is given or arrays. - hp: dict - A dictionary of the hyperparameters in the log-space. 
- The hyperparameters should be given as flatten arrays, - like hp=dict(length=np.array([-0.7])). - """ - super().__init__( - use_derivatives=use_derivatives, - use_fingerprint=use_fingerprint, - hp=hp, - **kwargs, - ) + """ + The Squared exponential or radial basis function kernel class + with hyperparameters. + """ def get_KXX(self, features, **kwargs): # Scale features or fingerprints with their length-scales - if self.use_fingerprint: - X = self.get_arrays(features) * np.exp(-self.hp["length"][0]) - else: - X = features * np.exp(-self.hp["length"][0]) + X = self.get_arrays(features) + X *= exp(-self.hp["length"][0]) # Calculate the symmetric scaled distance matrix D = self.get_symmetric_absolute_distances(X, metric="sqeuclidean") # Calculate the normal covariance matrix - K = squareform(np.exp((-0.5) * D)) - np.fill_diagonal(K, 1.0) + K = squareform(exp((-0.5) * D)) + fill_diagonal(K, 1.0) # Whether to the extended covariance matrix for derivative of targets if self.use_derivatives: if self.use_fingerprint: @@ -52,16 +39,12 @@ def get_KXX(self, features, **kwargs): def get_KQX(self, features, features2, get_derivatives=True, **kwargs): # Scale features or fingerprints with their length-scales - length_scale = np.exp(-self.hp["length"][0]) - if self.use_fingerprint: - Q, X = self.get_arrays(features, features2) - Q = Q * length_scale - X = X * length_scale - else: - Q = features * length_scale - X = features2 * length_scale + length_scale = exp(-self.hp["length"][0]) + Q, X = self.get_arrays(features, features2) + Q *= length_scale + X *= length_scale D = self.get_absolute_distances(Q, X, metric="sqeuclidean") - K = np.exp((-0.5) * D) + K = exp((-0.5) * D) if get_derivatives or self.use_derivatives: if self.use_fingerprint: return self.get_KQX_ext_fp( @@ -99,15 +82,15 @@ def get_KXX_ext(self, features, X, D, K, **kwargs): The covariance matrix without derivatives of the features. Returns: - (N*D+N,N*D+N) array : The extended symmetric kernel matrix. 
+ (N*D+N,N*D+N) array: The extended symmetric kernel matrix. """ # Get dimensions - nd1, xdim = np.shape(X) + nd1, xdim = X.shape nd1x = nd1 * xdim nd1x1 = nd1x + nd1 # Get the derivative and hessian of the scaled distance matrix dDpre, dD = self.get_distance_derivative(X, X, nd1, nd1, xdim, axis=0) - ddDpre = -2.0 * np.exp(-2 * self.hp["length"][0]) + ddDpre = -2.0 * exp(-2 * self.hp["length"][0]) # The first derivative of the kernel dKpre, dK = self.get_derivative_K(K) dKdD = (-dDpre * dKpre) * dK @@ -116,27 +99,27 @@ def get_KXX_ext(self, features, X, D, K, **kwargs): ddKdD = ((-dDpre * dDpre * ddKpre) * ddK) * dD dKddD = (ddDpre * dKpre) * dK # Calculate the full symmetric kernel matrix - Kext = np.zeros((nd1x1, nd1x1)) + Kext = zeros((nd1x1, nd1x1), dtype=self.dtype) Kext[:nd1, :nd1] = K.copy() # Derivative part - Kext[:nd1, nd1:] = np.transpose( + Kext[:nd1, nd1:] = transpose( dKdD * dD, (1, 0, 2), ).reshape(nd1, nd1x) Kext[nd1:, :nd1] = Kext[:nd1, nd1:].T # Hessian part + xdimm = xdim - 1 for d1 in range(1, xdim): nd1d1 = nd1 * d1 nd1d11 = nd1d1 + nd1 - Kext[nd1d1:nd1d11, nd1d11:] = np.transpose( - ddKdD[d1:] * dD[d1 - 1], + d1m = d1 - 1 + Kext[nd1d1:nd1d11, nd1d11:] = transpose( + ddKdD[d1:] * dD[d1m], (1, 0, 2), ).reshape(nd1, nd1 * (xdim - d1)) Kext[nd1d11:, nd1d1:nd1d11] = Kext[nd1d1:nd1d11, nd1d11:].T - Kext[nd1d1:nd1d11, nd1d1:nd1d11] = ( - ddKdD[d1 - 1] * dD[d1 - 1] + dKddD - ) - Kext[nd1x:nd1x1, nd1x:nd1x1] = (ddKdD[xdim - 1] * dD[xdim - 1]) + dKddD + Kext[nd1d1:nd1d11, nd1d1:nd1d11] = ddKdD[d1m] * dD[d1m] + dKddD + Kext[nd1x:nd1x1, nd1x:nd1x1] = (ddKdD[xdimm] * dD[xdimm]) + dKddD return Kext def get_KXX_ext_fp(self, features, X, D, K, **kwargs): @@ -154,7 +137,7 @@ def get_KXX_ext_fp(self, features, X, D, K, **kwargs): The covariance matrix without derivatives of the features. Returns: - (N*Dx+N,N*Dx+N) array : The extended symmetric kernel matrix. + (N*Dx+N,N*Dx+N) array: The extended symmetric kernel matrix. 
""" # Get dimensions nd1 = len(X) @@ -176,29 +159,30 @@ def get_KXX_ext_fp(self, features, X, D, K, **kwargs): dKdD = (-dDpre * dKpre) * dK # The hessian of the kernel ddKpre, ddK = self.get_hessian_K(K) - ddKdD = ((dDpre * dDpre * ddKpre) * ddK) * np.transpose(dD, (0, 2, 1)) + ddKdD = ((dDpre * dDpre * ddKpre) * ddK) * transpose(dD, (0, 2, 1)) dKddD = (ddDpre * dKpre) * dK # Calculate the full symmetric kernel matrix - Kext = np.zeros((nd1x1, nd1x1)) + Kext = zeros((nd1x1, nd1x1), dtype=self.dtype) Kext[:nd1, :nd1] = K.copy() # Derivative part - Kext[:nd1, nd1:] = np.transpose( + Kext[:nd1, nd1:] = transpose( dKdD * dD, (1, 0, 2), ).reshape(nd1, nd1x) Kext[nd1:, :nd1] = Kext[:nd1, nd1:].T # Hessian part + xdimm = xdim - 1 for d1 in range(1, xdim): nd1d1 = nd1 * d1 nd1d11 = nd1d1 + nd1 - Kext[nd1d1:nd1d11, nd1d1:] = np.transpose( - (ddKdD[d1 - 1] * dD[d1 - 1 :]) - + (dKddD * ddD[d1 - 1, d1 - 1 :]), + d1m = d1 - 1 + Kext[nd1d1:nd1d11, nd1d1:] = transpose( + (ddKdD[d1m] * dD[d1m:]) + (dKddD * ddD[d1m, d1m:]), (1, 0, 2), ).reshape(nd1, nd1 * (xdim - d1 + 1)) Kext[nd1d11:, nd1d1:nd1d11] = Kext[nd1d1:nd1d11, nd1d11:].T - Kext[nd1x:nd1x1, nd1x:nd1x1] = (ddKdD[xdim - 1] * dD[xdim - 1]) + ( - dKddD * ddD[xdim - 1, xdim - 1 :] + Kext[nd1x:nd1x1, nd1x:nd1x1] = (ddKdD[xdimm] * dD[xdimm]) + ( + dKddD * ddD[xdimm, xdimm:] ) return Kext @@ -233,15 +217,15 @@ def get_KQX_ext( Whether to predict derivatives of target. Returns: - (M*D+N,N*D+N) array : The extended kernel matrix. + (M*D+N,N*D+N) array: The extended kernel matrix. 
""" # Get dimensions nd1 = len(Q) - nd2, xdim = np.shape(X) + nd2, xdim = X.shape nrows = nd1 * (xdim + 1) if get_derivatives else nd1 ncol = nd2 * (xdim + 1) if self.use_derivatives else nd2 # The full kernel matrix - Kext = np.zeros((nrows, ncol)) + Kext = zeros((nrows, ncol), dtype=self.dtype) Kext[:nd1, :nd2] = K.copy() # Get the derivative of the scaled distance matrix dDpre, dD = self.get_distance_derivative(Q, X, nd1, nd2, xdim, axis=0) @@ -251,7 +235,7 @@ def get_KQX_ext( btensor = dKdD * dD if self.use_derivatives: # Derivative part of X - Kext[:nd1, nd2:] = np.transpose( + Kext[:nd1, nd2:] = transpose( btensor, (1, 0, 2), ).reshape(nd1, nd2 * xdim) @@ -262,11 +246,11 @@ def get_KQX_ext( if self.use_derivatives: ddKpre, ddK = self.get_hessian_K(K) ddKdD = ((-dDpre * dDpre * ddKpre) * ddK) * dD - ddDpre = -2.0 * np.exp(-2 * self.hp["length"][0]) + ddDpre = -2.0 * exp(-2 * self.hp["length"][0]) dKddD = (ddDpre * dKpre) * dK btensor = ddKdD[:, None, :, :] * dD btensor[range(xdim), range(xdim), :, :] += dKddD - Kext[nd1:, nd2:] = np.transpose( + Kext[nd1:, nd2:] = transpose( btensor, (0, 2, 1, 3), ).reshape(nd1 * xdim, nd2 * xdim) @@ -303,7 +287,7 @@ def get_KQX_ext_fp( Whether to predict derivatives of target. Returns: - (M*Dx+N,N*Dx+N) array : The extended kernel matrix. + (M*Dx+N,N*Dx+N) array: The extended kernel matrix. 
""" # Get dimensions nd1, nd2 = len(Q), len(X) @@ -311,7 +295,7 @@ def get_KQX_ext_fp( nrows = nd1 * (xdim + 1) if get_derivatives else nd1 ncol = nd2 * (xdim + 1) if self.use_derivatives else nd2 # The full kernel matrix - Kext = np.zeros((nrows, ncol)) + Kext = zeros((nrows, ncol), dtype=self.dtype) Kext[:nd1, :nd2] = K.copy() # The first derivative of the kernel dKpre, dK = self.get_derivative_K(K) @@ -326,7 +310,7 @@ def get_KQX_ext_fp( **kwargs, ) dKdD = (dDpre2 * dKpre) * dK - Kext[:nd1, nd2:] = np.transpose( + Kext[:nd1, nd2:] = transpose( dKdD * dD2, (1, 0, 2), ).reshape(nd1, nd2 * xdim) @@ -352,8 +336,8 @@ def get_KQX_ext_fp( ddKpre, ddK = self.get_hessian_K(K) ddKdD = ((dDpre1 * dDpre2 * ddKpre) * ddK) * dD1 dKddD = (ddDpre * dKpre) * dK - Kext[nd1:, nd2:] = np.transpose( - np.einsum("ijk,ljk->iljk", ddKdD, dD2, optimize=True) + Kext[nd1:, nd2:] = transpose( + einsum("ijk,ljk->iljk", ddKdD, dD2, optimize=True) + (ddD * dKddD), (0, 2, 1, 3), ).reshape(nd1 * xdim, nd2 * xdim) @@ -372,7 +356,7 @@ def get_derivative_K(self, K, **kwargs): The distance matrix contains one of the length scales. Parameters: - K : (N,M) array + K: (N,M) array The kernel matrix without derivatives. Returns: @@ -390,7 +374,7 @@ def get_hessian_K(self, K, **kwargs): The distance matrices contain one of the length scales. Parameters: - K : (N,M) array + K: (N,M) array The kernel matrix without derivatives. 
Returns: @@ -402,24 +386,24 @@ def get_hessian_K(self, K, **kwargs): def diag(self, features, get_derivatives=True, **kwargs): nd1 = len(features) - K_diag = np.ones(nd1) + K_diag = ones(nd1, dtype=self.dtype) if get_derivatives: if self.use_fingerprint: fp_deriv = self.get_fp_deriv(features) - Kdd_diag = np.einsum( + Kdd_diag = einsum( "dij,dij->di", fp_deriv, fp_deriv, optimize=True, ).reshape(-1) - return np.append( + return append( K_diag, - np.exp(-2.0 * self.hp["length"][0]) * Kdd_diag, + exp(-2.0 * self.hp["length"][0]) * Kdd_diag, ) - return np.append( + return append( K_diag, - np.exp(-2.0 * self.hp["length"][0]) - * np.ones(nd1 * len(features[0])), + exp(-2.0 * self.hp["length"][0]) + * ones(nd1 * len(features[0]), dtype=self.dtype), ) return K_diag @@ -429,10 +413,8 @@ def diag_deriv(self, features, **kwargs): def get_gradients(self, features, hp, KXX, correction=True, **kwargs): hp_deriv = {} if "length" in hp: - if self.use_fingerprint: - X = self.get_arrays(features) * np.exp(-self.hp["length"][0]) - else: - X = features * np.exp(-self.hp["length"][0]) + X = self.get_arrays(features) + X *= exp(-self.hp["length"][0]) D = squareform( self.get_symmetric_absolute_distances(X, metric="sqeuclidean") ) @@ -447,11 +429,11 @@ def get_gradients(self, features, hp, KXX, correction=True, **kwargs): nd1x1 = nd1x + nd1 # Get the gradient of the kernel K = KXX[:nd1, :nd1].copy() - K_diag = np.diag(KXX) + K_diag = diag(KXX) Kd = KXX.copy() Kd[:nd1, :nd1] = Kd[:nd1, :nd1] * D D2 = D - 2 - Kd[:nd1, nd1:] = Kd[:nd1, nd1:] * np.tile(D2, (1, xdim)) + Kd[:nd1, nd1:] *= tile(D2, (1, xdim)) Kd[nd1:, :nd1] = Kd[:nd1, nd1:].T ddKpre, ddK = self.get_hessian_K(K) if self.use_fingerprint: @@ -463,7 +445,7 @@ def get_gradients(self, features, hp, KXX, correction=True, **kwargs): axis=0, **kwargs, ) - ddKdD = ((dDpre * dDpre * ddKpre) * ddK) * np.transpose( + ddKdD = ((dDpre * dDpre * ddKpre) * ddK) * transpose( dD, (0, 2, 1), ) @@ -477,30 +459,29 @@ def get_gradients(self, 
features, hp, KXX, correction=True, **kwargs): axis=0, ) ddKdD = ((-dDpre * dDpre * ddKpre) * ddK) * dD + xdimm = xdim - 1 for d1 in range(1, xdim): nd1d1 = nd1 * d1 nd1d11 = nd1d1 + nd1 - ddKdDdD = 2 * np.transpose( - ddKdD[d1 - 1] * dD[d1 - 1 :], + d1m = d1 - 1 + ddKdDdD = 2.0 * transpose( + ddKdD[d1m] * dD[d1m:], (1, 0, 2), ).reshape(nd1, nd1 * (xdim - d1 + 1)) - Kd[nd1d1:nd1d11, nd1d1:] = ( - Kd[nd1d1:nd1d11, nd1d1:] - * np.tile(D2, (1, xdim - d1 + 1)) - ) - ddKdDdD + Kd[nd1d1:nd1d11, nd1d1:] *= tile(D2, (1, xdim - d1 + 1)) + Kd[nd1d1:nd1d11, nd1d1:] -= ddKdDdD Kd[nd1d11:, nd1d1:nd1d11] = Kd[nd1d1:nd1d11, nd1d11:].T - Kd[nd1x:nd1x1, nd1x:nd1x1] = Kd[ - nd1x:nd1x1, nd1x:nd1x1 - ] * D2 - (2 * ddKdD[xdim - 1] * dD[xdim - 1]) + Kd[nd1x:nd1x1, nd1x:nd1x1] *= D2 + Kd[nd1x:nd1x1, nd1x:nd1x1] -= 2.0 * ddKdD[xdimm] * dD[xdimm] if correction: Kd[range(nd1x), range(nd1x)] += ( - (1 / (1 / (2.3e-16) - (len(K_diag) ** 2))) - * (2 * np.sum(K_diag)) - * (-2 * np.sum(K_diag[nd1:])) + (1.0 / (1.0 / self.eps - (len(K_diag) ** 2))) + * (2.0 * K_diag.sum()) + * (-2.0 * K_diag[nd1:].sum()) ) else: Kd = D * KXX - hp_deriv["length"] = np.array([Kd]) + hp_deriv["length"] = asarray([Kd]) return hp_deriv def get_distance_derivative(self, Q, X, nd1, nd2, dim, axis=0, **kwargs): @@ -508,7 +489,7 @@ def get_distance_derivative(self, Q, X, nd1, nd2, dim, axis=0, **kwargs): Get the derivative of the scaled distance matrix wrt. the features/fingerprint. """ - dDpre = 2.0 * np.exp(-self.hp["length"][0]) + dDpre = 2.0 * exp(-self.hp["length"][0]) if axis != 0: dDpre = -dDpre return dDpre, Q.T.reshape(dim, nd1, 1) - X.T.reshape(dim, 1, nd2) @@ -525,20 +506,20 @@ def get_distance_derivative_fp( Get the derivative of the distance matrix wrt. the features/fingerprint. 
""" - dDpre = 2.0 * np.exp(-self.hp["length"][0]) + dDpre = 2.0 * exp(-self.hp["length"][0]) if axis != 0: dDpre = -dDpre if X is None: - Q_chain = np.einsum("lj,ikj->ilk", Q, fp_deriv) - dQ = Q_chain - np.diagonal(Q_chain, axis1=1, axis2=2)[:, None, :] + Q_chain = einsum("lj,ikj->ilk", Q, fp_deriv) + dQ = Q_chain - diagonal(Q_chain, axis1=1, axis2=2)[:, None, :] return dDpre, dQ if axis == 0: - Q_chain = np.einsum("kj,ikj->ik", Q, fp_deriv) - X_chain = np.einsum("lj,ikj->ikl", X, fp_deriv) + Q_chain = einsum("kj,ikj->ik", Q, fp_deriv) + X_chain = einsum("lj,ikj->ikl", X, fp_deriv) dQ = Q_chain[:, :, None] - X_chain return dDpre, dQ - Q_chain = np.einsum("lj,ikj->ilk", Q, fp_deriv) - X_chain = np.einsum("kj,ikj->ik", X, fp_deriv) + Q_chain = einsum("lj,ikj->ilk", Q, fp_deriv) + X_chain = einsum("kj,ikj->ik", X, fp_deriv) dQ = Q_chain - X_chain[:, None, :] return dDpre, dQ @@ -547,7 +528,7 @@ def get_distance_hessian(self, **kwargs): Get the derivative of the scaled distance matrix wrt. the features/fingerprint. """ - dDpre = -2.0 * np.exp(-2 * self.hp["length"][0]) + dDpre = -2.0 * exp(-2 * self.hp["length"][0]) return dDpre, 1.0 def get_distance_hessian_fp(self, fp_deriv1, fp_deriv2, **kwargs): @@ -555,8 +536,8 @@ def get_distance_hessian_fp(self, fp_deriv1, fp_deriv2, **kwargs): Get the derivative of the scaled distance matrix wrt. the features/fingerprint. """ - dDpre = -2.0 * np.exp(-2 * self.hp["length"][0]) - hes_fp = np.einsum( + dDpre = -2.0 * exp(-2 * self.hp["length"][0]) + hes_fp = einsum( "dji,eki->dejk", fp_deriv1, fp_deriv2, diff --git a/catlearn/regression/gp/means/constant.py b/catlearn/regression/gp/means/constant.py index 4b4a9e8c..f36bd3ad 100644 --- a/catlearn/regression/gp/means/constant.py +++ b/catlearn/regression/gp/means/constant.py @@ -1,48 +1,55 @@ -import numpy as np +from numpy import full, zeros from .prior import Prior class Prior_constant(Prior): - def __init__(self, yp=0.0, add=0.0, **kwargs): + """ + The prior mean of the targets. 
+ The prior mean is used as a baseline of the target values. + The prior mean is a constant from the target values + if given else it is 0. + A value can be added to the constant. + """ + + def __init__(self, yp=0.0, add=0.0, dtype=float, **kwargs): """ - The prior mean of the targets. - The prior mean is used as a baseline of the target values. - The prior mean is a constant from the target values - if given else it is 0. - A value can be added to the constant. + Initialize the prior mean. Parameters: - yp : float + yp: float The prior mean constant - add : float + add: float A value added to the found prior mean from data. + dtype: type + The data type of the arrays. """ - self.update_arguments(yp=yp, add=add, **kwargs) + self.update_arguments(yp=yp, add=add, dtype=dtype, **kwargs) def get(self, features, targets, get_derivatives=True, **kwargs): if get_derivatives: - yp = np.zeros(targets.shape) + yp = zeros(targets.shape, dtype=self.dtype) yp[:, 0] = self.prior_mean return yp - return np.full(targets.shape, self.prior_mean) + return full(targets.shape, self.prior_mean, dtype=self.dtype) def get_parameters(self, **kwargs): return dict(yp=self.yp, add=self.add) - def update_arguments(self, yp=None, add=None, **kwargs): + def update_arguments(self, yp=None, add=None, dtype=None, **kwargs): """ Update the class with its arguments. The existing arguments are used if they are not given. Parameters: - yp : float + yp: float The prior mean constant - add : float + add: float A value added to the found prior mean from data. Returns: self: The updated object itself. """ + super().update_arguments(dtype=dtype) if add is not None: self.add = add if yp is not None: @@ -53,7 +60,7 @@ def update_arguments(self, yp=None, add=None, **kwargs): def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(yp=self.yp, add=self.add) + arg_kwargs = dict(yp=self.yp, add=self.add, dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/means/first.py b/catlearn/regression/gp/means/first.py index 690bd867..486ac5db 100644 --- a/catlearn/regression/gp/means/first.py +++ b/catlearn/regression/gp/means/first.py @@ -2,20 +2,12 @@ class Prior_first(Prior_constant): - def __init__(self, yp=0.0, add=0.0, **kwargs): - """ - The prior mean of the targets. - The prior mean is used as a baseline of the target values. - The prior mean is the first target value if given else it is 0. - A value can be added to the constant. - - Parameters: - yp : float - The prior mean constant - add : float - A value added to the found prior mean from data. - """ - self.update_arguments(yp=yp, add=add, **kwargs) + """ + The prior mean of the targets. + The prior mean is used as a baseline of the target values. + The prior mean is the first target value if given else it is 0. + A value can be added to the constant. + """ def update(self, features, targets, **kwargs): self.update_arguments(yp=targets.item(0)) diff --git a/catlearn/regression/gp/means/max.py b/catlearn/regression/gp/means/max.py index 947d7197..eaf71c9c 100644 --- a/catlearn/regression/gp/means/max.py +++ b/catlearn/regression/gp/means/max.py @@ -1,23 +1,14 @@ -import numpy as np from .constant import Prior_constant class Prior_max(Prior_constant): - def __init__(self, yp=0.0, add=0.0, **kwargs): - """ - The prior mean of the targets. - The prior mean is used as a baseline of the target values. - The prior mean is the maximum target value if given else it is 0. - A value can be added to the constant. - - Parameters: - yp : float - The prior mean constant - add : float - A value added to the found prior mean from data. 
- """ - self.update_arguments(yp=yp, add=add, **kwargs) + """ + The prior mean of the targets. + The prior mean is used as a baseline of the target values. + The prior mean is the maximum target value if given else it is 0. + A value can be added to the constant. + """ def update(self, features, targets, **kwargs): - self.update_arguments(yp=np.max(targets[:, 0])) + self.update_arguments(yp=targets[:, 0].max()) return self diff --git a/catlearn/regression/gp/means/mean.py b/catlearn/regression/gp/means/mean.py index f518ef32..6474658d 100644 --- a/catlearn/regression/gp/means/mean.py +++ b/catlearn/regression/gp/means/mean.py @@ -1,23 +1,14 @@ -import numpy as np from .constant import Prior_constant class Prior_mean(Prior_constant): - def __init__(self, yp=0.0, add=0.0, **kwargs): - """ - The prior mean of the targets. - The prior mean is used as a baseline of the target values. - The prior mean is the mean of the target values if given else it is 0. - A value can be added to the constant. - - Parameters: - yp : float - The prior mean constant - add : float - A value added to the found prior mean from data. - """ - self.update_arguments(yp=yp, add=add, **kwargs) + """ + The prior mean of the targets. + The prior mean is used as a baseline of the target values. + The prior mean is the mean of the target values if given else it is 0. + A value can be added to the constant. + """ def update(self, features, targets, **kwargs): - self.update_arguments(yp=np.mean(targets[:, 0])) + self.update_arguments(yp=targets[:, 0].mean()) return self diff --git a/catlearn/regression/gp/means/median.py b/catlearn/regression/gp/means/median.py index 63df8803..a0048ee4 100644 --- a/catlearn/regression/gp/means/median.py +++ b/catlearn/regression/gp/means/median.py @@ -1,24 +1,16 @@ -import numpy as np +from numpy import median from .constant import Prior_constant class Prior_median(Prior_constant): - def __init__(self, yp=0.0, add=0.0, **kwargs): - """ - The prior mean of the targets. 
- The prior mean is used as a baseline of the target values. - The prior mean is the median of the target values - if given else it is 0. - A value can be added to the constant. - - Parameters: - yp : float - The prior mean constant - add : float - A value added to the found prior mean from data. - """ - self.update_arguments(yp=yp, add=add, **kwargs) + """ + The prior mean of the targets. + The prior mean is used as a baseline of the target values. + The prior mean is the median of the target values + if given else it is 0. + A value can be added to the constant. + """ def update(self, features, targets, **kwargs): - self.update_arguments(yp=np.median(targets[:, 0])) + self.update_arguments(yp=median(targets[:, 0])) return self diff --git a/catlearn/regression/gp/means/min.py b/catlearn/regression/gp/means/min.py index 12cf0aee..f577d5bb 100644 --- a/catlearn/regression/gp/means/min.py +++ b/catlearn/regression/gp/means/min.py @@ -1,24 +1,15 @@ -import numpy as np from .constant import Prior_constant class Prior_min(Prior_constant): - def __init__(self, yp=0.0, add=0.0, **kwargs): - """ - The prior mean of the targets. - The prior mean is used as a baseline of the target values. - The prior mean is the minimum of the target value - if given else it is 0. - A value can be added to the constant. - - Parameters: - yp : float - The prior mean constant - add : float - A value added to the found prior mean from data. - """ - self.update_arguments(yp=yp, add=add, **kwargs) + """ + The prior mean of the targets. + The prior mean is used as a baseline of the target values. + The prior mean is the minimum of the target value + if given else it is 0. + A value can be added to the constant. 
+ """ def update(self, features, targets, **kwargs): - self.update_arguments(yp=np.min(targets[:, 0])) + self.update_arguments(yp=targets[:, 0].min()) return self diff --git a/catlearn/regression/gp/means/prior.py b/catlearn/regression/gp/means/prior.py index 2cc2fc88..7cb1614d 100644 --- a/catlearn/regression/gp/means/prior.py +++ b/catlearn/regression/gp/means/prior.py @@ -1,19 +1,27 @@ class Prior: - def __init__(self, **kwargs): + """ + The prior mean of the targets. + The prior mean is used as a baseline of the target values. + """ + + def __init__(self, dtype=float, **kwargs): """ - The prior mean of the targets. - The prior mean is used as a baseline of the target values. + Initialize the prior mean. + + Parameters: + dtype: type + The data type of the arrays. """ - self.update_arguments(**kwargs) + self.update_arguments(dtype=dtype, **kwargs) def get(self, features, targets, get_derivatives=True, **kwargs): """ Get the prior mean of the targets. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Training features with N data points. - targets : (N,1) array or (N,1+D) array + targets: (N,1) array or (N,1+D) array Training targets with N data points. If get_derivatives=True, the training targets is in first column and derivatives is in the next columns. @@ -27,9 +35,9 @@ def update(self, features, targets, **kwargs): Update the prior mean with the given data. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Training features with N data points. - targets : (N,1) array or (N,1+D) array + targets: (N,1) array or (N,1+D) array Training targets with N data points. If get_derivatives=True, the training targets is in first column and derivatives is in the next columns. 
@@ -49,20 +57,41 @@ def get_parameters(self, **kwargs): """ return dict() - def update_arguments(self, **kwargs): + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + return self + + def update_arguments(self, dtype=None, **kwargs): """ Update the class with its arguments. The existing arguments are used if they are not given. + Parameters: + dtype: type + The data type of the arrays. + Returns: self: The updated object itself. """ + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype) return self def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization - arg_kwargs = dict() + arg_kwargs = dict(dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/models/gp.py b/catlearn/regression/gp/models/gp.py index e507bdbd..90ca52ea 100644 --- a/catlearn/regression/gp/models/gp.py +++ b/catlearn/regression/gp/models/gp.py @@ -1,23 +1,34 @@ -import numpy as np -from .model import ModelProcess +from numpy import asarray, array, diag, empty, exp, full +from .model import ( + ModelProcess, + Prior_mean, + SE, + HyperparameterFitter, + LogLikelihood, +) class GaussianProcess(ModelProcess): + """ + The Gaussian Process Regressor. + The Gaussian process uses Cholesky decomposition for + inverting the kernel matrix. + The hyperparameters can be optimized. + """ + def __init__( self, - prior=None, - kernel=None, - hpfitter=None, + prior=Prior_mean(), + kernel=SE(use_derivatives=False, use_fingerprint=False), + hpfitter=HyperparameterFitter(func=LogLikelihood()), hp={}, use_derivatives=False, use_correction=True, + dtype=float, **kwargs ): """ - The Gaussian Process Regressor. 
- The Gaussian process uses Cholesky decomposition for - inverting the kernel matrix. - The hyperparameters can be optimized. + Initialize the Gaussian Process Regressor. Parameters: prior: Prior class @@ -31,35 +42,24 @@ def __init__( The hyperparameters are used in the log-space. use_derivatives: bool Use derivatives/gradients for training and predictions. - use_correction : bool + use_correction: bool Use the noise correction on the covariance matrix. + dtype: type + The data type of the arrays. """ # Set default descriptors self.trained_model = False self.corr = 0.0 self.features = [] - self.L = np.array([]) + self.L = empty(0, dtype=dtype) self.low = False - self.coef = np.array([]) + self.coef = empty(0, dtype=dtype) self.prefactor = 1.0 # Set default hyperparameters - self.hp = {"noise": np.array([-8.0]), "prefactor": np.array([0.0])} - # Set the default prior mean class - if prior is None: - from ..means.mean import Prior_mean - - prior = Prior_mean() - # Set the default kernel class - if kernel is None: - from ..kernel import SE - - kernel = SE(use_derivatives=use_derivatives, use_fingerprint=False) - # The default hyperparameter optimization method - if hpfitter is None: - from ..hpfitter import HyperparameterFitter - from ..objectivefunctions.gp.likelihood import LogLikelihood - - hpfitter = HyperparameterFitter(func=LogLikelihood()) + self.hp = { + "noise": asarray([-8.0], dtype=dtype), + "prefactor": asarray([0.0], dtype=dtype), + } # Set all the arguments self.update_arguments( prior=prior, @@ -68,53 +68,54 @@ def __init__( hp=hp, use_derivatives=use_derivatives, use_correction=use_correction, + dtype=dtype, **kwargs ) def set_hyperparams(self, new_params, **kwargs): - self.kernel.set_hyperparams(new_params) - # Prefactor and relative-noise hyperparameter is always in the GP + # Set the hyperparameters in the parent class + super().set_hyperparams(new_params, **kwargs) + # Set the prefactor hyperparameter if "prefactor" in new_params: - 
self.hp["prefactor"] = np.array( - new_params["prefactor"], dtype=float + self.hp["prefactor"] = array( + new_params["prefactor"], + dtype=self.dtype, ).reshape(-1) self.prefactor = self.calculate_prefactor() - if "noise" in new_params: - self.hp["noise"] = np.array( - new_params["noise"], dtype=float - ).reshape(-1) - if "noise_deriv" in new_params: - self.hp["noise_deriv"] = np.array( - new_params["noise_deriv"], dtype=float - ).reshape(-1) return self def get_gradients(self, features, hp, KXX, **kwargs): hp_deriv = {} n_data, m_data = len(features), len(KXX) if "prefactor" in hp: - hp_deriv["prefactor"] = np.array( + hp_deriv["prefactor"] = asarray( [ 2.0 - * np.exp(2.0 * self.hp["prefactor"][0]) + * exp(2.0 * self.hp["prefactor"][0]) * self.add_regularization(KXX, n_data, overwrite=False) - ] + ], ) if "noise" in hp: - K_deriv = np.full(m_data, 2.0 * np.exp(2.0 * self.hp["noise"][0])) + K_deriv = full( + m_data, + 2.0 * exp(2.0 * self.hp["noise"][0]), + dtype=self.dtype, + ) if "noise_deriv" in self.hp: K_deriv[n_data:] = 0.0 - hp_deriv["noise"] = np.array([np.diag(K_deriv)]) + hp_deriv["noise"] = asarray([diag(K_deriv)]) else: - hp_deriv["noise"] = np.array([np.diag(K_deriv)]) + hp_deriv["noise"] = asarray([diag(K_deriv)]) if "noise_deriv" in hp: - K_deriv = np.full( - m_data, 2.0 * np.exp(2.0 * self.hp["noise_deriv"][0]) + K_deriv = full( + m_data, + 2.0 * exp(2.0 * self.hp["noise_deriv"][0]), + dtype=self.dtype, ) K_deriv[:n_data] = 0.0 - hp_deriv["noise_deriv"] = np.array([np.diag(K_deriv)]) + hp_deriv["noise_deriv"] = asarray([diag(K_deriv)]) hp_deriv.update(self.kernel.get_gradients(features, hp, KXX=KXX)) return hp_deriv def calculate_prefactor(self, features=None, targets=None, **kwargs): - return np.exp(2.0 * self.hp["prefactor"][0]) + return exp(2.0 * self.hp["prefactor"][0]) diff --git a/catlearn/regression/gp/models/model.py b/catlearn/regression/gp/models/model.py index 0825163f..0a10b01c 100644 --- a/catlearn/regression/gp/models/model.py +++ 
b/catlearn/regression/gp/models/model.py @@ -1,66 +1,73 @@ -import numpy as np +from numpy import ( + array, + asarray, + diag, + einsum, + empty, + exp, + finfo, + inf, + matmul, + nan_to_num, +) from scipy.linalg import cho_factor, cho_solve +import pickle +from ..means.mean import Prior_mean +from ..kernel import SE +from ..hpfitter import HyperparameterFitter +from ..objectivefunctions.gp.likelihood import LogLikelihood class ModelProcess: + """ + The Model Process Regressor. + The Model process uses Cholesky decomposition for + inverting the kernel matrix. + The hyperparameters can be optimized. + """ + def __init__( self, - prior=None, - kernel=None, - hpfitter=None, + prior=Prior_mean(), + kernel=SE(use_derivatives=False, use_fingerprint=False), + hpfitter=HyperparameterFitter(func=LogLikelihood()), hp={}, use_derivatives=False, use_correction=True, + dtype=float, **kwargs, ): """ - The Model Process Regressor. - The Model process uses Cholesky decomposition for - inverting the kernel matrix. - The hyperparameters can be optimized. + Initialize the Model Process Regressor. Parameters: - prior : Prior class + prior: Prior class The prior mean given for the data. - kernel : Kernel class + kernel: Kernel class The kernel function used for the kernel matrix. - hpfitter : HyperparameterFitter class + hpfitter: HyperparameterFitter class A class to optimize hyperparameters - hp : dictionary + hp: dictionary A dictionary of hyperparameters like noise and length scale. The hyperparameters are used in the log-space. - use_derivatives : bool + use_derivatives: bool Use derivatives/gradients of the targets for training and predictions. - use_correction : bool + use_correction: bool Use the noise correction on the covariance matrix. + dtype: type + The data type of the arrays. 
""" # Set default descriptors self.trained_model = False self.corr = 0.0 self.features = [] - self.L = np.array([]) + self.L = empty(0, dtype=dtype) self.low = False - self.coef = np.array([]) + self.coef = empty(0, dtype=dtype) self.prefactor = 1.0 # Set default relative-noise hyperparameter - self.hp = {"noise": np.array([-8.0])} - # Set the default prior mean class - if prior is None: - from ..means.mean import Prior_mean - - prior = Prior_mean() - # Set the default kernel class - if kernel is None: - from ..kernel import SE - - kernel = SE(use_derivatives=use_derivatives, use_fingerprint=False) - # The default hyperparameter optimization method - if hpfitter is None: - from ..hpfitter import HyperparameterFitter - from ..objectivefunctions.gp.likelihood import LogLikelihood - - hpfitter = HyperparameterFitter(func=LogLikelihood()) + self.hp = {"noise": asarray([-8.0], dtype=dtype)} # Set all the arguments self.update_arguments( prior=prior, @@ -69,6 +76,7 @@ def __init__( hp=hp, use_derivatives=use_derivatives, use_correction=use_correction, + dtype=dtype, **kwargs, ) @@ -77,9 +85,9 @@ def train(self, features, targets, **kwargs): Train the model with training features and targets. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Training features with N data points. - targets : (N,1) array or (N,1+D) array + targets: (N,1) array or (N,1+D) array Training targets with N data points. If use_derivatives=True, the training targets is in first column and derivatives is in the next columns. 
@@ -94,11 +102,11 @@ def train(self, features, targets, **kwargs): # Make the kernel matrix decomposition self.L, self.low = self.calculate_kernel_decomposition(features) # Modify the targets with the prior mean and rearrangement - targets = self.modify_targets(features, targets) + targets_mod = self.modify_targets(features, targets) # Calculate the coefficients - self.coef = self.calculate_coefficients(features, targets) + self.coef = self.calculate_coefficients(features, targets_mod) # Calculate the prefactor for variance predictions - self.prefactor = self.calculate_prefactor(features, targets) + self.prefactor = self.calculate_prefactor(features, targets_mod) return self def optimize( @@ -115,23 +123,23 @@ def optimize( Optimize the hyperparameter of the model and its kernel. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Training features with N data points. - targets : (N,1) array or (N,D+1) array + targets: (N,1) array or (N,D+1) array Training targets with or without derivatives with N data points. - retrain : bool + retrain: bool Whether to retrain the model after the optimization. - hp : dict + hp: dict Use a set of hyperparameters to optimize from else the current set is used. The hyperparameters are used in the log-space. - maxiter : int + maxiter: int Maximum number of iterations used by local or global optimization method. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. - verbose : bool + verbose: bool Print the optimized hyperparameters and the object function value. 
@@ -141,7 +149,7 @@ def optimize( """ # Ensure the targets are in the right format if not self.use_derivatives: - targets = targets[:, 0:1].copy() + targets = array(targets[:, 0:1], dtype=self.dtype) # Optimize the hyperparameters sol = self.hpfitter.fit( features, @@ -149,6 +157,7 @@ def optimize( model=self, hp=hp, pdis=pdis, + retrain=retrain, **kwargs, ) # Print the solution @@ -177,33 +186,33 @@ def predict( coefficients from training data. Parameters: - features : (M,D) array or (M) list of fingerprint objects + features: (M,D) array or (M) list of fingerprint objects Test features with M data points. - get_derivatives : bool + get_derivatives: bool Whether to predict the derivatives of the prediction mean. - get_variance : bool + get_variance: bool Whether to predict the variance of the targets. - include_noise : bool + include_noise: bool Whether to include the noise of data in the predicted variance. - get_derivtives_var : bool + get_derivtives_var: bool Whether to predict the variance of the derivatives of the targets. - get_var_derivatives : bool + get_var_derivatives: bool Whether to calculate the derivatives of the predicted variance of the targets. Returns: - Y_predict : (M,1) or (M,1+D) array + Y_predict: (M,1) or (M,1+D) array The predicted mean values with or without derivatives. - var : (M,1) or (M,1+D) array + var: (M,1) or (M,1+D) array The predicted variance of the targets with or without derivatives. - var_deriv : (M,D) array + var_deriv: (M,D) array The derivatives of the predicted variance of the targets. """ # Check if the model is trained if not self.trained_model: - raise Exception("The model is not trained!") + raise AttributeError("The model is not trained!") # Calculate the kernel matrix of test and training data if ( get_derivatives @@ -256,25 +265,25 @@ def predict_mean( coefficients from training data. 
Parameters: - features : (M,D) array or (M) list of fingerprint objects + features: (M,D) array or (M) list of fingerprint objects Test features with M data points. - KQX : (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array + KQX: (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array The kernel matrix of the test and training features. If KQX=None, it is calculated. - get_derivatives : bool + get_derivatives: bool Whether to predict the derivatives of the prediction mean. Returns: - Y_predict : (M,1) array + Y_predict: (M,1) array The predicted mean values if get_derivatives=False. or - Y_predict : (M,1+D) array + Y_predict: (M,1+D) array The predicted mean values and its derivatives if get_derivatives=True. """ # Check if the model is trained if not self.trained_model: - raise Exception("The model is not trained!") + raise AttributeError("The model is not trained!") # Get the number of test points m_data = len(features) # Calculate the kernel of test and training data if it is not given @@ -288,11 +297,11 @@ def predict_mean( if not get_derivatives: KQX = KQX[:m_data] # Calculate the prediction mean - Y_predict = np.matmul(KQX, self.coef) + Y_predict = matmul(KQX, self.coef) # Rearrange prediction Y_predict = Y_predict.reshape(m_data, -1, order="F") # Add the prior mean - Y_predict = Y_predict + self.get_priormean( + Y_predict += self.get_priormean( features, Y_predict, get_derivatives=get_derivatives, @@ -311,29 +320,29 @@ def predict_variance( Calculate the predicted variance of the test targets. Parameters: - features : (M,D) array or (M) list of fingerprint objects + features: (M,D) array or (M) list of fingerprint objects Test features with M data points. - KQX : (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array + KQX: (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array The kernel matrix of the test and training features. If KQX=None, it is calculated. - get_derivatives : bool + get_derivatives: bool Whether to predict the uncertainty of the derivatives of the targets. 
- include_noise : bool + include_noise: bool Whether to include the noise of data in the predicted variance Returns: - var : (M,1) array + var: (M,1) array The predicted variance of the targets if get_derivatives=False. or - var : (M,1+D) array + var: (M,1+D) array The predicted variance of the targets and its derivatives if get_derivatives=True. """ # Check if the model is trained if not self.trained_model: - raise Exception("The model is not trained!") + raise AttributeError("The model is not trained!") # Get the number of test points m_data = len(features) # Calculate the kernel of test and training data if it is not given @@ -355,7 +364,7 @@ def predict_variance( ) # Calculate predicted variance var = ( - k - np.einsum("ij,ji->i", KQX, self.calculate_CinvKQX(KQX)) + k - einsum("ij,ji->i", KQX, self.calculate_CinvKQX(KQX)) ).reshape(-1, 1) # Scale prediction variance with the prefactor var = var * self.prefactor @@ -368,19 +377,19 @@ def calculate_variance_derivatives(self, features, KQX=None, **kwargs): the test targets. Parameters: - features : (M,D) array or (M) list of fingerprint objects + features: (M,D) array or (M) list of fingerprint objects Test features with M data points. - KQX : (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array + KQX: (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array The kernel matrix of the test and training features. If KQX=None, it is calculated. Returns: - var_deriv : (M,D) array + var_deriv: (M,D) array The derivatives of the predicted variance of the targets. """ # Check if the model is trained if not self.trained_model: - raise Exception("The model is not trained!") + raise AttributeError("The model is not trained!") # Get the number of test points m_data = len(features) # Calculate the kernel matrix of test and training data @@ -393,36 +402,111 @@ def calculate_variance_derivatives(self, features, KQX=None, **kwargs): # Calculate derivative of the diagonal wrt. 
the test features k_deriv = self.kernel_deriv_diag(features) # Calculate derivative of the predicted variance - var_deriv = k_deriv - 2.0 * np.einsum( - "ij,ji->i", KQX[m_data:], self.calculate_CinvKQX(KQX[:m_data]) + var_deriv = k_deriv - 2.0 * einsum( + "ij,ji->i", + KQX[m_data:], + self.calculate_CinvKQX(KQX[:m_data]), ).reshape(-1, 1) # Scale prediction variance with the prefactor var_deriv = var_deriv * self.prefactor # Rearrange derivative of variance return var_deriv.reshape(m_data, -1, order="F") + def predict_covariance( + self, + features, + KQX=None, + get_derivatives=False, + include_noise=False, + **kwargs, + ): + """ + Calculate the predicted covariance matrix of the test targets. + + Parameters: + features: (M,D) array or (M) list of fingerprint objects + Test features with M data points. + KQX: (M,N) or (M,N+N*D) or (M+M*D,N+N*D) array + The kernel matrix of the test and training features. + If KQX=None, it is calculated. + get_derivatives: bool + Whether to predict the uncertainty of the derivatives of + the targets. + include_noise: bool + Whether to include the noise of data in the predicted variance + + Returns: + var: (M,M) array + The predicted covariance matrix of the targets + if get_derivatives=False. + or + var: (M*(1+D),M*(1+D)) array + The predicted covariance matrix of the targets + and its derivatives if get_derivatives=True. 
+ + """ + # Check if the model is trained + if not self.trained_model: + raise AttributeError("The model is not trained!") + # Get the number of test points + n_data = len(features) + # Calculate the kernel of test and training data if it is not given + if KQX is None: + KQX = self.get_kernel( + features, + self.features, + get_derivatives=get_derivatives, + ) + else: + if not get_derivatives: + KQX = KQX[:n_data] + # Calculate the kernel matrix of the test data + KQQ = self.get_kernel( + features, + get_derivatives=get_derivatives, + ) + # Add noise to the diagonal of the kernel matrix + if include_noise: + add_v = self.inf_to_num(exp(2.0 * self.hp["noise"][0])) + self.corr + m_data = len(KQQ) + if "noise_deriv" in self.hp: + KQQ[range(n_data), range(n_data)] += add_v + add_v = self.inf_to_num(exp(2.0 * self.hp["noise_deriv"][0])) + add_v += self.corr + KQQ[range(n_data, m_data), range(n_data, m_data)] += add_v + else: + KQQ[range(m_data), range(m_data)] += add_v + # Calculate predicted variance + var = KQQ - matmul(KQX, self.calculate_CinvKQX(KQX)) + # Scale prediction variance with the prefactor + var = var * self.prefactor + # Return the predicted covariance matrix + return var + def set_hyperparams(self, new_params, **kwargs): """ Set or update the hyperparameters for the model. Parameters: - new_params : dictionary + new_params: dictionary A dictionary of hyperparameters that are added or updated. The hyperparameters are used in the log-space. Returns: self: The object itself with the new hyperparameters. 
""" + # Set the hyperparameters in the kernel self.kernel.set_hyperparams(new_params) + # Set the relative-noise hyperparameter if "noise" in new_params: - self.hp["noise"] = np.array( + self.hp["noise"] = array( new_params["noise"], - dtype=float, + dtype=self.dtype, ).reshape(-1) if "noise_deriv" in new_params: - self.hp["noise_deriv"] = np.array( + self.hp["noise_deriv"] = array( new_params["noise_deriv"], - dtype=float, + dtype=self.dtype, ).reshape(-1) return self @@ -449,23 +533,23 @@ def get_kernel( Make the kernel matrix. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Features with N data points. - features2 : (M,D) array or (M) list of fingerprint objects + features2: (M,D) array or (M) list of fingerprint objects Features with M data points and D dimensions. If it is not given a squared kernel from features is generated. get_derivatives: bool Whether to predict derivatives of target. Returns: - KXX : array + KXX: array The symmetric kernel matrix if features2=None. The number of rows in the array is N, or N*(D+1) if get_derivatives=True. The number of columns in the array is N, or N*(D+1) if use_derivatives=True. or - KQX : array + KQX: array The kernel matrix if features2 is not None. The number of rows in the array is N, or N*(D+1) if get_derivatives=True. @@ -493,9 +577,9 @@ def update_priormean(self, features, targets, **kwargs): Update the prior mean with the data. Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Training features with N data points. - targets : (N,1) array or (N,1+D) array + targets: (N,1) array or (N,1+D) array Training targets with N data points. If use_derivatives=True, the training targets is in first column and derivatives is in the next columns. @@ -517,13 +601,13 @@ def get_priormean( Get the prior mean for the given features. 
Parameters: - features : (N,D) array or (N) list of fingerprint objects + features: (N,D) array or (N) list of fingerprint objects Features with N data points. - targets : (N,1) array or (N,1+D) array + targets: (N,1) array or (N,1+D) array Targets with N data points. If get_derivatives=True, the targets is in first column and derivatives is in the next columns. - get_derivatives : bool + get_derivatives: bool Whether to give the prior mean of the derivatives of targets. Returns: @@ -551,11 +635,11 @@ def get_gradients(self, features, hp, KXX, **kwargs): wrt.the hyperparameters. Parameters: - features : (N,D) array + features: (N,D) array Features with N data points and D dimensions. - hp : list + hp: list A list with elements of the hyperparameters that are optimized. - KXX : (N,N) array + KXX: (N,N) array The kernel matrix of training data. Returns: @@ -572,19 +656,88 @@ def get_use_fingerprint(self): "Get whether a fingerprint is used as the features." return self.kernel.get_use_fingerprint() + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + # Set the machine precision + self.eps = 1.1 * finfo(self.dtype).eps + # Set the data type of the attributes + self.prior.set_dtype(dtype=dtype, **kwargs) + self.kernel.set_dtype(dtype=dtype, **kwargs) + self.hpfitter.set_dtype(dtype=dtype, **kwargs) + # Set the data type of the hyperparameters + self.set_hyperparams(self.hp) + return self + + def set_seed(self, seed, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. 
+ """ + self.hpfitter.set_seed(seed) + return self + + def set_use_derivatives(self, use_derivatives, **kwargs): + """ + Set whether to use derivatives/gradients for training and predictions. + + Parameters: + use_derivatives: bool + Use derivatives/gradients for training and predictions. + + Returns: + self: The updated object itself. + """ + # Set whether to use derivatives for the target + self.use_derivatives = use_derivatives + # Set whether to use derivatives for the kernel + self.kernel.set_use_derivatives(use_derivatives) + return self + + def set_use_fingerprint(self, use_fingerprint, **kwargs): + """ + Set whether to use a fingerprint as the features. + + Parameters: + use_fingerprint: bool + Use a fingerprint as the features. + + Returns: + self: The updated object itself. + """ + # Set whether to use a fingerprint for the features + self.kernel.set_use_fingerprint(use_fingerprint) + return self + def save_model(self, filename="model.pkl", **kwargs): """ Save the model object to a file. Parameters: - filename : str + filename: str The name of the file where the object is saved. Returns: self: The object itself. """ - import pickle - with open(filename, "wb") as file: pickle.dump(self, file) return self @@ -594,14 +747,12 @@ def load_model(self, filename="model.pkl", **kwargs): Load the model object from a file. Parameters: - filename : str + filename: str The name of the file where the object is saved. Returns: model: The loaded model object. """ - import pickle - with open(filename, "rb") as file: model = pickle.load(file) return model @@ -614,6 +765,7 @@ def update_arguments( hp={}, use_derivatives=None, use_correction=None, + dtype=None, **kwargs, ): """ @@ -621,19 +773,21 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - prior : Prior class + prior: Prior class The prior given for new data. - kernel : Kernel class + kernel: Kernel class The kernel function used for the kernel matrix. 
- hpfitter : HyperparameterFitter class + hpfitter: HyperparameterFitter class A class to optimize hyperparameters - hp : dictionary + hp: dictionary A dictionary of hyperparameters like noise and length scale. The hyperparameters are used in the log-space. - use_derivatives : bool + use_derivatives: bool Use derivatives/gradients for training and predictions. - use_correction : bool + use_correction: bool Use the noise correction on the covariance matrix. + dtype: type + The data type of the arrays. Returns: self: The updated instance itself. @@ -646,13 +800,16 @@ def update_arguments( self.kernel = kernel.copy() # Set whether to use derivatives for the target if use_derivatives is not None: - self.use_derivatives = use_derivatives + self.set_use_derivatives(use_derivatives) # Set noise correction if use_correction is not None: self.use_correction = use_correction # The hyperparameter optimization method if hpfitter is not None: self.hpfitter = hpfitter.copy() + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype) # Set hyperparameters self.set_hyperparams(hp) # Check if the attributes agree @@ -670,39 +827,39 @@ def add_regularization(self, K, n_data, overwrite=True, **kwargs): K = K.copy() m_data = len(K) # Calculate the correction, so the kernel matrix is invertible - self.corr = self.get_correction(np.diag(K)) + self.corr = self.get_correction(diag(K)) + add_v = self.inf_to_num(exp(2.0 * self.hp["noise"][0])) + self.corr if "noise_deriv" in self.hp: - add_v = ( - self.inf_to_num(np.exp(2 * self.hp["noise"][0])) + self.corr - ) K[range(n_data), range(n_data)] += add_v - add_v = ( - self.inf_to_num(np.exp(2 * self.hp["noise_deriv"][0])) - + self.corr - ) + add_v = self.inf_to_num(exp(2.0 * self.hp["noise_deriv"][0])) + add_v += self.corr K[range(n_data, m_data), range(n_data, m_data)] += add_v else: - add_v = ( - self.inf_to_num(np.exp(2 * self.hp["noise"][0])) + self.corr - ) K[range(m_data), range(m_data)] += 
add_v return K def inf_to_num(self, value, replacing=1e300): "Check if a value is infinite and then replace it with a large number." - if value == np.inf: + if value == inf: return replacing return value - def get_correction(self, K_diag, **kwargs): + def get_correction(self, K_diag=None, **kwargs): """ Get the noise correction, so that the training covariance matrix is always invertible. + + Parameters: + K_diag: N or N*(D+1) array (optional) + The diagonal elements of the kernel matrix. + If it is not given, the stored noise correction is used. """ - if self.use_correction: - K_sum = np.sum(K_diag) + if self.use_correction and K_diag is not None: + K_sum = K_diag.sum() n = len(K_diag) - corr = (K_sum**2) * (1.0 / ((1.0 / 2.3e-16) - (n**2))) + corr = (K_sum**2) * (1.0 / ((1.0 / self.eps) - (n**2))) + elif self.use_correction and K_diag is None: + corr = self.corr else: corr = 0.0 return corr @@ -718,19 +875,19 @@ def calculate_kernel_decomposition(self, features, **kwargs): def modify_targets(self, features, targets, **kwargs): "Modify the targets with the prior mean and rearrangement." # Subtracting prior mean from target - targets = targets.copy() - self.update_priormean(features, targets, L=self.L, low=self.low) - targets = targets - self.get_priormean( + targets_mod = array(targets, dtype=self.dtype) + self.update_priormean(features, targets_mod, L=self.L, low=self.low) + targets_mod -= self.get_priormean( features, targets, get_derivatives=self.use_derivatives, ) # Rearrange targets if derivatives are used if self.use_derivatives: - targets = targets.T.reshape(-1, 1) + targets_mod = targets_mod.T.reshape(-1, 1) else: - targets = targets[:, 0:1].copy() - return targets + targets_mod = targets_mod[:, 0:1] + return targets_mod def calculate_coefficients(self, features, targets, **kwargs): "Calculate the coefficients for the prediction mean." 
@@ -755,12 +912,12 @@ def kernel_diag( k = self.kernel.diag(features, get_derivatives=get_derivatives) # Add noise to the kernel elements if include_noise: - noise = np.nan_to_num(np.exp(2.0 * self.hp["noise"][0])) + noise = nan_to_num(exp(2.0 * self.hp["noise"][0])) noise += self.corr if get_derivatives and "noise_deriv" in self.hp: k[:m_data] += noise k[m_data:] += ( - np.nan_to_num(np.exp(2.0 * self.hp["noise_deriv"][0])) + nan_to_num(exp(2.0 * self.hp["noise_deriv"][0])) + self.corr ) else: @@ -781,7 +938,7 @@ def calculate_CinvKQX(self, KQX, **kwargs): def check_attributes(self): "Check if all attributes agree between the class and subclasses." if self.use_derivatives != self.kernel.get_use_derivatives(): - raise Exception( + raise ValueError( "The Model and the Kernel do not agree " "whether to use derivatives!" ) @@ -797,6 +954,7 @@ def get_arguments(self): hp=self.get_hyperparams(), use_derivatives=self.use_derivatives, use_correction=self.use_correction, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict( diff --git a/catlearn/regression/gp/models/tp.py b/catlearn/regression/gp/models/tp.py index 6eb42763..aa0db610 100644 --- a/catlearn/regression/gp/models/tp.py +++ b/catlearn/regression/gp/models/tp.py @@ -1,25 +1,36 @@ -import numpy as np -from .model import ModelProcess +from numpy import asarray, diag, dot, empty, exp, full +from .model import ( + ModelProcess, + Prior_mean, + SE, + HyperparameterFitter, +) +from ..objectivefunctions.tp.likelihood import LogLikelihood class TProcess(ModelProcess): + """ + The Student's T Process Regressor. + The Student's T process uses Cholesky decomposition for + inverting the kernel matrix. + The hyperparameters can be optimized. 
+ """ + def __init__( self, - prior=None, - kernel=None, - hpfitter=None, + prior=Prior_mean(), + kernel=SE(use_derivatives=False, use_fingerprint=False), + hpfitter=HyperparameterFitter(func=LogLikelihood()), hp={}, use_derivatives=False, use_correction=True, a=1e-20, b=1e-20, + dtype=float, **kwargs, ): """ - The Student's T Process Regressor. - The Student's T process uses Cholesky decomposition for - inverting the kernel matrix. - The hyperparameters can be optimized. + Initialize the Student's T Process Regressor. Parameters: prior: Prior class @@ -33,7 +44,7 @@ def __init__( The hyperparameters are used in the log-space. use_derivatives: bool Use derivatives/gradients for training and predictions. - use_correction : bool + use_correction: bool Use the noise correction on the covariance matrix. a: float Hyperprior shape parameter for the inverse-gamma distribution @@ -41,35 +52,19 @@ def __init__( b: float Hyperprior scale parameter for the inverse-gamma distribution of the prefactor. + dtype: type + The data type of the arrays. 
""" # Set default descriptors self.trained_model = False self.corr = 0.0 self.features = [] - self.L = np.array([]) + self.L = empty(0, dtype=dtype) self.low = False - self.coef = np.array([]) + self.coef = empty(0, dtype=dtype) self.prefactor = 1.0 - # Set default relative-noise hyperparameters - self.hp = {"noise": np.array([-8.0])} - # Set the default prior mean class - if prior is None: - from ..means.mean import Prior_mean - - prior = Prior_mean() - # Set the default kernel class - if kernel is None: - from ..kernel import SE - - kernel = SE(use_derivatives=use_derivatives, use_fingerprint=False) - # The default hyperparameter optimization method - if hpfitter is None: - from ..hpfitter import HyperparameterFitter - from ..objectivefunctions.tp.likelihood import LogLikelihood - - hpfitter = HyperparameterFitter(func=LogLikelihood()) - # Set noise hyperparameters - self.hp = {"noise": np.array([-8.0])} + # Set default hyperparameters + self.hp = {"noise": asarray([-8.0], dtype=dtype)} # Set all the arguments self.update_arguments( prior=prior, @@ -80,22 +75,10 @@ def __init__( use_correction=use_correction, a=a, b=b, + dtype=dtype, **kwargs, ) - def set_hyperparams(self, new_params={}, **kwargs): - self.kernel.set_hyperparams(new_params) - # Noise is always in the TP - if "noise" in new_params: - self.hp["noise"] = np.array( - new_params["noise"], dtype=float - ).reshape(-1) - if "noise_deriv" in new_params: - self.hp["noise_deriv"] = np.array( - new_params["noise_deriv"], dtype=float - ).reshape(-1) - return self - def update_arguments( self, prior=None, @@ -106,6 +89,7 @@ def update_arguments( use_correction=None, a=None, b=None, + dtype=None, **kwargs, ): """ @@ -113,18 +97,18 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - prior : Prior class + prior: Prior class The prior given for new data. - kernel : Kernel class + kernel: Kernel class The kernel function used for the kernel matrix. 
- hpfitter : HyperparameterFitter class + hpfitter: HyperparameterFitter class A class to optimize hyperparameters - hp : dictionary + hp: dictionary A dictionary of hyperparameters like noise and length scale. The hyperparameters are used in the log-space. - use_derivatives : bool + use_derivatives: bool Use derivatives/gradients for training and predictions. - use_correction : bool + use_correction: bool Use the noise correction on the covariance matrix. a: float Hyperprior shape parameter for the inverse-gamma distribution @@ -132,53 +116,52 @@ def update_arguments( b: float Hyperprior scale parameter for the inverse-gamma distribution of the prefactor. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. """ - # Set the prior mean class - if prior is not None: - self.prior = prior.copy() - # Set the kernel class - if kernel is not None: - self.kernel = kernel.copy() - # Set whether to use derivatives for the target - if use_derivatives is not None: - self.use_derivatives = use_derivatives - # Set noise correction - if use_correction is not None: - self.use_correction = use_correction - # The hyperparameter optimization method - if hpfitter is not None: - self.hpfitter = hpfitter.copy() + super().update_arguments( + prior=prior, + kernel=kernel, + hpfitter=hpfitter, + hp=hp, + use_derivatives=use_derivatives, + use_correction=use_correction, + dtype=dtype, + **kwargs, + ) # The hyperprior shape parameter if a is not None: self.a = float(a) # The hyperprior scale parameter if b is not None: self.b = float(b) - # Set hyperparameters - self.set_hyperparams(hp) - # Check if the attributes agree - self.check_attributes() return self def get_gradients(self, features, hp, KXX, **kwargs): hp_deriv = {} n_data, m_data = len(features), len(KXX) if "noise" in hp: - K_deriv = np.full(m_data, 2.0 * np.exp(2.0 * self.hp["noise"][0])) + K_deriv = full( + m_data, + 2.0 * exp(2.0 * self.hp["noise"][0]), + dtype=self.dtype, + ) if "noise_deriv" 
in self.hp: K_deriv[n_data:] = 0.0 - hp_deriv["noise"] = np.array([np.diag(K_deriv)]) + hp_deriv["noise"] = asarray([diag(K_deriv)]) else: - hp_deriv["noise"] = np.array([np.diag(K_deriv)]) + hp_deriv["noise"] = asarray([diag(K_deriv)]) if "noise_deriv" in hp: - K_deriv = np.full( - m_data, 2.0 * np.exp(2.0 * self.hp["noise_deriv"][0]) + K_deriv = full( + m_data, + 2.0 * exp(2.0 * self.hp["noise_deriv"][0]), + dtype=self.dtype, ) K_deriv[:n_data] = 0.0 - hp_deriv["noise_deriv"] = np.array([np.diag(K_deriv)]) + hp_deriv["noise_deriv"] = asarray([diag(K_deriv)]) hp_deriv.update(self.kernel.get_gradients(features, hp, KXX=KXX)) return hp_deriv @@ -188,7 +171,7 @@ def get_hyperprior_parameters(self, **kwargs): def calculate_prefactor(self, features, targets, **kwargs): n2 = float(len(targets) - 2) if len(targets) > 1 else 0.0 - tcoef = np.matmul(targets.T, self.coef).item(0) + tcoef = dot(targets.reshape(-1), self.coef.reshape(-1)) return (2.0 * self.b + tcoef) / (2.0 * self.a + n2) def get_arguments(self): @@ -203,6 +186,7 @@ def get_arguments(self): use_correction=self.use_correction, a=self.a, b=self.b, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict( diff --git a/catlearn/regression/gp/objectivefunctions/batch.py b/catlearn/regression/gp/objectivefunctions/batch.py index 0979c3cb..36362715 100644 --- a/catlearn/regression/gp/objectivefunctions/batch.py +++ b/catlearn/regression/gp/objectivefunctions/batch.py @@ -1,9 +1,26 @@ -import numpy as np +from numpy import ( + append, + arange, + array_split, + concatenate, + tile, +) +from numpy.random import default_rng, Generator, RandomState from .objectivefunction import ObjectiveFuction from ..means.constant import Prior_constant class BatchFuction(ObjectiveFuction): + """ + The objective function that is used to optimize the hyperparameters. + The instance splits the training data into batches. + A given objective function is then used as + an objective function for the batches. 
def __init__(
    self,
    func,
    get_prior_mean=False,
    batch_size=25,
    equal_size=False,
    use_same_prior_mean=True,
    seed=None,
    dtype=float,
    **kwargs,
):
    """
    Initialize the objective function.

    Parameters:
        func: ObjectiveFunction class
            A class with the objective function used
            to optimize the hyperparameters.
        get_prior_mean: bool
            Whether to get the parameters of the prior mean
            in the solution.
        batch_size: int
            The number of training points in each batch.
        equal_size: bool
            Whether the clusters are forced to have the same size.
        use_same_prior_mean: bool
            Whether to use the same prior mean for all models.
        seed: int (optional)
            The random seed.
            The seed can be an integer, RandomState, or Generator instance.
            If not given, the default random number generator is used.
        dtype: type (optional)
            The data type of the arrays.
            If None, the default data type is used.
    """
    # Set the arguments
    self.update_arguments(
        func=func,
        get_prior_mean=get_prior_mean,
        batch_size=batch_size,
        equal_size=equal_size,
        use_same_prior_mean=use_same_prior_mean,
        seed=seed,
        dtype=dtype,
        **kwargs,
    )

def set_dtype(self, dtype, **kwargs):
    """
    Set the data type of the arrays.

    Parameters:
        dtype: type
            The data type of the arrays.

    Returns:
        self: The updated instance itself.
    """
    super().set_dtype(dtype=dtype)
    # Propagate the data type to the wrapped objective function.
    self.func.set_dtype(dtype=dtype)
    return self

def set_seed(self, seed=None, **kwargs):
    """
    Set the random seed.

    Parameters:
        seed: int (optional)
            The random seed.
            The seed can be an integer, RandomState, or Generator instance.
            If not given, the default random number generator is used.

    Returns:
        self: The instance itself.
    """
    if seed is None:
        self.seed = None
        self.rng = default_rng()
    elif isinstance(seed, (Generator, RandomState)):
        # An existing generator is used directly.
        self.seed = seed
        self.rng = seed
    else:
        # Accept anything default_rng understands (builtin ints,
        # NumPy integers, SeedSequence, entropy arrays, ...) so that
        # self.rng is always defined after this call.
        self.seed = seed
        self.rng = default_rng(seed)
    # Propagate the seed to the wrapped objective function.
    self.func.set_seed(seed=self.seed)
    return self

def update_arguments(
    self,
    func=None,
    get_prior_mean=None,
    batch_size=None,
    equal_size=None,
    use_same_prior_mean=None,
    seed=None,
    dtype=None,
    **kwargs,
):
    """
    Update the objective function with its arguments.
    The existing arguments are used if they are not given.

    Parameters:
        func: ObjectiveFunction class
            A class with the objective function used
            to optimize the hyperparameters.
        get_prior_mean: bool
            Whether to get the parameters of the prior mean
            in the solution.
        batch_size: int
            The number of training points in each batch.
        equal_size: bool
            Whether the clusters are forced to have the same size.
        use_same_prior_mean: bool
            Whether to use the same prior mean for all models.
        seed: int (optional)
            The random seed.
            The seed can be an integer, RandomState, or Generator instance.
            If not given, the default random number generator is used.
        dtype: type (optional)
            The data type of the arrays.
            If None, the default data type is used.

    Returns:
        self: The updated object itself.
    """
    if func is not None:
        self.func = func.copy()
        # Set descriptor of the objective function
        self.use_analytic_prefactor = func.use_analytic_prefactor
        self.use_optimized_noise = func.use_optimized_noise
    if batch_size is not None:
        self.batch_size = int(batch_size)
    if equal_size is not None:
        self.equal_size = equal_size
    if use_same_prior_mean is not None:
        self.use_same_prior_mean = use_same_prior_mean
    # Update the objective function
    if len(kwargs.keys()):
        self.func.update_arguments(**kwargs)
    # Set the seed
    if seed is not None or not hasattr(self, "seed"):
        self.set_seed(seed=seed)
    # Update the arguments of the parent class
    super().update_arguments(
        get_prior_mean=get_prior_mean,
        dtype=dtype,
    )
    return self
concatenate( [hp[para] for para in sorted(hp.keys())] ) self.sol["hp"] = hp.copy() @@ -242,36 +296,33 @@ def get_number_batches(self, n_data, **kwargs): n_batches = n_batches + 1 return n_batches - def randomized_batches(self, indicies, n_data, n_batches, **kwargs): - "Randomized indicies used for batches." - # Permute the indicies - i_perm = self.get_permutation(indicies) + def randomized_batches(self, indices, n_data, n_batches, **kwargs): + "Randomized indices used for batches." + # Permute the indices + i_perm = self.get_permutation(indices) # Ensure equal sizes of batches if chosen if self.equal_size: i_perm = self.ensure_equal_sizes(i_perm, n_data, n_batches) - i_batches = np.array_split(i_perm, n_batches) + i_batches = array_split(i_perm, n_batches) return i_batches - def get_permutation(self, indicies): - "Permute the indicies" - if self.seed: - rng = np.random.default_rng(seed=self.seed) - return rng.permutation(indicies) - return np.random.permutation(indicies) + def get_permutation(self, indices): + "Permute the indices" + return self.rng.permutation(indices) def ensure_equal_sizes(self, i_perm, n_data, n_batches, **kwargs): - "Extend the permuted indicies so the clusters have equal sizes." + "Extend the permuted indices so the clusters have equal sizes." 
# Find the number of points that should be added n_missing = (n_batches * self.batch_size) - n_data - # Extend the permuted indicies + # Extend the permuted indices if n_missing > 0: if n_missing > n_data: - i_perm = np.append( + i_perm = append( i_perm, - np.tile(i_perm, (n_missing // n_data) + 1)[:n_missing], + tile(i_perm, (n_missing // n_data) + 1)[:n_missing], ) else: - i_perm = np.append(i_perm, i_perm[:n_missing]) + i_perm = append(i_perm, i_perm[:n_missing]) return i_perm def get_arguments(self): @@ -284,6 +335,7 @@ def get_arguments(self): equal_size=self.equal_size, use_same_prior_mean=self.use_same_prior_mean, seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/objectivefunctions/best_batch.py b/catlearn/regression/gp/objectivefunctions/best_batch.py index 2a63acd3..f5f1b6b8 100644 --- a/catlearn/regression/gp/objectivefunctions/best_batch.py +++ b/catlearn/regression/gp/objectivefunctions/best_batch.py @@ -1,52 +1,17 @@ -import numpy as np +from numpy import arange, inf from .batch import BatchFuction class BestBatchFuction(BatchFuction): - def __init__( - self, - func, - get_prior_mean=False, - batch_size=25, - equal_size=False, - use_same_prior_mean=True, - seed=1, - **kwargs, - ): - """ - The objective function that is used to optimize the hyperparameters. - The instance splits the training data into batches. - A given objective function is then used as - an objective function for the batches. - The lowest function value and it corresponding hyperparameters - from a single batch are used. - BestBatchFuction is not recommended for gradient-based optimization! - - Parameters: - func : ObjectiveFunction class - A class with the objective function used - to optimize the hyperparameters. - get_prior_mean : bool - Whether to get the parameters of the prior mean - in the solution. - equal_size : bool - Whether the clusters are forced to have the same size. 
- use_same_prior_mean : bool - Whether to use the same prior mean for all models. - seed : int (optional) - The random seed used to permute the indicies. - If seed=None or False or 0, a random seed is not used. - """ - # Set the arguments - super().__init__( - func=func, - get_prior_mean=get_prior_mean, - batch_size=batch_size, - equal_size=equal_size, - use_same_prior_mean=use_same_prior_mean, - seed=seed, - **kwargs, - ) + """ + The objective function that is used to optimize the hyperparameters. + The instance splits the training data into batches. + A given objective function is then used as + an objective function for the batches. + The lowest function value and it corresponding hyperparameters + from a single batch are used. + BestBatchFuction is not recommended for gradient-based optimization! + """ def function( self, @@ -76,17 +41,17 @@ def function( self.sol = self.func.sol return output # Update the model with hyperparameters and prior mean - hp, parameters_set = self.make_hp(theta, parameters) + hp, _ = self.make_hp(theta, parameters) model = self.update_model(model, hp) self.set_same_prior_mean(model, X, Y) # Calculate the number of batches n_batches = self.get_number_batches(n_data) - indicies = np.arange(n_data) + indices = arange(n_data) i_batches = self.randomized_batches( - indicies, n_data, n_batches, **kwargs + indices, n_data, n_batches, **kwargs ) # Sum function values together from batches - fvalue = np.inf + fvalue = inf deriv = None for i_batch in i_batches: # Get the feature and target batch diff --git a/catlearn/regression/gp/objectivefunctions/gp/factorized_gpp.py b/catlearn/regression/gp/objectivefunctions/gp/factorized_gpp.py index 753fc33f..f18d7bb3 100644 --- a/catlearn/regression/gp/objectivefunctions/gp/factorized_gpp.py +++ b/catlearn/regression/gp/objectivefunctions/gp/factorized_gpp.py @@ -1,52 +1,29 @@ -import numpy as np +from numpy import ( + append, + asarray, + concatenate, + diag, + einsum, + empty, + exp, + log, + matmul, 
+ pi, + zeros, +) from .factorized_likelihood import FactorizedLogLikelihood class FactorizedGPP(FactorizedLogLikelihood): - def __init__( - self, - get_prior_mean=False, - modification=False, - ngrid=80, - bounds=None, - noise_optimizer=None, - **kwargs, - ): - """ - The factorized Geissers surrogate predictive probability - objective function that is used to optimize the hyperparameters. - The prefactor hyperparameter is determined from - an analytical expression. - An eigendecomposition is performed to get the eigenvalues. - The relative-noise hyperparameter can be searched from - a single eigendecomposition for each length-scale hyperparameter. - - Parameters: - get_prior_mean: bool - Whether to save the parameters of the prior mean - in the solution. - modification: bool - Whether to modify the analytical prefactor value in the end. - The prefactor hyperparameter becomes larger - if modification=True. - ngrid: int - Number of grid points that are searched in - the relative-noise hyperparameter. - bounds: Boundary_conditions class - A class of the boundary conditions of - the relative-noise hyperparameter. - noise_optimizer : Noise line search optimizer class - A line search optimization method for - the relative-noise hyperparameter. - """ - super().__init__( - get_prior_mean=get_prior_mean, - modification=modification, - ngrid=ngrid, - bounds=bounds, - noise_optimizer=noise_optimizer, - **kwargs, - ) + """ + The factorized Geissers surrogate predictive probability + objective function that is used to optimize the hyperparameters. + The prefactor hyperparameter is determined from + an analytical expression. + An eigendecomposition is performed to get the eigenvalues. + The relative-noise hyperparameter can be searched from + a single eigendecomposition for each length-scale hyperparameter. 
+ """ def function( self, @@ -134,29 +111,27 @@ def derivative( n_data, **kwargs, ): - gpp_deriv = np.array([]) - D_n = D + np.exp(2 * noise) + gpp_deriv = empty(0, dtype=self.dtype) + D_n = D + exp(2.0 * noise) UDn = U / D_n - KXX_inv = np.matmul(UDn, U.T) - K_inv_diag = np.diag(KXX_inv) - prefactor2 = np.mean( - (np.matmul(UDn, UTY).reshape(-1) ** 2) / K_inv_diag - ) - hp["prefactor"] = np.array([0.5 * np.log(prefactor2)]) - hp["noise"] = np.array([noise]) - coef_re = np.matmul(KXX_inv, Y_p).reshape(-1) + KXX_inv = matmul(UDn, U.T) + K_inv_diag = diag(KXX_inv) + prefactor2 = ((matmul(UDn, UTY).reshape(-1) ** 2) / K_inv_diag).mean() + hp["prefactor"] = asarray([0.5 * log(prefactor2)]) + hp["noise"] = asarray([noise]) + coef_re = matmul(KXX_inv, Y_p).reshape(-1) co_Kinv = coef_re / K_inv_diag for para in parameters_set: if para == "prefactor": - gpp_d = np.zeros((len(hp[para]))) + gpp_d = zeros((len(hp[para])), dtype=self.dtype) else: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) r_j, s_j = self.get_r_s_derivatives(K_deriv, KXX_inv, coef_re) gpp_d = ( - np.mean(co_Kinv * (2.0 * r_j + co_Kinv * s_j), axis=-1) + (co_Kinv * (2.0 * r_j + co_Kinv * s_j)).mean(axis=-1) / prefactor2 - ) + np.mean(s_j / K_inv_diag, axis=-1) - gpp_deriv = np.append(gpp_deriv, gpp_d) + ) + (s_j / K_inv_diag).mean(axis=-1) + gpp_deriv = append(gpp_deriv, gpp_d) gpp_deriv = gpp_deriv - self.logpriors(hp, pdis, jac=True) / n_data return gpp_deriv @@ -165,27 +140,22 @@ def get_r_s_derivatives(self, K_deriv, KXX_inv, coef): Get the r and s vector that are products of the inverse and derivative covariance matrix """ - r_j = np.einsum("ji,di->dj", KXX_inv, np.matmul(K_deriv, -coef)) - s_j = np.einsum("ji,dji->di", KXX_inv, np.matmul(K_deriv, KXX_inv)) + r_j = einsum("ji,di->dj", KXX_inv, matmul(K_deriv, -coef)) + s_j = einsum("ji,dji->di", KXX_inv, matmul(K_deriv, KXX_inv)) return r_j, s_j def get_eig_fun(self, noise, hp, pdis, U, UTY, D, n_data, **kwargs): "Calculate GPP from 
Eigendecomposition for a noise value." - D_n = D + np.exp(2.0 * noise) + D_n = D + exp(2.0 * noise) UDn = U / D_n - K_inv_diag = np.einsum("ij,ji->i", UDn, U.T) - prefactor = 0.5 * np.log( - np.mean((np.matmul(UDn, UTY).reshape(-1) ** 2) / K_inv_diag) - ) - gpp_v = ( - 1 - - np.mean(np.log(K_inv_diag)) - + 2.0 * prefactor - + np.log(2.0 * np.pi) + K_inv_diag = einsum("ij,ji->i", UDn, U.T) + prefactor = 0.5 * log( + ((matmul(UDn, UTY).reshape(-1) ** 2) / K_inv_diag).mean() ) + gpp_v = 1.0 - log(K_inv_diag).mean() + 2.0 * prefactor + log(2.0 * pi) if pdis is not None: - hp["prefactor"] = np.array([prefactor]) - hp["noise"] = np.array([noise]).reshape(-1) + hp["prefactor"] = asarray([prefactor]) + hp["noise"] = asarray([noise]).reshape(-1) return gpp_v - self.logpriors(hp, pdis, jac=False) / n_data def get_all_eig_fun(self, noises, hp, pdis, U, UTY, D, n_data, **kwargs): @@ -193,21 +163,20 @@ def get_all_eig_fun(self, noises, hp, pdis, U, UTY, D, n_data, **kwargs): Calculate GPP from Eigendecompositions for all noise values from the list. """ - D_n = D + np.exp(2.0 * noises) + D_n = D + exp(2.0 * noises) UDn = U / D_n[:, None, :] - K_inv_diag = np.einsum("dij,ji->di", UDn, U.T, optimize=True) - prefactor = 0.5 * np.log( - np.mean( - (np.matmul(UDn, UTY).reshape((len(noises), n_data)) ** 2) - / K_inv_diag, - axis=1, - ) + K_inv_diag = einsum("dij,ji->di", UDn, U.T, optimize=True) + prefactor = 0.5 * log( + ( + (matmul(UDn, UTY).reshape((len(noises), n_data)) ** 2) + / K_inv_diag + ).mean(axis=1) ) gpp_v = ( 1.0 - - np.mean(np.log(K_inv_diag), axis=1) + - log(K_inv_diag).mean(axis=1) + 2.0 * prefactor - + np.log(2.0 * np.pi) + + log(2.0 * pi) ) if pdis is not None: hp["prefactor"] = prefactor.reshape(-1, 1) @@ -273,21 +242,21 @@ def update_solution( and numerically, respectively. 
""" if fun < self.sol["fun"]: - D_n = D + np.exp(2.0 * noise) + D_n = D + exp(2.0 * noise) UDn = U / D_n - K_inv_diag = np.einsum("ij,ji->i", UDn, U.T) - prefactor2 = np.mean( - (np.matmul(UDn, UTY).reshape(-1) ** 2) / K_inv_diag - ) + K_inv_diag = einsum("ij,ji->i", UDn, U.T) + prefactor2 = ( + (matmul(UDn, UTY).reshape(-1) ** 2) / K_inv_diag + ).mean() if self.modification: prefactor2 = ( (n_data / (n_data - len(theta))) * prefactor2 if n_data - len(theta) > 0 else prefactor2 ) - hp["prefactor"] = np.array([0.5 * np.log(prefactor2)]) - hp["noise"] = np.array([noise]) - self.sol["x"] = np.concatenate( + hp["prefactor"] = asarray([0.5 * log(prefactor2)]) + hp["noise"] = asarray([noise]) + self.sol["x"] = concatenate( [hp[para] for para in sorted(hp.keys())] ) self.sol["hp"] = hp.copy() diff --git a/catlearn/regression/gp/objectivefunctions/gp/factorized_likelihood.py b/catlearn/regression/gp/objectivefunctions/gp/factorized_likelihood.py index 00b6a4e4..f36737e5 100644 --- a/catlearn/regression/gp/objectivefunctions/gp/factorized_likelihood.py +++ b/catlearn/regression/gp/objectivefunctions/gp/factorized_likelihood.py @@ -1,25 +1,42 @@ -import numpy as np +from numpy import ( + append, + asarray, + concatenate, + empty, + exp, + log, + matmul, + pi, + zeros, +) from ..objectivefunction import ObjectiveFuction +from ...hpboundary.hptrans import VariableTransformation +from ...optimizers.noisesearcher import NoiseFineGridSearch class FactorizedLogLikelihood(ObjectiveFuction): + """ + The factorized log-likelihood objective function that is used + to optimize the hyperparameters. + The prefactor hyperparameter is determined from + an analytical expression. + An eigendecomposition is performed to get the eigenvalues. + The relative-noise hyperparameter can be searched from + a single eigendecomposition for each length-scale hyperparameter. 
+ """ + def __init__( self, get_prior_mean=False, modification=False, ngrid=80, - bounds=None, + bounds=VariableTransformation(), noise_optimizer=None, + dtype=float, **kwargs, ): """ - The factorized log-likelihood objective function that is used - to optimize the hyperparameters. - The prefactor hyperparameter is determined from - an analytical expression. - An eigendecomposition is performed to get the eigenvalues. - The relative-noise hyperparameter can be searched from - a single eigendecomposition for each length-scale hyperparameter. + Initialize the objective function. Parameters: get_prior_mean: bool @@ -35,22 +52,18 @@ def __init__( bounds: Boundary_conditions class A class of the boundary conditions of the relative-noise hyperparameter. - noise_optimizer : Noise line search optimizer class + noise_optimizer: Noise line search optimizer class A line search optimization method for the relative-noise hyperparameter. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" # Set descriptor of the objective function self.use_analytic_prefactor = True self.use_optimized_noise = True - # Set default bounds - if bounds is None: - from ...hpboundary.hptrans import VariableTransformation - - bounds = VariableTransformation(bounds=None) # Set default noise line optimizer if noise_optimizer is None: - from ...optimizers.noisesearcher import NoiseFineGridSearch - noise_optimizer = NoiseFineGridSearch( maxiter=1000, tol=1e-5, @@ -58,6 +71,7 @@ def __init__( multiple_min=False, ngrid=ngrid, loops=2, + dtype=dtype, ) # Set the arguments self.update_arguments( @@ -66,9 +80,21 @@ def __init__( ngrid=ngrid, bounds=bounds, noise_optimizer=noise_optimizer, + dtype=dtype, **kwargs, ) + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype=dtype, **kwargs) + self.bounds.set_dtype(dtype=dtype, **kwargs) + self.noise_optimizer.set_dtype(dtype=dtype, **kwargs) + return self + + def set_seed(self, seed, **kwargs): + self.bounds.set_seed(seed, **kwargs) + self.noise_optimizer.set_seed(seed) + return self + def update_arguments( self, get_prior_mean=None, @@ -76,6 +102,7 @@ def update_arguments( ngrid=None, bounds=None, noise_optimizer=None, + dtype=None, **kwargs, ): """ @@ -96,15 +123,16 @@ def update_arguments( bounds: Boundary_conditions class A class of the boundary conditions of the relative-noise hyperparameter. - noise_optimizer : Noise line search optimizer class + noise_optimizer: Noise line search optimizer class A line search optimization method for the relative-noise hyperparameter. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
""" - if get_prior_mean is not None: - self.get_prior_mean = get_prior_mean if modification is not None: self.modification = modification if ngrid is not None: @@ -113,8 +141,11 @@ def update_arguments( self.bounds = bounds.copy() if noise_optimizer is not None: self.noise_optimizer = noise_optimizer.copy() - # Always reset the solution when the objective function is changed - self.reset_solution() + # Set the arguments of the parent class + super().update_arguments( + get_prior_mean=get_prior_mean, + dtype=dtype, + ) return self def function( @@ -198,40 +229,40 @@ def derivative( pdis, **kwargs, ): - nlp_deriv = np.array([]) - D_n = D + np.exp(2.0 * noise) - prefactor2 = np.mean(UTY / D_n) - hp["prefactor"] = np.array([0.5 * np.log(prefactor2)]) - hp["noise"] = np.array([noise]) - KXX_inv = np.matmul(U / D_n, U.T) - coef = np.matmul(KXX_inv, Y_p) + nlp_deriv = empty(0, dtype=self.dtype) + D_n = D + exp(2.0 * noise) + prefactor2 = (UTY / D_n).mean() + hp["prefactor"] = asarray([0.5 * log(prefactor2)]) + hp["noise"] = asarray([noise]) + KXX_inv = matmul(U / D_n, U.T) + coef = matmul(KXX_inv, Y_p) for para in parameters_set: if para == "prefactor": - nlp_d = np.zeros((len(hp[para]))) + nlp_d = zeros((len(hp[para])), dtype=self.dtype) else: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) K_deriv_cho = self.get_K_inv_deriv(K_deriv, KXX_inv) nlp_d = ( (-0.5 / prefactor2) - * np.matmul(coef.T, np.matmul(K_deriv, coef)).reshape(-1) + * matmul(coef.T, matmul(K_deriv, coef)).reshape(-1) ) + (0.5 * K_deriv_cho) - nlp_deriv = np.append(nlp_deriv, nlp_d) + nlp_deriv = append(nlp_deriv, nlp_d) nlp_deriv = nlp_deriv - self.logpriors(hp, pdis, jac=True) return nlp_deriv def get_eig_fun(self, noise, hp, pdis, UTY, D, n_data, **kwargs): "Calculate log-likelihood from Eigendecomposition for a noise value." 
- D_n = D + np.exp(2.0 * noise) - prefactor = 0.5 * np.log(np.mean(UTY / D_n)) + D_n = D + exp(2.0 * noise) + prefactor = 0.5 * log((UTY / D_n).mean()) nlp = ( - 0.5 * n_data * (1 + np.log(2.0 * np.pi)) + 0.5 * n_data * (1 + log(2.0 * pi)) + (n_data * prefactor) - + 0.5 * np.sum(np.log(D_n)) + + 0.5 * log(D_n).sum() ) if pdis is not None: - hp["prefactor"] = np.array([prefactor]) - hp["noise"] = np.array([noise]).reshape(-1) + hp["prefactor"] = asarray([prefactor], dtype=self.dtype) + hp["noise"] = asarray([noise], dtype=self.dtype).reshape(-1) return nlp - self.logpriors(hp, pdis, jac=False) def get_all_eig_fun(self, noises, hp, pdis, UTY, D, n_data, **kwargs): @@ -239,10 +270,10 @@ def get_all_eig_fun(self, noises, hp, pdis, UTY, D, n_data, **kwargs): Calculate log-likelihood from Eigendecompositions for all noise values from the list. """ - D_n = D + np.exp(2.0 * noises) - prefactor = 0.5 * np.log(np.mean(UTY / D_n, axis=1)) - nlp = (0.5 * n_data * (1 + np.log(2.0 * np.pi))) + ( - (n_data * prefactor) + (0.5 * np.sum(np.log(D_n), axis=1)) + D_n = D + exp(2.0 * noises) + prefactor = 0.5 * log((UTY / D_n).mean(axis=1)) + nlp = (0.5 * n_data * (1 + log(2.0 * pi))) + ( + (n_data * prefactor) + (0.5 * log(D_n).sum(axis=1)) ) if pdis is not None: hp["prefactor"] = prefactor.reshape(-1, 1) @@ -252,7 +283,8 @@ def get_all_eig_fun(self, noises, hp, pdis, UTY, D, n_data, **kwargs): def make_noise_list(self, model, X, Y, **kwargs): "Make the list of noises." 
return self.bounds.make_single_line( - parameter="noise", ngrid=self.ngrid + parameter="noise", + ngrid=self.ngrid, ).reshape(-1, 1) def maximize_noise( @@ -274,7 +306,14 @@ def maximize_noise( func_args = (hp.copy(), pdis, UTY, D, n_data) # Calculate function values for line coordinates sol = self.noise_optimizer.run( - self, noises, ["noise"], model, X, Y, pdis, func_args=func_args + self, + noises, + ["noise"], + model, + X, + Y, + pdis, + func_args=func_args, ) # Find the minimum value return sol["x"][0], sol["fun"] @@ -305,14 +344,16 @@ def update_solution( and numerically, respectively. """ if fun < self.sol["fun"]: - D_n = D + np.exp(2.0 * noise) - prefactor2 = np.mean(UTY / D_n) + D_n = D + exp(2.0 * noise) + prefactor2 = (UTY / D_n).mean() if self.modification: if n_data - len(theta) > 0: prefactor2 = (n_data / (n_data - len(theta))) * prefactor2 - hp["prefactor"] = np.array([0.5 * np.log(prefactor2)]) - hp["noise"] = np.array([noise]) - self.sol["x"] = np.concatenate( + hp["prefactor"] = asarray( + [0.5 * log(prefactor2)], + ) + hp["noise"] = asarray([noise]) + self.sol["x"] = concatenate( [hp[para] for para in sorted(hp.keys())] ) self.sol["hp"] = hp.copy() @@ -332,6 +373,7 @@ def get_arguments(self): ngrid=self.ngrid, bounds=self.bounds, noise_optimizer=self.noise_optimizer, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/objectivefunctions/gp/factorized_likelihood_svd.py b/catlearn/regression/gp/objectivefunctions/gp/factorized_likelihood_svd.py index fd409558..73415f97 100644 --- a/catlearn/regression/gp/objectivefunctions/gp/factorized_likelihood_svd.py +++ b/catlearn/regression/gp/objectivefunctions/gp/factorized_likelihood_svd.py @@ -1,53 +1,18 @@ -import numpy as np -from .factorized_likelihood import FactorizedLogLikelihood +from numpy import matmul from numpy.linalg import svd +from .factorized_likelihood import FactorizedLogLikelihood class 
FactorizedLogLikelihoodSVD(FactorizedLogLikelihood): - def __init__( - self, - get_prior_mean=False, - modification=False, - ngrid=80, - bounds=None, - noise_optimizer=None, - **kwargs, - ): - """ - The factorized log-likelihood objective function that is used - to optimize the hyperparameters. - The prefactor hyperparameter is determined from - an analytical expression. - A SVD is performed to get the eigenvalues. - The relative-noise hyperparameter can be searched from - a single eigendecomposition for each length-scale hyperparameter. - - Parameters: - get_prior_mean: bool - Whether to save the parameters of the prior mean - in the solution. - modification: bool - Whether to modify the analytical prefactor value in the end. - The prefactor hyperparameter becomes larger - if modification=True. - ngrid: int - Number of grid points that are searched in - the relative-noise hyperparameter. - bounds: Boundary_conditions class - A class of the boundary conditions of - the relative-noise hyperparameter. - noise_optimizer : Noise line search optimizer class - A line search optimization method for - the relative-noise hyperparameter. - """ - super().__init__( - get_prior_mean=get_prior_mean, - modification=modification, - ngrid=ngrid, - bounds=bounds, - noise_optimizer=noise_optimizer, - **kwargs, - ) + """ + The factorized log-likelihood objective function that is used + to optimize the hyperparameters. + The prefactor hyperparameter is determined from + an analytical expression. + A SVD is performed to get the eigenvalues. + The relative-noise hyperparameter can be searched from + a single eigendecomposition for each length-scale hyperparameter. 
+ """ def get_eig(self, model, X, Y): "Calculate the eigenvalues" @@ -57,5 +22,5 @@ def get_eig(self, model, X, Y): U, D, Vt = svd(KXX, hermitian=True) # Subtract the prior mean to the training target Y_p = self.y_prior(X, Y, model, D=D, U=U) - UTY = np.matmul(Vt, Y_p).reshape(-1) ** 2 + UTY = matmul(Vt, Y_p).reshape(-1) ** 2 return D, U, Y_p, UTY, KXX, n_data diff --git a/catlearn/regression/gp/objectivefunctions/gp/gpe.py b/catlearn/regression/gp/objectivefunctions/gp/gpe.py index 2cf3714a..040a8746 100644 --- a/catlearn/regression/gp/objectivefunctions/gp/gpe.py +++ b/catlearn/regression/gp/objectivefunctions/gp/gpe.py @@ -1,17 +1,24 @@ -import numpy as np +from numpy import append, empty from .loo import LOO class GPE(LOO): - def __init__(self, get_prior_mean=False, **kwargs): + """ + The Geissers predictive mean square error objective function as + a function of the hyperparameters. + """ + + def __init__(self, get_prior_mean=False, dtype=float, **kwargs): """ - The Geissers predictive mean square error objective function as - a function of the hyperparameters. + Initialize the objective function. Parameters: get_prior_mean: bool Whether to save the parameters of the prior mean in the solution. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" # Set descriptor of the objective function self.use_analytic_prefactor = False @@ -19,6 +26,7 @@ def __init__(self, get_prior_mean=False, **kwargs): # Set the arguments self.update_arguments( get_prior_mean=get_prior_mean, + dtype=dtype, **kwargs, ) @@ -35,7 +43,7 @@ def function( ): hp, parameters_set = self.make_hp(theta, parameters) model = self.update_model(model, hp) - coef, L, low, Y_p, KXX, n_data = self.coef_cholesky(model, X, Y) + coef, L, low, _, KXX, n_data = self.coef_cholesky(model, X, Y) KXX_inv, K_inv_diag, coef_re, co_Kinv = self.get_co_Kinv( L, low, @@ -44,7 +52,7 @@ def function( ) K_inv_diag_rev = 1.0 / K_inv_diag prefactor2 = self.get_prefactor2(model) - gpe_v = np.mean(co_Kinv**2) + prefactor2 * np.mean(K_inv_diag_rev) + gpe_v = (co_Kinv**2).mean() + prefactor2 * K_inv_diag_rev.mean() gpe_v = gpe_v - self.logpriors(hp, pdis, jac=False) / n_data if jac: return gpe_v, self.derivative( @@ -80,22 +88,47 @@ def derivative( pdis, **kwargs, ): - gpe_deriv = np.array([]) + gpe_deriv = empty(0, dtype=self.dtype) for para in parameters_set: if para == "prefactor": - gpe_d = 2.0 * prefactor2 * np.mean(K_inv_diag_rev) + gpe_d = 2.0 * prefactor2 * K_inv_diag_rev.mean() else: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) r_j, s_j = self.get_r_s_derivatives(K_deriv, KXX_inv, coef_re) - gpe_d = 2 * np.mean( - (co_Kinv * K_inv_diag_rev) * (r_j + s_j * co_Kinv), axis=-1 - ) + prefactor2 * np.mean( - s_j * (K_inv_diag_rev * K_inv_diag_rev), axis=-1 + gpe_d = 2.0 * ( + (co_Kinv * K_inv_diag_rev) * (r_j + s_j * co_Kinv) + ).mean(axis=-1) + prefactor2 * ( + s_j * (K_inv_diag_rev * K_inv_diag_rev) + ).mean( + axis=-1 ) - gpe_deriv = np.append(gpe_deriv, gpe_d) + gpe_deriv = append(gpe_deriv, gpe_d) gpe_deriv = gpe_deriv - self.logpriors(hp, pdis, jac=True) / n_data return gpe_deriv + def update_arguments(self, get_prior_mean=None, dtype=None, **kwargs): + """ + Update the objective function with its arguments. 
+ The existing arguments are used if they are not given. + + Parameters: + get_prior_mean: bool + Whether to get the parameters of the prior mean + in the solution. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + + Returns: + self: The updated object itself. + """ + # Set the arguments of the parent class + super().update_arguments( + get_prior_mean=get_prior_mean, + dtype=dtype, + ) + return self + def update_solution( self, fun, diff --git a/catlearn/regression/gp/objectivefunctions/gp/gpp.py b/catlearn/regression/gp/objectivefunctions/gp/gpp.py index e07999ed..04c41af0 100644 --- a/catlearn/regression/gp/objectivefunctions/gp/gpp.py +++ b/catlearn/regression/gp/objectivefunctions/gp/gpp.py @@ -1,19 +1,26 @@ -import numpy as np +from numpy import append, asarray, concatenate, empty, log, pi, zeros from .loo import LOO class GPP(LOO): - def __init__(self, get_prior_mean=False, **kwargs): + """ + The Geissers surrogate predictive probability objective function as + a function of the hyperparameters. + The prefactor hyperparameter is calculated from + an analytical expression. + """ + + def __init__(self, get_prior_mean=False, dtype=float, **kwargs): """ - The Geissers surrogate predictive probability objective function as - a function of the hyperparameters. - The prefactor hyperparameter is calculated from - an analytical expression. + Initialize the objective function. Parameters: get_prior_mean: bool Whether to save the parameters of the prior mean in the solution. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" # Set descriptor of the objective function self.use_analytic_prefactor = True @@ -21,6 +28,7 @@ def __init__(self, get_prior_mean=False, **kwargs): # Set the arguments self.update_arguments( get_prior_mean=get_prior_mean, + dtype=dtype, **kwargs, ) @@ -37,22 +45,17 @@ def function( ): hp, parameters_set = self.make_hp(theta, parameters) model = self.update_model(model, hp) - coef, L, low, Y_p, KXX, n_data = self.coef_cholesky(model, X, Y) + coef, L, low, _, KXX, n_data = self.coef_cholesky(model, X, Y) KXX_inv, K_inv_diag, coef_re, co_Kinv = self.get_co_Kinv( L, low, n_data, coef, ) - prefactor2 = np.mean(co_Kinv * coef_re) - prefactor = 0.5 * np.log(prefactor2) - hp["prefactor"] = np.array([prefactor]) - gpp_v = ( - 1.0 - - np.mean(np.log(K_inv_diag)) - + 2.0 * prefactor - + np.log(2.0 * np.pi) - ) + prefactor2 = (co_Kinv * coef_re).mean() + prefactor = 0.5 * log(prefactor2) + hp["prefactor"] = asarray([prefactor], dtype=self.dtype) + gpp_v = 1.0 - log(K_inv_diag).mean() + 2.0 * prefactor + log(2.0 * pi) gpp_v = gpp_v - self.logpriors(hp, pdis, jac=False) / n_data if jac: deriv = self.derivative( @@ -106,41 +109,50 @@ def derivative( pdis, **kwargs, ): - gpp_deriv = np.array([]) + gpp_deriv = empty(0, dtype=self.dtype) hp.update( - dict(prefactor=np.array([0.5 * np.log(prefactor2)]).reshape(-1)) + dict( + prefactor=asarray( + [0.5 * log(prefactor2)], + dtype=self.dtype, + ).reshape(-1) + ) ) for para in parameters_set: if para == "prefactor": - gpp_d = np.zeros((len(hp[para]))) + gpp_d = zeros((len(hp[para])), dtype=self.dtype) else: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) r_j, s_j = self.get_r_s_derivatives(K_deriv, KXX_inv, coef_re) gpp_d = ( - np.mean(co_Kinv * (2.0 * r_j + co_Kinv * s_j), axis=-1) + (co_Kinv * (2.0 * r_j + co_Kinv * s_j)).mean(axis=-1) / prefactor2 - ) + np.mean(s_j / K_inv_diag, axis=-1) - gpp_deriv = np.append(gpp_deriv, gpp_d) + ) + (s_j / K_inv_diag).mean(axis=-1) + gpp_deriv = append(gpp_deriv, gpp_d) gpp_deriv = 
gpp_deriv - self.logpriors(hp, pdis, jac=True) / n_data return gpp_deriv - def update_arguments(self, get_prior_mean=None, **kwargs): + def update_arguments(self, get_prior_mean=None, dtype=None, **kwargs): """ Update the objective function with its arguments. The existing arguments are used if they are not given. Parameters: - get_prior_mean : bool + get_prior_mean: bool Whether to get the parameters of the prior mean in the solution. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if get_prior_mean is not None: - self.get_prior_mean = get_prior_mean - # Always reset the solution when the objective function is changed - self.reset_solution() + # Set the arguments of the parent class + super().update_arguments( + get_prior_mean=get_prior_mean, + dtype=dtype, + ) return self def update_solution( @@ -164,7 +176,7 @@ def update_solution( than the input since it is optimized analytically. """ if fun < self.sol["fun"]: - self.sol["x"] = np.concatenate( + self.sol["x"] = concatenate( [hp[para] for para in sorted(hp.keys())] ) self.sol["hp"] = hp.copy() @@ -178,7 +190,7 @@ def update_solution( def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(get_prior_mean=self.get_prior_mean) + arg_kwargs = dict(get_prior_mean=self.get_prior_mean, dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/objectivefunctions/gp/likelihood.py b/catlearn/regression/gp/objectivefunctions/gp/likelihood.py index dc870c36..52a08132 100644 --- a/catlearn/regression/gp/objectivefunctions/gp/likelihood.py +++ b/catlearn/regression/gp/objectivefunctions/gp/likelihood.py @@ -1,20 +1,12 @@ -import numpy as np -from scipy.linalg import cho_solve +from numpy import append, diagonal, empty, log, matmul, pi from ..objectivefunction import ObjectiveFuction class LogLikelihood(ObjectiveFuction): - def __init__(self, get_prior_mean=False, **kwargs): - """ - The log-likelihood objective function that is used to - optimize the hyperparameters. - - Parameters: - get_prior_mean: bool - Whether to save the parameters of the prior mean - in the solution. - """ - super().__init__(get_prior_mean=get_prior_mean, **kwargs) + """ + The log-likelihood objective function that is used to + optimize the hyperparameters. 
+ """ def function( self, @@ -31,11 +23,11 @@ def function( model = self.update_model(model, hp) coef, L, low, Y_p, KXX, n_data = self.coef_cholesky(model, X, Y) prefactor2 = self.get_prefactor2(model) - nlp = ( - 0.5 * np.matmul(Y_p.T, coef).item(0) / prefactor2 - + 0.5 * n_data * np.log(prefactor2) - + np.sum(np.log(np.diagonal(L))) - + 0.5 * n_data * np.log(2.0 * np.pi) + nlp = 0.5 * ( + matmul(Y_p.T, coef).item(0) / prefactor2 + + n_data * log(prefactor2) + + 2.0 * log(diagonal(L)).sum() + + n_data * log(2.0 * pi) ) nlp = nlp - self.logpriors(hp, pdis, jac=False) if jac: @@ -72,18 +64,18 @@ def derivative( pdis, **kwargs, ): - nlp_deriv = np.array([]) - KXX_inv = cho_solve((L, low), np.identity(n_data), check_finite=False) + nlp_deriv = empty(0, dtype=self.dtype) + KXX_inv = self.get_cinv(L=L, low=low, n_data=n_data) for para in parameters_set: if para == "prefactor": - nlp_d = -np.matmul(Y_p.T, coef).item(0) / prefactor2 + n_data + nlp_d = -matmul(Y_p.T, coef).item(0) / prefactor2 + n_data else: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) K_deriv_cho = self.get_K_inv_deriv(K_deriv, KXX_inv) nlp_d = ( (-0.5 / prefactor2) - * np.matmul(coef.T, np.matmul(K_deriv, coef)).reshape(-1) + * matmul(coef.T, matmul(K_deriv, coef)).reshape(-1) ) + (0.5 * K_deriv_cho) - nlp_deriv = np.append(nlp_deriv, nlp_d) + nlp_deriv = append(nlp_deriv, nlp_d) nlp_deriv = nlp_deriv - self.logpriors(hp, pdis, jac=True) return nlp_deriv diff --git a/catlearn/regression/gp/objectivefunctions/gp/loo.py b/catlearn/regression/gp/objectivefunctions/gp/loo.py index 88d7c6b4..85106ab9 100644 --- a/catlearn/regression/gp/objectivefunctions/gp/loo.py +++ b/catlearn/regression/gp/objectivefunctions/gp/loo.py @@ -1,18 +1,33 @@ -import numpy as np -from scipy.linalg import cho_solve +from numpy import ( + append, + asarray, + concatenate, + diag, + einsum, + empty, + log, + matmul, + sqrt, + zeros, +) from ..objectivefunction import ObjectiveFuction class LOO(ObjectiveFuction): + """ + The 
leave-one-out objective function that is used to + optimize the hyperparameters. + """ + def __init__( self, get_prior_mean=False, use_analytic_prefactor=True, + dtype=float, **kwargs, ): """ - The leave-one-out objective function that is used to - optimize the hyperparameters. + Initialize the objective function. Parameters: get_prior_mean: bool @@ -20,6 +35,10 @@ def __init__( in the solution. use_analytic_prefactor: bool Whether to calculate the analytical prefactor value in the end. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + """ # Set descriptor of the objective function self.use_optimized_noise = False @@ -27,6 +46,7 @@ def __init__( self.update_arguments( get_prior_mean=get_prior_mean, use_analytic_prefactor=use_analytic_prefactor, + dtype=dtype, **kwargs, ) @@ -43,14 +63,14 @@ def function( ): hp, parameters_set = self.make_hp(theta, parameters) model = self.update_model(model, hp) - coef, L, low, Y_p, KXX, n_data = self.coef_cholesky(model, X, Y) + coef, L, low, _, KXX, n_data = self.coef_cholesky(model, X, Y) KXX_inv, K_inv_diag, coef_re, co_Kinv = self.get_co_Kinv( L, low, n_data, coef, ) - loo_v = np.mean(co_Kinv**2) + loo_v = (co_Kinv**2).mean() loo_v = loo_v - self.logpriors(hp, pdis, jac=False) / n_data if jac: deriv = self.derivative( @@ -106,18 +126,17 @@ def derivative( pdis, **kwargs, ): - loo_deriv = np.array([]) + loo_deriv = empty(0, dtype=self.dtype) for para in parameters_set: if para == "prefactor": - loo_d = np.zeros((len(hp[para]))) + loo_d = zeros((len(hp[para])), dtype=self.dtype) else: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) r_j, s_j = self.get_r_s_derivatives(K_deriv, KXX_inv, coef_re) - loo_d = 2.0 * np.mean( - (co_Kinv / K_inv_diag) * (r_j + s_j * co_Kinv), - axis=-1, - ) - loo_deriv = np.append(loo_deriv, loo_d) + loo_d = 2.0 * ( + (co_Kinv / K_inv_diag) * (r_j + s_j * co_Kinv) + ).mean(axis=-1) + loo_deriv = append(loo_deriv, loo_d) loo_deriv = loo_deriv - 
self.logpriors(hp, pdis, jac=True) / n_data return loo_deriv @@ -125,6 +144,7 @@ def update_arguments( self, get_prior_mean=None, use_analytic_prefactor=None, + dtype=None, **kwargs, ): """ @@ -137,16 +157,20 @@ def update_arguments( in the solution. use_analytic_prefactor: bool Whether to calculate the analytical prefactor value in the end. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if get_prior_mean is not None: - self.get_prior_mean = get_prior_mean if use_analytic_prefactor is not None: self.use_analytic_prefactor = use_analytic_prefactor - # Always reset the solution when the objective function is changed - self.reset_solution() + # Set the arguments of the parent class + super().update_arguments( + get_prior_mean=get_prior_mean, + dtype=dtype, + ) return self def update_solution( @@ -174,11 +198,11 @@ def update_solution( """ if fun < self.sol["fun"]: if self.use_analytic_prefactor: - prefactor2 = np.mean(co_Kinv * coef_re) - ( - np.mean(coef_re / np.sqrt(K_inv_diag)) ** 2 + prefactor2 = (co_Kinv * coef_re).mean() - ( + (coef_re / sqrt(K_inv_diag)).mean() ** 2 ) - hp["prefactor"] = np.array([0.5 * np.log(prefactor2)]) - self.sol["x"] = np.concatenate( + hp["prefactor"] = asarray([0.5 * log(prefactor2)]) + self.sol["x"] = concatenate( [hp[para] for para in sorted(hp.keys())] ) else: @@ -193,8 +217,8 @@ def update_solution( def get_co_Kinv(self, L, low, n_data, coef): "Get the inverse covariance matrix and diagonal products." 
- KXX_inv = cho_solve((L, low), np.identity(n_data), check_finite=False) - K_inv_diag = np.diag(KXX_inv) + KXX_inv = self.get_cinv(L=L, low=low, n_data=n_data) + K_inv_diag = diag(KXX_inv) coef_re = coef.reshape(-1) co_Kinv = coef_re / K_inv_diag return KXX_inv, K_inv_diag, coef_re, co_Kinv @@ -204,8 +228,8 @@ def get_r_s_derivatives(self, K_deriv, KXX_inv, coef): Get the r and s vector that are products of the inverse and derivative covariance matrix """ - r_j = np.einsum("ji,di->dj", KXX_inv, np.matmul(K_deriv, -coef)) - s_j = np.einsum("ji,dji->di", KXX_inv, np.matmul(K_deriv, KXX_inv)) + r_j = einsum("ji,di->dj", KXX_inv, matmul(K_deriv, -coef)) + s_j = einsum("ji,dji->di", KXX_inv, matmul(K_deriv, KXX_inv)) return r_j, s_j def get_arguments(self): @@ -214,6 +238,7 @@ def get_arguments(self): arg_kwargs = dict( get_prior_mean=self.get_prior_mean, use_analytic_prefactor=self.use_analytic_prefactor, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/objectivefunctions/gp/mle.py b/catlearn/regression/gp/objectivefunctions/gp/mle.py index 9f661952..c675da8a 100644 --- a/catlearn/regression/gp/objectivefunctions/gp/mle.py +++ b/catlearn/regression/gp/objectivefunctions/gp/mle.py @@ -1,15 +1,35 @@ -import numpy as np -from scipy.linalg import cho_solve +from numpy import ( + append, + asarray, + concatenate, + diagonal, + dot, + empty, + matmul, + log, + pi, + zeros, +) from ..objectivefunction import ObjectiveFuction class MaximumLogLikelihood(ObjectiveFuction): - def __init__(self, get_prior_mean=False, modification=False, **kwargs): + """ + The Maximum log-likelihood objective function as + a function of the hyperparameters. + The prefactor hyperparameter is calculated from + an analytical expression. 
+ """ + + def __init__( + self, + get_prior_mean=False, + modification=False, + dtype=float, + **kwargs, + ): """ - The Maximum log-likelihood objective function as - a function of the hyperparameters. - The prefactor hyperparameter is calculated from - an analytical expression. + Initialize the objective function. Parameters: get_prior_mean: bool @@ -19,6 +39,9 @@ def __init__(self, get_prior_mean=False, modification=False, **kwargs): Whether to modify the analytical prefactor value in the end. The prefactor hyperparameter becomes larger if modification=True. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ # Set descriptor of the objective function self.use_analytic_prefactor = True @@ -27,6 +50,7 @@ def __init__(self, get_prior_mean=False, modification=False, **kwargs): self.update_arguments( get_prior_mean=get_prior_mean, modification=modification, + dtype=dtype, **kwargs, ) @@ -44,13 +68,13 @@ def function( hp, parameters_set = self.make_hp(theta, parameters) model = self.update_model(model, hp) coef, L, low, Y_p, KXX, n_data = self.coef_cholesky(model, X, Y) - prefactor2 = np.matmul(Y_p.T, coef).item(0) / n_data - prefactor = 0.5 * np.log(prefactor2) - hp["prefactor"] = np.array([prefactor]) + prefactor2 = dot(Y_p.reshape(-1), coef.reshape(-1)) / n_data + prefactor = 0.5 * log(prefactor2) + hp["prefactor"] = asarray([prefactor], dtype=self.dtype) nlp = ( - 0.5 * n_data * (1 + np.log(2.0 * np.pi)) + 0.5 * n_data * (1 + log(2.0 * pi)) + n_data * prefactor - + np.sum(np.log(np.diagonal(L))) + + log(diagonal(L)).sum() ) nlp = nlp - self.logpriors(hp, pdis, jac=False) if jac: @@ -105,19 +129,19 @@ def derivative( pdis, **kwargs, ): - nlp_deriv = np.array([]) - KXX_inv = cho_solve((L, low), np.identity(n_data), check_finite=False) + nlp_deriv = empty(0, dtype=self.dtype) + KXX_inv = self.get_cinv(L=L, low=low, n_data=n_data) for para in parameters_set: if para == "prefactor": - nlp_d = np.zeros((len(hp[para]))) + 
nlp_d = zeros((len(hp[para])), dtype=self.dtype) else: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) K_deriv_cho = self.get_K_inv_deriv(K_deriv, KXX_inv) nlp_d = ( (-0.5 / prefactor2) - * np.matmul(coef.T, np.matmul(K_deriv, coef)).reshape(-1) + * matmul(coef.T, matmul(K_deriv, coef)).reshape(-1) ) + (0.5 * K_deriv_cho) - nlp_deriv = np.append(nlp_deriv, nlp_d) + nlp_deriv = append(nlp_deriv, nlp_d) nlp_deriv = nlp_deriv - self.logpriors(hp, pdis, jac=True) return nlp_deriv @@ -125,6 +149,7 @@ def update_arguments( self, get_prior_mean=None, modification=None, + dtype=None, **kwargs, ): """ @@ -139,16 +164,21 @@ def update_arguments( Whether to modify the analytical prefactor value in the end. The prefactor hyperparameter becomes larger if modification=True. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if get_prior_mean is not None: - self.get_prior_mean = get_prior_mean if modification is not None: self.modification = modification - # Always reset the solution when the objective function is changed - self.reset_solution() + # Set the arguments of the parent class + super().update_arguments( + get_prior_mean=get_prior_mean, + dtype=dtype, + **kwargs, + ) return self def update_solution( @@ -176,8 +206,8 @@ def update_solution( if self.modification: if n_data - len(theta) > 0: prefactor2 = (n_data / (n_data - len(theta))) * prefactor2 - hp["prefactor"] = np.array([0.5 * np.log(prefactor2)]) - self.sol["x"] = np.concatenate( + hp["prefactor"] = asarray([0.5 * log(prefactor2)]) + self.sol["x"] = concatenate( [hp[para] for para in sorted(hp.keys())] ) self.sol["hp"] = hp.copy() @@ -194,6 +224,7 @@ def get_arguments(self): arg_kwargs = dict( get_prior_mean=self.get_prior_mean, modification=self.modification, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git 
a/catlearn/regression/gp/objectivefunctions/objectivefunction.py b/catlearn/regression/gp/objectivefunctions/objectivefunction.py index d1c12c80..b476d726 100644 --- a/catlearn/regression/gp/objectivefunctions/objectivefunction.py +++ b/catlearn/regression/gp/objectivefunctions/objectivefunction.py @@ -1,23 +1,49 @@ -import numpy as np -from scipy.linalg import cho_factor, cho_solve -from numpy.linalg import eigh +from numpy import ( + array, + asarray, + append, + diag, + einsum, + empty, + finfo, + identity, + inf, + log, + matmul, + where, + zeros, +) +from numpy.linalg import eigh, LinAlgError +from scipy.linalg import cho_factor, cho_solve, eigh as scipy_eigh +import warnings class ObjectiveFuction: - def __init__(self, get_prior_mean=False, **kwargs): + """ + The objective function that is used to optimize the hyperparameters. + """ + + def __init__(self, get_prior_mean=False, dtype=float, **kwargs): """ - The objective function that is used to optimize the hyperparameters. + Initialize the objective function. Parameters: - get_prior_mean : bool + get_prior_mean: bool Whether to get the parameters of the prior mean in the solution. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ # Set descriptor of the objective function self.use_analytic_prefactor = False self.use_optimized_noise = False # Set the arguments - self.update_arguments(get_prior_mean=get_prior_mean, **kwargs) + self.update_arguments( + get_prior_mean=get_prior_mean, + dtype=dtype, + **kwargs, + ) def function( self, @@ -67,19 +93,56 @@ def derivative(self, **kwargs): """ raise NotImplementedError() - def update_arguments(self, get_prior_mean=None, **kwargs): + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. 
+ """ + # Set the data type + self.dtype = dtype + return self + + def set_seed(self, seed, **kwargs): + """ + Set the random seed. + + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + return self + + def update_arguments(self, get_prior_mean=None, dtype=None, **kwargs): """ Update the objective function with its arguments. The existing arguments are used if they are not given. Parameters: - get_prior_mean : bool + get_prior_mean: bool Whether to get the parameters of the prior mean in the solution. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype) + # Set the get_prior_mean if get_prior_mean is not None: self.get_prior_mean = get_prior_mean # Always reset the solution when the objective function is changed @@ -91,7 +154,7 @@ def reset_solution(self): Reset the solution of the optimization in terms of the hyperparameters and model. """ - self.sol = {"fun": np.inf, "x": np.array([]), "hp": {}} + self.sol = {"fun": inf, "x": empty(0, dtype=self.dtype), "hp": {}} return self def update_solution( @@ -149,8 +212,9 @@ def get_stored_solution(self, **kwargs): def make_hp(self, theta, parameters, **kwargs): "Make hyperparameter dictionary from lists" - theta, parameters = np.array(theta), np.array(parameters) + theta = asarray(theta) parameters_set = sorted(set(parameters)) + parameters = asarray(parameters) hp = { para_s: self.numeric_limits(theta[parameters == para_s]) for para_s in parameters_set @@ -161,12 +225,14 @@ def get_hyperparams(self, model, **kwargs): "Get the hyperparameters for the model and the kernel." 
return model.get_hyperparams() - def numeric_limits(self, array, dh=0.1 * np.log(np.finfo(float).max)): + def numeric_limits(self, a, dh=None): """ Replace hyperparameters if they are outside of the numeric limits in log-space. """ - return np.where(-dh < array, np.where(array < dh, array, dh), -dh) + if dh is None: + dh = 0.1 * log(finfo(self.dtype).max) + return where(-dh < a, where(a < dh, a, dh), -dh) def update_model(self, model, hp, **kwargs): "Update the the machine learning model with the hyperparameters." @@ -189,24 +255,25 @@ def kxx_corr(self, model, X, **kwargs): def add_correction(self, model, KXX, n_data, **kwargs): "Add noise correction to covariance matrix." - corr = model.get_correction(np.diag(KXX)) + corr = model.get_correction(diag(KXX)) if corr > 0.0: KXX[range(n_data), range(n_data)] += corr return KXX def y_prior(self, X, Y, model, L=None, low=None, **kwargs): "Update prior and subtract to target." - Y_p = Y.copy() + Y_p = array(Y, dtype=self.dtype) model.update_priormean(X, Y_p, L=L, low=low, **kwargs) - get_derivatives = model.use_derivatives + get_derivatives = model.get_use_derivatives() pmean = model.get_priormean( X, Y_p, get_derivatives=get_derivatives, ) + Y_p -= pmean if get_derivatives: - return (Y_p - pmean).T.reshape(-1, 1) - return (Y_p - pmean)[:, 0:1] + return Y_p.T.reshape(-1, 1) + return Y_p[:, 0:1] def coef_cholesky(self, model, X, Y, **kwargs): "Calculate the coefficients by using Cholesky decomposition." @@ -227,24 +294,40 @@ def get_eig(self, model, X, Y, **kwargs): # Eigendecomposition try: D, U = eigh(KXX) - except Exception as e: - import logging - import scipy.linalg - - logging.error("An error occurred: %s", str(e)) + except LinAlgError: + warnings.warn( + "Eigendecomposition failed, using scipy.eigh instead." 
+ ) # More robust but slower eigendecomposition - D, U = scipy.linalg.eigh(KXX, driver="ev") + D, U = scipy_eigh(KXX, driver="ev") # Subtract the prior mean to the training target Y_p = self.y_prior(X, Y, model, D=D, U=U) - UTY = (np.matmul(U.T, Y_p)).reshape(-1) ** 2 + UTY = matmul(U.T, Y_p).reshape(-1) ** 2 return D, U, Y_p, UTY, KXX, n_data - def get_cinv(self, model, X, Y, **kwargs): - "Get the inverse covariance matrix." + def get_cinv_model(self, model, X, Y, check_finite=False, **kwargs): + "Get the inverse covariance matrix from the model." coef, L, low, Y_p, KXX, n_data = self.coef_cholesky(model, X, Y) - cinv = cho_solve((L, low), np.identity(n_data), check_finite=False) + cinv = self.get_cinv( + L, + low, + n_data, + check_finite=check_finite, + **kwargs, + ) return coef, cinv, Y_p, KXX, n_data + def get_cinv(self, L, low, n_data, check_finite=False, **kwargs): + "Get the inverse covariance matrix." + return cho_solve( + (L, low), + identity( + n_data, + dtype=self.dtype, + ), + check_finite=check_finite, + ) + def logpriors(self, hp, pdis=None, jac=False, **kwargs): "Log of the prior distribution value for the hyperparameters." # If no prior distribution is used for the hyperparameters @@ -260,15 +343,21 @@ def logpriors(self, hp, pdis=None, jac=False, **kwargs): return lprior return lprior.reshape(-1) # Derivate of the log probability wrt. 
the hyperparameters - lprior_deriv = np.array([]) + lprior_deriv = empty(0, dtype=self.dtype) for para, value in hp.items(): if para in pdis.keys(): - lprior_deriv = np.append( + lprior_deriv = append( lprior_deriv, - np.array(pdis[para].ln_deriv(value)).reshape(-1), + asarray( + pdis[para].ln_deriv(value), + dtype=self.dtype, + ).reshape(-1), ) else: - lprior_deriv = np.append(lprior_deriv, np.zeros((len(value)))) + lprior_deriv = append( + lprior_deriv, + zeros((len(value)), dtype=self.dtype), + ) return lprior_deriv def get_K_inv_deriv(self, K_deriv, KXX_inv, **kwargs): @@ -276,7 +365,7 @@ def get_K_inv_deriv(self, K_deriv, KXX_inv, **kwargs): Get the diagonal elements of the matrix product of the inverse and derivative covariance matrix. """ - return np.einsum("ij,dji->d", KXX_inv, K_deriv) + return einsum("ij,dji->d", KXX_inv, K_deriv) def get_K_deriv(self, model, parameter, X, KXX, **kwargs): "Get the gradient of the covariance matrix wrt. the hyperparameter." @@ -294,7 +383,7 @@ def get_prior_parameters(self, model, **kwargs): def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(get_prior_mean=self.get_prior_mean) + arg_kwargs = dict(get_prior_mean=self.get_prior_mean, dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/objectivefunctions/tp/factorized_likelihood.py b/catlearn/regression/gp/objectivefunctions/tp/factorized_likelihood.py index 436958ab..eb1d0c50 100644 --- a/catlearn/regression/gp/objectivefunctions/tp/factorized_likelihood.py +++ b/catlearn/regression/gp/objectivefunctions/tp/factorized_likelihood.py @@ -1,22 +1,39 @@ -import numpy as np -from ..gp.factorized_likelihood import FactorizedLogLikelihood +from numpy import ( + append, + asarray, + concatenate, + empty, + exp, + log, + matmul, +) +from ..gp.factorized_likelihood import ( + FactorizedLogLikelihood, + VariableTransformation, +) +from ...optimizers.noisesearcher import NoiseFineGridSearch class FactorizedLogLikelihood(FactorizedLogLikelihood): + """ + The factorized log-likelihood objective function that is used + to optimize the hyperparameters. + An eigendecomposition is performed to get the eigenvalues. + The relative-noise hyperparameter can be searched from + a single eigendecomposition for each length-scale hyperparameter. + """ + def __init__( self, get_prior_mean=False, ngrid=80, - bounds=None, + bounds=VariableTransformation(), noise_optimizer=None, + dtype=float, **kwargs, ): """ - The factorized log-likelihood objective function that is used - to optimize the hyperparameters. - An eigendecomposition is performed to get the eigenvalues. - The relative-noise hyperparameter can be searched from - a single eigendecomposition for each length-scale hyperparameter. + Initialize the objective function. Parameters: get_prior_mean: bool @@ -28,22 +45,18 @@ def __init__( bounds: Boundary_conditions class A class of the boundary conditions of the relative-noise hyperparameter. 
- noise_optimizer : Noise line search optimizer class + noise_optimizer: Noise line search optimizer class A line search optimization method for the relative-noise hyperparameter. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ # Set descriptor of the objective function self.use_analytic_prefactor = False self.use_optimized_noise = True - # Set default bounds - if bounds is None: - from ...hpboundary.hptrans import VariableTransformation - - bounds = VariableTransformation(bounds=None) # Set default noise line optimizer if noise_optimizer is None: - from ...optimizers.noisesearcher import NoiseFineGridSearch - noise_optimizer = NoiseFineGridSearch( maxiter=1000, tol=1e-5, @@ -58,6 +71,7 @@ def __init__( ngrid=ngrid, bounds=bounds, noise_optimizer=noise_optimizer, + dtype=dtype, **kwargs, ) @@ -131,33 +145,33 @@ def derivative( n_data, **kwargs, ): - nlp_deriv = np.array([]) - D_n = D + np.exp(2 * noise) - hp["noise"] = np.array([noise]) - KXX_inv = np.matmul(U / D_n, U.T) - coef = np.matmul(KXX_inv, Y_p) + nlp_deriv = empty(0, dtype=self.dtype) + D_n = D + exp(2.0 * noise) + hp["noise"] = asarray([noise]) + KXX_inv = matmul(U / D_n, U.T) + coef = matmul(KXX_inv, Y_p) a, b = self.get_hyperprior_parameters(model) - ycoef = 1.0 + np.sum(UTY / D_n) / (2.0 * b) + ycoef = 1.0 + (UTY / D_n).sum() / (2.0 * b) for para in parameters_set: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) K_deriv_cho = self.get_K_inv_deriv(K_deriv, KXX_inv) nlp_d = ( (-0.5 / ycoef) * ((a + 0.5 * n_data) / b) - * np.matmul(coef.T, np.matmul(K_deriv, coef)).reshape(-1) + * matmul(coef.T, matmul(K_deriv, coef)).reshape(-1) ) + 0.5 * K_deriv_cho - nlp_deriv = np.append(nlp_deriv, nlp_d) + nlp_deriv = append(nlp_deriv, nlp_d) nlp_deriv = nlp_deriv - self.logpriors(hp, pdis, jac=True) return nlp_deriv def get_eig_fun(self, noise, hp, pdis, UTY, D, n_data, a, b, **kwargs): "Calculate log-likelihood from Eigendecomposition for a noise value." 
- D_n = D + np.exp(2.0 * noise) - nlp = 0.5 * np.sum(np.log(D_n)) + 0.5 * (2.0 * a + n_data) * np.log( - 1.0 + np.sum(UTY / D_n) / (2.0 * b) + D_n = D + exp(2.0 * noise) + nlp = 0.5 * log(D_n).sum() + 0.5 * (2.0 * a + n_data) * log( + 1.0 + (UTY / D_n).sum() / (2.0 * b) ) if pdis is not None: - hp["noise"] = np.array([noise]).reshape(-1) + hp["noise"] = asarray([noise]).reshape(-1) return nlp - self.logpriors(hp, pdis, jac=False) def get_all_eig_fun( @@ -176,10 +190,10 @@ def get_all_eig_fun( Calculate log-likelihood from Eigendecompositions for all noise values from the list. """ - D_n = D + np.exp(2.0 * noises) - nlp = 0.5 * np.sum(np.log(D_n), axis=1) + 0.5 * ( - 2.0 * a + n_data - ) * np.log(1.0 + np.sum(UTY / D_n, axis=1) / (2.0 * b)) + D_n = D + exp(2.0 * noises) + nlp = 0.5 * log(D_n).sum(axis=1) + 0.5 * (2.0 * a + n_data) * log( + 1.0 + (UTY / D_n).sum(axis=1) / (2.0 * b) + ) if pdis is not None: hp["noise"] = noises return nlp - self.logpriors(hp, pdis, jac=False) @@ -243,8 +257,8 @@ def update_solution( than the input since they are optimized numerically. 
""" if fun < self.sol["fun"]: - hp["noise"] = np.array([noise]) - self.sol["x"] = np.concatenate( + hp["noise"] = asarray([noise]) + self.sol["x"] = concatenate( [hp[para] for para in sorted(hp.keys())] ) self.sol["hp"] = hp.copy() @@ -263,6 +277,7 @@ def get_arguments(self): ngrid=self.ngrid, bounds=self.bounds, noise_optimizer=self.noise_optimizer, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() diff --git a/catlearn/regression/gp/objectivefunctions/tp/factorized_likelihood_svd.py b/catlearn/regression/gp/objectivefunctions/tp/factorized_likelihood_svd.py index 8e986e22..fe5d269c 100644 --- a/catlearn/regression/gp/objectivefunctions/tp/factorized_likelihood_svd.py +++ b/catlearn/regression/gp/objectivefunctions/tp/factorized_likelihood_svd.py @@ -1,45 +1,16 @@ -import numpy as np -from .factorized_likelihood import FactorizedLogLikelihood +from numpy import matmul from numpy.linalg import svd +from .factorized_likelihood import FactorizedLogLikelihood class FactorizedLogLikelihoodSVD(FactorizedLogLikelihood): - def __init__( - self, - get_prior_mean=False, - ngrid=80, - bounds=None, - noise_optimizer=None, - **kwargs - ): - """ - The factorized log-likelihood objective function that is used - to optimize the hyperparameters. - A SVD is performed to get the eigenvalues. - The relative-noise hyperparameter can be searched from - a single eigendecomposition for each length-scale hyperparameter. - - Parameters: - get_prior_mean: bool - Whether to save the parameters of the prior mean - in the solution. - ngrid: int - Number of grid points that are searched in - the relative-noise hyperparameter. - bounds: Boundary_conditions class - A class of the boundary conditions of - the relative-noise hyperparameter. - noise_optimizer : Noise line search optimizer class - A line search optimization method for - the relative-noise hyperparameter. 
- """ - super().__init__( - get_prior_mean=get_prior_mean, - ngrid=ngrid, - bounds=bounds, - noise_optimizer=noise_optimizer, - **kwargs - ) + """ + The factorized log-likelihood objective function that is used + to optimize the hyperparameters. + A SVD is performed to get the eigenvalues. + The relative-noise hyperparameter can be searched from + a single eigendecomposition for each length-scale hyperparameter. + """ def get_eig(self, model, X, Y): "Calculate the eigenvalues" @@ -49,5 +20,5 @@ def get_eig(self, model, X, Y): U, D, Vt = svd(KXX, hermitian=True) # Subtract the prior mean to the training target Y_p = self.y_prior(X, Y, model, D=D, U=U) - UTY = np.matmul(Vt, Y_p).reshape(-1) ** 2 + UTY = matmul(Vt, Y_p).reshape(-1) ** 2 return D, U, Y_p, UTY, KXX, n_data diff --git a/catlearn/regression/gp/objectivefunctions/tp/likelihood.py b/catlearn/regression/gp/objectivefunctions/tp/likelihood.py index c6a7d067..d91aca6d 100644 --- a/catlearn/regression/gp/objectivefunctions/tp/likelihood.py +++ b/catlearn/regression/gp/objectivefunctions/tp/likelihood.py @@ -1,23 +1,18 @@ -import numpy as np -from scipy.linalg import cho_solve +from numpy import ( + append, + empty, + diagonal, + log, + matmul, +) from ..objectivefunction import ObjectiveFuction class LogLikelihood(ObjectiveFuction): - def __init__(self, get_prior_mean=False, **kwargs): - """ - The log-likelihood objective function that is used to - optimize the hyperparameters. - - Parameters: - get_prior_mean: bool - Whether to save the parameters of the prior mean - in the solution. - """ - super().__init__(get_prior_mean=get_prior_mean, **kwargs) - # Set descriptor of the objective function - self.use_analytic_prefactor = False - self.use_optimized_noise = False + """ + The log-likelihood objective function that is used to + optimize the hyperparameters. 
+ """ def function( self, @@ -34,10 +29,8 @@ def function( model = self.update_model(model, hp) a, b = self.get_hyperprior_parameters(model) coef, L, low, Y_p, KXX, n_data = self.coef_cholesky(model, X, Y) - ycoef = 1.0 + (np.matmul(Y_p.T, coef).item(0) / (2.0 * b)) - nlp = np.sum(np.log(np.diagonal(L))) + 0.5 * ( - 2.0 * a + n_data - ) * np.log(ycoef) + ycoef = 1.0 + (matmul(Y_p.T, coef).item(0) / (2.0 * b)) + nlp = log(diagonal(L)).sum() + 0.5 * (2.0 * a + n_data) * log(ycoef) nlp = nlp - self.logpriors(hp, pdis, jac=False) if jac: return nlp, self.derivative( @@ -77,17 +70,17 @@ def derivative( pdis, **kwargs, ): - nlp_deriv = np.array([]) - KXX_inv = cho_solve((L, low), np.identity(n_data), check_finite=False) + nlp_deriv = empty(0, dtype=self.dtype) + KXX_inv = self.get_cinv(L=L, low=low, n_data=n_data) for para in parameters_set: K_deriv = self.get_K_deriv(model, para, X=X, KXX=KXX) K_deriv_cho = self.get_K_inv_deriv(K_deriv, KXX_inv) nlp_d = ( (-0.5 / ycoef) * ((a + 0.5 * n_data) / b) - * np.matmul(coef.T, np.matmul(K_deriv, coef)).reshape(-1) + * matmul(coef.T, matmul(K_deriv, coef)).reshape(-1) ) + 0.5 * K_deriv_cho - nlp_deriv = np.append(nlp_deriv, nlp_d) + nlp_deriv = append(nlp_deriv, nlp_d) nlp_deriv = nlp_deriv - self.logpriors(hp, pdis, jac=True) return nlp_deriv diff --git a/catlearn/regression/gp/optimizers/__init__.py b/catlearn/regression/gp/optimizers/__init__.py index 351792f3..b6f4f287 100644 --- a/catlearn/regression/gp/optimizers/__init__.py +++ b/catlearn/regression/gp/optimizers/__init__.py @@ -4,6 +4,7 @@ RandomSamplingOptimizer, GridOptimizer, IterativeLineOptimizer, + ScipyGlobalOptimizer, BasinOptimizer, AnneallingOptimizer, AnneallingTransOptimizer, @@ -35,6 +36,7 @@ "RandomSamplingOptimizer", "GridOptimizer", "IterativeLineOptimizer", + "ScipyGlobalOptimizer", "BasinOptimizer", "AnneallingOptimizer", "AnneallingTransOptimizer", diff --git a/catlearn/regression/gp/optimizers/globaloptimizer.py 
b/catlearn/regression/gp/optimizers/globaloptimizer.py index 90f9e16e..53c40b8a 100644 --- a/catlearn/regression/gp/optimizers/globaloptimizer.py +++ b/catlearn/regression/gp/optimizers/globaloptimizer.py @@ -1,43 +1,73 @@ +from numpy import ( + append, + array, + asarray, + concatenate, + nanargmin, + ndarray, + sort, + sum as sum_, + tile, + unique, + where, +) +from scipy import __version__ as scipy_version +from scipy.optimize import basinhopping, dual_annealing +from ase.parallel import world from .optimizer import Optimizer -import numpy as np +from .linesearcher import GoldenSearch +from .localoptimizer import ScipyOptimizer +from ..hpboundary import EducatedBoundaries, VariableTransformation class GlobalOptimizer(Optimizer): + """ + The global optimizer used for optimzing the objective function + wrt. the hyperparameters. + The global optimizer requires a local optimization method and + boundary conditions of the hyperparameters. + """ + def __init__( self, local_optimizer=None, - bounds=None, + bounds=VariableTransformation(), maxiter=5000, + jac=False, + parallel=False, + seed=None, + dtype=float, **kwargs, ): """ - The global optimizer used for optimzing the objective function - wrt. the hyperparameters. - The global optimizer requires a local optimization method and - boundary conditions of the hyperparameters. + Initialize the global optimizer. Parameters: - local_optimizer : Local optimizer class + local_optimizer: Local optimizer class A local optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. 
+ This is not implemented for this method. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ - # This global optimizer can not be parallelized - self.parallel = False - # The gradients of the function are unused by the global optimizer - self.jac = False - # Set default bounds - if bounds is None: - from ..hpboundary.hptrans import VariableTransformation - - bounds = VariableTransformation(bounds=None) # Set default local optimizer if local_optimizer is None: - from .localoptimizer import ScipyOptimizer - local_optimizer = ScipyOptimizer( maxiter=maxiter, bounds=bounds, @@ -48,17 +78,66 @@ def __init__( local_optimizer=local_optimizer, bounds=bounds, maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, **kwargs, ) - def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): - raise NotImplementedError() + def set_dtype(self, dtype, **kwargs): + # Set the dtype for the global optimizer + super().set_dtype(dtype=dtype, **kwargs) + # Set the data type of the bounds + if self.bounds is not None and hasattr(self.bounds, "set_dtype"): + self.bounds.set_dtype(dtype=dtype, **kwargs) + # Set the dtype for the local optimizer + if self.local_optimizer is not None and hasattr( + self.local_optimizer, "set_dtype" + ): + self.local_optimizer.set_dtype(dtype=dtype) + return self + + def set_seed(self, seed=None, **kwargs): + # Set the seed for the global optimizer + super().set_seed(seed=seed, **kwargs) + # Set the random seed of the bounds + if self.bounds is not None and hasattr(self.bounds, "set_seed"): + self.bounds.set_seed(seed=seed, **kwargs) + # Set the seed for the local optimizer + if self.local_optimizer is not None and hasattr( + self.local_optimizer, + "set_seed", + ): + self.local_optimizer.set_seed(seed=seed, 
**kwargs) + return self + + def set_maxiter(self, maxiter, **kwargs): + super().set_maxiter(maxiter, **kwargs) + # Set the maxiter for the local optimizer + if self.local_optimizer is not None: + self.local_optimizer.update_arguments(maxiter=maxiter) + return self + + def set_jac(self, jac=True, **kwargs): + # The gradients of the function are unused by the global optimizer + self.jac = False + return self + + def set_parallel(self, parallel=False, **kwargs): + # This global optimizer can not be parallelized + self.parallel = False + return self def update_arguments( self, local_optimizer=None, bounds=None, maxiter=None, + jac=None, + parallel=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -66,25 +145,52 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - local_optimizer : Local optimizer class + local_optimizer: Local optimizer class A local optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
""" + # Set the local optimizer if local_optimizer is not None: self.local_optimizer = local_optimizer.copy() + elif not hasattr(self, "local_optimizer"): + self.local_optimizer = None + # Set the bounds if bounds is not None: self.bounds = bounds.copy() # Use the same boundary conditions in the local optimizer self.local_optimizer.update_arguments(bounds=self.bounds) - if maxiter is not None: - self.maxiter = int(maxiter) + elif not hasattr(self, "bounds"): + self.bounds = None + # Set the arguments for the parent class + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + ) return self def run_local_opt( @@ -118,11 +224,11 @@ def make_lines(self, parameters, ngrid, **kwargs): **kwargs, ) - def make_bounds(self, parameters, array=True, **kwargs): + def make_bounds(self, parameters, use_array=True, **kwargs): "Make the boundary conditions of the hyperparameters." return self.bounds.get_bounds( parameters=parameters, - array=array, + use_array=use_array, **kwargs, ) @@ -148,6 +254,10 @@ def get_arguments(self): local_optimizer=self.local_optimizer, bounds=self.bounds, maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() @@ -157,48 +267,57 @@ def get_arguments(self): class RandomSamplingOptimizer(GlobalOptimizer): + """ + The random sampling optimizer used for optimzing the objective function + wrt. the hyperparameters. + The random sampling optimizer samples the hyperparameters randomly + from the boundary conditions + and optimize all samples with the local optimizer. + """ + def __init__( self, local_optimizer=None, - bounds=None, + bounds=VariableTransformation(), maxiter=5000, - npoints=40, + jac=False, parallel=False, + npoints=40, + seed=None, + dtype=float, **kwargs, ): """ - The random sampling optimizer used for optimzing the objective function - wrt. the hyperparameters. 
- The random sampling optimizer samples the hyperparameters randomly - from the boundary conditions - and optimize all samples with the local optimizer. + Initialize the global optimizer. Parameters: - local_optimizer : Local optimizer class + local_optimizer: Local optimizer class A local optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - npoints : int - The number of hyperparameter points samled from - the boundary conditions. - parallel : bool + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool Whether to calculate the grid points in parallel over multiple CPUs. + npoints: int + The number of hyperparameter points samled from + the boundary conditions. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" - # The gradients of the function are unused by the global optimizer - self.jac = False - # Set default bounds - if bounds is None: - from ..hpboundary.hptrans import VariableTransformation - - bounds = VariableTransformation(bounds=None) # Set default local optimizer if local_optimizer is None: - from .localoptimizer import ScipyOptimizer - local_optimizer = ScipyOptimizer( maxiter=int(maxiter / npoints), bounds=bounds, @@ -209,20 +328,23 @@ def __init__( local_optimizer=local_optimizer, bounds=bounds, maxiter=maxiter, - npoints=npoints, + jac=jac, parallel=parallel, + npoints=npoints, + seed=seed, + dtype=dtype, **kwargs, ) def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): # Draw random hyperparameter samples - thetas = np.array([theta]) + thetas = array([theta], dtype=self.dtype) if self.npoints > 1: thetas = self.sample_thetas( parameters, npoints=int(self.npoints - 1), ) - thetas = np.append(thetas, thetas, axis=0) + thetas = append(thetas, thetas, axis=0) # Make empty solution and lists sol = self.get_empty_solution() # Perform the local optimization for random samples @@ -239,13 +361,20 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): ) return sol + def set_parallel(self, parallel=False, **kwargs): + self.parallel = parallel + return self + def update_arguments( self, local_optimizer=None, bounds=None, maxiter=None, - npoints=None, + jac=None, parallel=None, + npoints=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -253,37 +382,47 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - local_optimizer : Local optimizer class + local_optimizer: Local optimizer class A local optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. 
- npoints : int - The number of hyperparameter points samled from - the boundary conditions. - parallel : bool + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool Whether to calculate the grid points in parallel over multiple CPUs. + npoints: int + The number of hyperparameter points samled from + the boundary conditions. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if local_optimizer is not None: - self.local_optimizer = local_optimizer.copy() - if bounds is not None: - self.bounds = bounds.copy() - # Use the same boundary conditions in the local optimizer - self.local_optimizer.update_arguments(bounds=self.bounds) - if maxiter is not None: - self.maxiter = int(maxiter) - if parallel is not None: - self.parallel = parallel + # Set the arguments for the parent class + super().update_arguments( + local_optimizer=local_optimizer, + bounds=bounds, + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + ) + # Set the number of points if npoints is not None: if self.parallel: - from ase.parallel import world - self.npoints = self.get_optimal_npoints(npoints, world.size) else: self.npoints = int(npoints) @@ -358,8 +497,6 @@ def optimize_samples_parallel( **kwargs, ): "Perform the local optimization of the random samples in parallel." 
- from ase.parallel import world - rank, size = world.rank, world.size for t, theta in enumerate(thetas): if rank == t % size: @@ -399,7 +536,11 @@ def get_arguments(self): local_optimizer=self.local_optimizer, bounds=self.bounds, maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, npoints=self.npoints, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() @@ -409,68 +550,78 @@ def get_arguments(self): class GridOptimizer(GlobalOptimizer): + """ + The grid optimizer used for optimzing the objective function + wrt. the hyperparameters. + The grid optimizer makes a grid in the hyperparameter space from + the boundary conditions and evaluate them. + The grid point with the lowest function value can be optimized + with the local optimizer. + """ + def __init__( self, local_optimizer=None, - bounds=None, + bounds=VariableTransformation(), maxiter=5000, + jac=False, + parallel=False, n_each_dim=None, optimize=True, - parallel=False, + seed=None, + dtype=float, **kwargs, ): """ - The grid optimizer used for optimzing the objective function - wrt. the hyperparameters. - The grid optimizer makes a grid in the hyperparameter space from - the boundary conditions and evaluate them. - The grid point with the lowest function value can be optimized - with the local optimizer. + Initialize the global optimizer. Parameters: - local_optimizer : Local optimizer class + local_optimizer: Local optimizer class A local optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - n_each_dim : int or (H) list + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. 
+ n_each_dim: int or (H) list An integer or a list with number of grid points in each dimension of the hyperparameters. - optimize : bool + optimize: bool Whether to perform a local optimization on the best found solution. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ - # The gradients of the function are unused by the global optimizer - self.jac = False - # Set default bounds - if bounds is None: - from ..hpboundary.hptrans import VariableTransformation - - bounds = VariableTransformation(bounds=None) # Set default local optimizer if local_optimizer is None: - from .localoptimizer import ScipyOptimizer - local_optimizer = ScipyOptimizer( maxiter=maxiter, bounds=bounds, use_bounds=False, ) - # Set n_each_dim as default - self.n_each_dim = None # Set all the arguments self.update_arguments( local_optimizer=local_optimizer, bounds=bounds, maxiter=maxiter, + jac=jac, + parallel=parallel, n_each_dim=n_each_dim, optimize=optimize, - parallel=parallel, + seed=seed, + dtype=dtype, **kwargs, ) @@ -479,7 +630,7 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): n_each_dim = self.get_n_each_dim(len(theta)) # Make grid either with the same or different numbers in each dimension lines = self.make_lines(parameters, ngrid=n_each_dim) - thetas = np.append( + thetas = append( [theta], self.make_grid(lines, maxiter=int(self.maxiter - 1)), axis=0, @@ -516,14 +667,21 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): ) return sol + def set_parallel(self, parallel=False, **kwargs): + self.parallel = parallel + return self + def update_arguments( self, local_optimizer=None, bounds=None, maxiter=None, + jac=None, + parallel=None, 
n_each_dim=None, optimize=None, - parallel=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -531,43 +689,56 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - local_optimizer : Local optimizer class + local_optimizer: Local optimizer class A local optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - n_each_dim : int or (H) list + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + n_each_dim: int or (H) list An integer or a list with number of grid points in each dimension of the hyperparameters. - optimize : bool + optimize: bool Whether to perform a local optimization on the best found solution. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
""" - if local_optimizer is not None: - self.local_optimizer = local_optimizer.copy() - if bounds is not None: - self.bounds = bounds.copy() - # Use the same boundary conditions in the local optimizer - self.local_optimizer.update_arguments(bounds=self.bounds) - if maxiter is not None: - self.maxiter = int(maxiter) - if parallel is not None: - self.parallel = parallel if n_each_dim is not None: - if isinstance(n_each_dim, (list, np.ndarray)): + if isinstance(n_each_dim, (list, ndarray)): self.n_each_dim = n_each_dim.copy() else: self.n_each_dim = n_each_dim + elif not hasattr(self, "n_each_dim"): + self.n_each_dim = None if optimize is not None: self.optimize = optimize + # Set the arguments for the parent class + super().update_arguments( + local_optimizer=local_optimizer, + bounds=bounds, + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + ) return self def make_grid(self, lines, maxiter=5000): @@ -575,7 +746,7 @@ def make_grid(self, lines, maxiter=5000): Make a grid in multi-dimensions from a list of 1D grids in each dimension. 
""" - lines = np.array(lines) + lines = array(lines, dtype=self.dtype) if len(lines.shape) < 2: lines = lines.reshape(1, -1) # Number of combinations @@ -591,26 +762,30 @@ def make_grid(self, lines, maxiter=5000): lines = lines[1:] for line in lines: dim_X = len(X) - X = np.concatenate([X] * len(line), axis=0) - X = np.concatenate( + X = tile(X, (len(line), 1)) + X = concatenate( [ X, - np.sort( - np.concatenate([line.reshape(-1)] * dim_X, axis=0) + sort( + concatenate([line.reshape(-1)] * dim_X, axis=0) ).reshape(-1, 1), ], axis=1, ) - return np.random.permutation(X)[:maxiter] + return self.rng.permutation(X)[:maxiter] # Randomly sample the grid points - X = np.array( - [np.random.choice(line, size=maxiter) for line in lines] + X = asarray( + [self.rng.choice(line, size=maxiter) for line in lines], + dtype=self.dtype, ).T - X = np.unique(X, axis=0) + X = unique(X, axis=0) while len(X) < maxiter: - x = np.array([np.random.choice(line, size=1) for line in lines]).T - X = np.append(X, x, axis=0) - X = np.unique(X, axis=0) + x = asarray( + [self.rng.choice(line, size=1) for line in lines], + dtype=self.dtype, + ).T + X = append(X, x, axis=0) + X = unique(X, axis=0) return X[:maxiter] def optimize_minimum( @@ -668,8 +843,6 @@ def get_n_each_dim(self, dim, **kwargs): def check_npoints(self, thetas, **kwargs): "Check if the number of points is well parallized if it is used." if self.parallel: - from ase.parallel import world - npoints = self.get_optimal_npoints(len(thetas), world.size) return thetas[:npoints] return thetas @@ -677,7 +850,7 @@ def check_npoints(self, thetas, **kwargs): def get_minimum(self, sol, thetas, f_list, **kwargs): "Find the minimum function value and update the solution." 
# Find the minimum function value - i_min = np.nanargmin(f_list) + i_min = nanargmin(f_list) # Get the number of used iterations thetas_len = len(thetas) # Update the number of used iterations @@ -699,9 +872,12 @@ def get_arguments(self): local_optimizer=self.local_optimizer, bounds=self.bounds, maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, n_each_dim=self.n_each_dim, optimize=self.optimize, - parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() @@ -711,62 +887,83 @@ def get_arguments(self): class IterativeLineOptimizer(GridOptimizer): + """ + The iteratively line optimizer used for optimzing + the objective function wrt. the hyperparameters. + The iteratively line optimizer makes a 1D grid in each dimension + of the hyperparameter space from the boundary conditions. + The grid points are then evaluated and the best value + updates the hyperparameter in the specific dimension. + This process is done iteratively over all dimensions and in loops. + The grid point with the lowest function value can be optimized + with the local optimizer. + """ + def __init__( self, local_optimizer=None, - bounds=None, + bounds=VariableTransformation(), maxiter=5000, + jac=False, + parallel=False, n_each_dim=None, loops=3, calculate_init=False, optimize=True, - parallel=False, + seed=None, + dtype=float, **kwargs, ): """ - The iteratively line optimizer used for optimzing - the objective function wrt. the hyperparameters. - The iteratively line optimizer makes a 1D grid in each dimension - of the hyperparameter space from the boundary conditions. - The grid points are then evaluated and the best value - updates the hyperparameter in the specific dimension. - This process is done iteratively over all dimensions and in loops. - The grid point with the lowest function value can be optimized - with the local optimizer. + Initialize the global optimizer. 
Parameters: - local_optimizer : Local optimizer class + local_optimizer: Local optimizer class A local optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - n_each_dim : int or (H) list + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + n_each_dim: int or (H) list An integer or a list with number of grid points in each dimension of the hyperparameters. - loops : int + loops: int The number of times all the hyperparameter dimensions have been searched. - calculate_init : bool + calculate_init: bool Whether to calculate the initial given hyperparameters. If it is parallelized, all CPUs will calculate this point. - optimize : bool + optimize: bool Whether to perform a local optimization on the best found solution. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" super().__init__( local_optimizer=local_optimizer, bounds=bounds, maxiter=maxiter, + jac=jac, + parallel=parallel, n_each_dim=n_each_dim, loops=loops, calculate_init=calculate_init, optimize=optimize, - parallel=parallel, + seed=seed, + dtype=dtype, **kwargs, ) @@ -806,11 +1003,14 @@ def update_arguments( local_optimizer=None, bounds=None, maxiter=None, + jac=None, + parallel=None, n_each_dim=None, loops=None, calculate_init=None, optimize=None, - parallel=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -818,56 +1018,67 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - local_optimizer : Local optimizer class + local_optimizer: Local optimizer class A local optimization method. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - n_each_dim : int or (H) list + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + n_each_dim: int or (H) list An integer or a list with number of grid points in each dimension of the hyperparameters. - loops : int + loops: int The number of times all the hyperparameter dimensions have been searched. - calculate_init : bool + calculate_init: bool Whether to calculate the initial given hyperparameters. If it is parallelized, all CPUs will calculate this point. - optimize : bool + optimize: bool Whether to perform a local optimization on the best found solution. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. 
+ dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if local_optimizer is not None: - self.local_optimizer = local_optimizer.copy() - if bounds is not None: - self.bounds = bounds.copy() - # Use the same boundary conditions in the local optimizer - self.local_optimizer.update_arguments(bounds=self.bounds) - if maxiter is not None: - self.maxiter = int(maxiter) - if parallel is not None: - self.parallel = parallel + # Set the arguments for the parent class + super().update_arguments( + local_optimizer=local_optimizer, + bounds=bounds, + maxiter=maxiter, + jac=jac, + parallel=parallel, + n_each_dim=None, + optimize=optimize, + seed=seed, + dtype=dtype, + ) if loops is not None: self.loops = int(loops) if calculate_init is not None: self.calculate_init = calculate_init if n_each_dim is not None: - if isinstance(n_each_dim, (list, np.ndarray)): - if np.sum(n_each_dim) * self.loops > self.maxiter: + if isinstance(n_each_dim, (list, ndarray)): + if sum_(n_each_dim) * self.loops > self.maxiter: self.n_each_dim = self.get_n_each_dim(len(n_each_dim)) else: self.n_each_dim = n_each_dim.copy() else: self.n_each_dim = n_each_dim - if optimize is not None: - self.optimize = optimize return self def iterative_line(self, theta, lines, func, func_args=(), **kwargs): @@ -884,13 +1095,13 @@ def iterative_line(self, theta, lines, func, func_args=(), **kwargs): # Perform loops for i in range(self.loops): # Permute the dimensions - dim_perm = np.random.permutation(dims) + dim_perm = self.rng.permutation(dims) # Make sure the same dimension is not used after each other if dim_perm[0] == d: dim_perm = dim_perm[1:] for d in dim_perm: # Make the hyperparameter changes to the specific dimension - thetas = np.tile(theta, (len(lines[d]), 1)) + thetas = tile(theta, (len(lines[d]), 1)) thetas[:, d] = lines[d].copy() f_list = self.calculate_values( thetas, @@ -914,9 +1125,7 @@ def get_n_each_dim(self, 
dim): def get_n_each_dim_parallel(self, n_each_dim): "Number of points per dimension if it is parallelized." - from ase.parallel import world - - if isinstance(n_each_dim, (list, np.ndarray)): + if isinstance(n_each_dim, (list, ndarray)): for d, n_dim in enumerate(n_each_dim): n_each_dim[d] = self.get_optimal_npoints(n_dim, world.size) else: @@ -930,11 +1139,14 @@ def get_arguments(self): local_optimizer=self.local_optimizer, bounds=self.bounds, maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, n_each_dim=self.n_each_dim, loops=self.loops, calculate_init=self.calculate_init, optimize=self.optimize, - parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() @@ -943,89 +1155,120 @@ def get_arguments(self): return arg_kwargs, constant_kwargs, object_kwargs -class BasinOptimizer(GlobalOptimizer): +class FactorizedOptimizer(GlobalOptimizer): + """ + The factorized optimizer used for optimzing + the objective function wrt. the hyperparameters. + The factorized optimizer makes a 1D grid for each + hyperparameter from the boundary conditions. + The hyperparameters are then optimized with a line search optimizer. + The line search optimizer optimizes only one of the hyperparameters and + it therefore relies on a factorization method as + the objective function. + """ + def __init__( self, + line_optimizer=None, + bounds=VariableTransformation(), maxiter=5000, - jac=True, - opt_kwargs={}, - local_kwargs={}, + jac=False, + parallel=False, + ngrid=80, + calculate_init=False, + seed=None, + dtype=float, **kwargs, ): """ - The basin-hopping optimizer used for optimzing the objective function - wrt. the hyperparameters. - The basin-hopping optimizer is a wrapper to SciPy's basinhopping. - (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.basinhopping.html) - No local optimizer and boundary conditions are given to this optimizer. 
- The local optimizer is set by keywords in the local_kwargs and - it uses SciPy's minimizer. + Initialize the global optimizer. Parameters: - maxiter : int + line_optimizer: Line search optimizer class + A line search optimization method. + bounds: HPBoundaries class + A class of the boundary conditions of the hyperparameters. + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. - opt_kwargs : dict - A dictionary with the arguments and keywords given - to SciPy's basinhopping. - local_kwargs : dict - A dictionary with the arguments and keywords given - to SciPy's local minimizer. + This is not implemented for this method. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + ngrid: int + The number of grid points of the hyperparameter + that is optimized. + calculate_init: bool + Whether to calculate the initial given hyperparameters. + If it is parallelized, all CPUs will calculate this point. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" - # This global optimizer can not be parallelized - self.parallel = False - # Set default arguments for SciPy's basinhopping - self.opt_kwargs = dict( - niter=5, - interval=10, - T=1.0, - stepsize=0.1, - niter_success=None, - ) - # Set default arguments for SciPy's local minimizer - self.local_kwargs = dict( - options={"maxiter": int(maxiter / self.opt_kwargs["niter"])} - ) + # The gradients of the function are unused by the global optimizer + self.jac = False + # Set default line optimizer + if line_optimizer is None: + line_optimizer = GoldenSearch( + maxiter=int(maxiter), + parallel=parallel, + ) # Set all the arguments self.update_arguments( + line_optimizer=line_optimizer, + bounds=bounds, maxiter=maxiter, jac=jac, - opt_kwargs=opt_kwargs, - local_kwargs=local_kwargs, + parallel=parallel, + ngrid=ngrid, + calculate_init=calculate_init, + seed=seed, + dtype=dtype, **kwargs, ) def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): - from scipy.optimize import basinhopping - - # Get the function arguments - func_args = self.get_func_arguments( + # Make an initial solution or use an empty solution + if self.calculate_init: + func_args = self.get_func_arguments( + parameters, + model, + X, + Y, + pdis, + jac=False, + **kwargs, + ) + sol = self.get_initial_solution(theta, func, func_args=func_args) + else: + sol = self.get_empty_solution() + # Make the lines of the hyperparameters + lines = asarray(self.make_lines(parameters, ngrid=self.ngrid)).T + # Optimize the hyperparameters with the line search + sol_s = self.run_line_opt( + func, + lines, parameters, model, X, Y, pdis, - self.jac, **kwargs, ) - # Get the function that evaluate the objective function - fun = self.get_fun(func) - # Set the minimizer kwargs - minimizer_kwargs = dict( - args=func_args, - jac=self.jac, - **self.local_kwargs, - ) - # Do the basin-hopping - sol = basinhopping( - fun, - x0=theta, - minimizer_kwargs=minimizer_kwargs, - **self.opt_kwargs, - ) + # Update the solution if 
it is better + sol = self.compare_solutions(sol, sol_s) + # Change the solution message + if sol["success"]: + sol["message"] = "Local optimization is converged." + else: + sol["message"] = "Local optimization is not converged." return self.get_final_solution( sol, func, @@ -1036,12 +1279,214 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): pdis, ) + def set_parallel(self, parallel=False, **kwargs): + self.parallel = parallel + return self + + def update_arguments( + self, + line_optimizer=None, + bounds=None, + maxiter=None, + jac=None, + parallel=None, + ngrid=None, + calculate_init=None, + seed=None, + dtype=None, + **kwargs, + ): + """ + Update the optimizer with its arguments. + The existing arguments are used if they are not given. + + Parameters: + line_optimizer: Line search optimizer class + A line search optimization method. + bounds: HPBoundaries class + A class of the boundary conditions of the hyperparameters. + maxiter: int + The maximum number of evaluations or iterations + the global optimizer can use. + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + This is not implemented for this method. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + ngrid: int + The number of grid points of the hyperparameter + that is optimized. + calculate_init: bool + Whether to calculate the initial given hyperparameters. + If it is parallelized, all CPUs will calculate this point. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + + Returns: + self: The updated object itself. 
+ """ + # Set the arguments for the parent class + super().update_arguments( + local_optimizer=line_optimizer, + bounds=bounds, + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + ) + # Set the arguments + if ngrid is not None: + if self.parallel: + self.ngrid = self.get_optimal_npoints(ngrid, world.size) + else: + self.ngrid = int(ngrid) + if calculate_init is not None: + self.calculate_init = calculate_init + return self + + def run_line_opt( + self, + func, + lines, + parameters, + model, + X, + Y, + pdis, + **kwargs, + ): + "Run the line search optimization." + return self.local_optimizer.run( + func, + lines, + parameters, + model, + X, + Y, + pdis, + **kwargs, + ) + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + line_optimizer=self.local_optimizer, + bounds=self.bounds, + maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + ngrid=self.ngrid, + calculate_init=self.calculate_init, + seed=self.seed, + dtype=self.dtype, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + +class ScipyGlobalOptimizer(Optimizer): + """ + The global optimizer used for optimzing the objective function + wrt. the hyperparameters. + The global optimizer requires a local optimization method and + boundary conditions of the hyperparameters. + This global optimizer is a wrapper to SciPy's global optimizers. + """ + + def __init__( + self, + maxiter=5000, + jac=True, + parallel=False, + opt_kwargs={}, + local_kwargs={}, + seed=None, + dtype=float, + **kwargs, + ): + """ + Initialize the global optimizer. + + Parameters: + maxiter: int + The maximum number of evaluations or iterations + the global optimizer can use. + jac: bool + Whether to use the gradient of the objective function + wrt. 
the hyperparameters. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + opt_kwargs: dict + A dictionary with the arguments and keywords given + to SciPy's optimizer. + local_kwargs: dict + A dictionary with the arguments and keywords given + to SciPy's local minimizer. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + """ + # Set default arguments for SciPy's global optimizer + self.opt_kwargs = dict() + # Set default arguments for SciPy's local minimizer + self.local_kwargs = dict(options={}) + # Set all the arguments + self.update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + opt_kwargs=opt_kwargs, + local_kwargs=local_kwargs, + seed=seed, + dtype=dtype, + **kwargs, + ) + + def set_seed(self, seed=None, **kwargs): + super().set_seed(seed=seed, **kwargs) + # Set the random number generator for the global optimizer + if scipy_version >= "1.15": + self.opt_kwargs["rng"] = self.rng + else: + self.opt_kwargs["seed"] = self.seed + return self + + def set_jac(self, jac=True, **kwargs): + self.jac = jac + return self + + def set_parallel(self, parallel=False, **kwargs): + # This global optimizer can not be parallelized + self.parallel = False + return self + def update_arguments( self, maxiter=None, jac=None, + parallel=None, opt_kwargs=None, local_kwargs=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -1049,26 +1494,41 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. 
- opt_kwargs : dict + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + opt_kwargs: dict A dictionary with the arguments and keywords given - to SciPy's basinhopping. - local_kwargs : dict + to SciPy's optimizer. + local_kwargs: dict A dictionary with the arguments and keywords given to SciPy's local minimizer. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if maxiter is not None: - self.maxiter = int(maxiter) - if jac is not None: - self.jac = jac + # Set the arguments for the parent class + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + ) if opt_kwargs is not None: self.opt_kwargs.update(opt_kwargs) if local_kwargs is not None: @@ -1082,6 +1542,190 @@ def update_arguments( self.local_kwargs["options"].update(local_kwargs["options"]) else: self.local_kwargs.update(local_kwargs) + return self + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + opt_kwargs=self.opt_kwargs, + local_kwargs=self.local_kwargs, + seed=self.seed, + dtype=self.dtype, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + +class BasinOptimizer(ScipyGlobalOptimizer): + """ + The basin-hopping optimizer used for optimzing the objective function + wrt. the hyperparameters. + The basin-hopping optimizer is a wrapper to SciPy's basinhopping. 
+ (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.basinhopping.html) + No local optimizer and boundary conditions are given to this optimizer. + The local optimizer is set by keywords in the local_kwargs and + it uses SciPy's minimizer. + """ + + def __init__( + self, + maxiter=5000, + jac=True, + parallel=False, + opt_kwargs={}, + local_kwargs={}, + seed=None, + dtype=float, + **kwargs, + ): + """ + Initialize the global optimizer. + + Parameters: + maxiter: int + The maximum number of evaluations or iterations + the global optimizer can use. + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + opt_kwargs: dict + A dictionary with the arguments and keywords given + to SciPy's basinhopping. + local_kwargs: dict + A dictionary with the arguments and keywords given + to SciPy's local minimizer. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
+ """ + # Set default arguments for SciPy's basinhopping + self.opt_kwargs = dict( + niter=5, + interval=10, + T=1.0, + stepsize=0.1, + niter_success=None, + ) + # Set default arguments for SciPy's local minimizer + self.local_kwargs = dict( + options={"maxiter": int(maxiter / self.opt_kwargs["niter"])} + ) + # Set all the arguments + self.update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + opt_kwargs=opt_kwargs, + local_kwargs=local_kwargs, + seed=seed, + dtype=dtype, + **kwargs, + ) + + def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): + # Get the function arguments + func_args = self.get_func_arguments( + parameters, + model, + X, + Y, + pdis, + self.jac, + **kwargs, + ) + # Get the function that evaluate the objective function + fun = self.get_fun(func) + # Set the minimizer kwargs + minimizer_kwargs = dict( + args=func_args, + jac=self.jac, + **self.local_kwargs, + ) + # Do the basin-hopping + sol = basinhopping( + fun, + x0=theta, + minimizer_kwargs=minimizer_kwargs, + **self.opt_kwargs, + ) + return self.get_final_solution( + sol, + func, + parameters, + model, + X, + Y, + pdis, + ) + + def update_arguments( + self, + maxiter=None, + jac=None, + parallel=None, + opt_kwargs=None, + local_kwargs=None, + seed=None, + dtype=None, + **kwargs, + ): + """ + Update the optimizer with its arguments. + The existing arguments are used if they are not given. + + Parameters: + maxiter: int + The maximum number of evaluations or iterations + the global optimizer can use. + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + opt_kwargs: dict + A dictionary with the arguments and keywords given + to SciPy's basinhopping. + local_kwargs: dict + A dictionary with the arguments and keywords given + to SciPy's local minimizer. 
+ seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + + Returns: + self: The updated object itself. + """ + # Set the arguments for the parent class + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + opt_kwargs=opt_kwargs, + local_kwargs=local_kwargs, + seed=seed, + dtype=dtype, + ) # Make sure not to many iterations are used in average maxiter_niter = int(self.maxiter / self.opt_kwargs["niter"]) if maxiter_niter < self.local_kwargs["options"]["maxiter"]: @@ -1094,8 +1738,11 @@ def get_arguments(self): arg_kwargs = dict( maxiter=self.maxiter, jac=self.jac, + parallel=self.parallel, opt_kwargs=self.opt_kwargs, local_kwargs=self.local_kwargs, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() @@ -1104,56 +1751,66 @@ def get_arguments(self): return arg_kwargs, constant_kwargs, object_kwargs -class AnneallingOptimizer(GlobalOptimizer): +class AnneallingOptimizer(ScipyGlobalOptimizer): + """ + The simulated annealing optimizer used for optimzing + the objective function wrt. the hyperparameters. + The simulated annealing optimizer is a wrapper to + SciPy's dual_annealing. + (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.dual_annealing.html) + No local optimizer is given to this optimizer. + The local optimizer is set by keywords in the local_kwargs and + it uses SciPy's minimizer. + """ + def __init__( self, - bounds=None, + bounds=EducatedBoundaries(use_log=True), maxiter=5000, jac=True, + parallel=False, opt_kwargs={}, local_kwargs={}, + seed=None, + dtype=float, **kwargs, ): """ - The simulated annealing optimizer used for optimzing - the objective function wrt. the hyperparameters. 
- The simulated annealing optimizer is a wrapper to - SciPy's dual_annealing. - (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.dual_annealing.html) - No local optimizer is given to this optimizer. - The local optimizer is set by keywords in the local_kwargs and - it uses SciPy's minimizer. + Initialize the global optimizer. Parameters: - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. - opt_kwargs : dict + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + opt_kwargs: dict A dictionary with the arguments and keywords given to SciPy's dual_annealing. - local_kwargs : dict + local_kwargs: dict A dictionary with the arguments and keywords given to SciPy's local minimizer. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" - # Set default bounds - if bounds is None: - from ..hpboundary.educated import EducatedBoundaries - - bounds = EducatedBoundaries(log=True) - # This global optimizer can not be parallelized - self.parallel = False # Set default arguments for SciPy's dual_annealing self.opt_kwargs = dict( initial_temp=5230.0, restart_temp_ratio=2e-05, visit=2.62, accept=-5.0, - seed=None, no_local_search=False, ) # Set default arguments for SciPy's local minimizer @@ -1163,14 +1820,15 @@ def __init__( bounds=bounds, maxiter=maxiter, jac=jac, + parallel=parallel, opt_kwargs=opt_kwargs, local_kwargs=local_kwargs, + seed=seed, + dtype=dtype, **kwargs, ) def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): - from scipy.optimize import dual_annealing - # Get the function arguments func_args = self.get_func_arguments( parameters, @@ -1186,7 +1844,7 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): # Set the minimizer kwargs minimizer_kwargs = dict(jac=False, **self.local_kwargs) # Make boundary conditions - bounds = self.make_bounds(parameters, array=True) + bounds = self.make_bounds(parameters, use_array=True) # Do the dual simulated annealing sol = dual_annealing( fun, @@ -1208,13 +1866,32 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): pdis, ) + def set_seed(self, seed=None, **kwargs): + # Set the seed for the global optimizer + super().set_seed(seed=seed, **kwargs) + # Set the random seed of the bounds + if self.bounds is not None and hasattr(self.bounds, "set_seed"): + self.bounds.set_seed(seed=seed, **kwargs) + return self + + def set_dtype(self, dtype, **kwargs): + # Set the dtype for the global optimizer + super().set_dtype(dtype=dtype, **kwargs) + # Set the data type of the bounds + if self.bounds is not None and hasattr(self.bounds, "set_dtype"): + self.bounds.set_dtype(dtype=dtype, **kwargs) + return self + def update_arguments( self, bounds=None, maxiter=None, + parallel=None, jac=None, opt_kwargs=None, 
local_kwargs=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -1222,45 +1899,57 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. - opt_kwargs : dict + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + opt_kwargs: dict A dictionary with the arguments and keywords given to SciPy's dual_annealing. - local_kwargs : dict + local_kwargs: dict A dictionary with the arguments and keywords given to SciPy's local minimizer. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. 
""" if bounds is not None: self.bounds = bounds.copy() - if maxiter is not None: - self.maxiter = int(maxiter) - if jac is not None: - self.jac = jac - if opt_kwargs is not None: - self.opt_kwargs.update(opt_kwargs) - if local_kwargs is not None: - if "options" in local_kwargs: - local_no_options = { - key: value - for key, value in local_kwargs.items() - if key != "options" - } - self.local_kwargs.update(local_no_options) - self.local_kwargs["options"].update(local_kwargs["options"]) - else: - self.local_kwargs.update(local_kwargs) + # Set the arguments for the parent class + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + opt_kwargs=opt_kwargs, + local_kwargs=local_kwargs, + seed=seed, + dtype=dtype, + ) return self + def make_bounds(self, parameters, use_array=True, **kwargs): + "Make the boundary conditions of the hyperparameters." + return self.bounds.get_bounds( + parameters=parameters, + use_array=use_array, + **kwargs, + ) + def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization @@ -1268,8 +1957,11 @@ def get_arguments(self): bounds=self.bounds, maxiter=self.maxiter, jac=self.jac, + parallel=self.parallel, opt_kwargs=self.opt_kwargs, local_kwargs=self.local_kwargs, + seed=self.seed, + dtype=self.dtype, ) # Get the constants made within the class constant_kwargs = dict() @@ -1279,57 +1971,67 @@ def get_arguments(self): class AnneallingTransOptimizer(AnneallingOptimizer): + """ + The simulated annealing optimizer used for optimzing + the objective functionwrt. the hyperparameters. + The simulated annealing optimizer is a wrapper to + SciPy's dual_annealing. + (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.dual_annealing.html) + No local optimizer is given to this optimizer. + The local optimizer is set by keywords in the local_kwargs and + it uses SciPy's minimizer. 
+ This simulated annealing optimizer uses variable transformation of + the hyperparameters to search the space. + """ + def __init__( self, - bounds=None, + bounds=VariableTransformation(), maxiter=5000, jac=True, + parallel=False, opt_kwargs={}, local_kwargs={}, + seed=None, + dtype=float, **kwargs, ): """ - The simulated annealing optimizer used for optimzing - the objective functionwrt. the hyperparameters. - The simulated annealing optimizer is a wrapper to - SciPy's dual_annealing. - (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.dual_annealing.html) - No local optimizer is given to this optimizer. - The local optimizer is set by keywords in the local_kwargs and - it uses SciPy's minimizer. - This simulated annealing optimizer uses variable transformation of - the hyperparameters to search the space. + Initialize the global optimizer. Parameters: - bounds : VariableTransformation class + bounds: VariableTransformation class A class of the variable transformation of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. - opt_kwargs : dict + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + opt_kwargs: dict A dictionary with the arguments and keywords given to SciPy's dual_annealing. - local_kwargs : dict + local_kwargs: dict A dictionary with the arguments and keywords given to SciPy's local minimizer. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" - # Set default bounds - if bounds is None: - from ..hpboundary.hptrans import VariableTransformation - - bounds = VariableTransformation(bounds=None) - # This global optimizer can not be parallelized - self.parallel = False # Set default arguments for SciPy's dual_annealing self.opt_kwargs = dict( initial_temp=5230.0, restart_temp_ratio=2e-05, visit=2.62, accept=-5.0, - seed=None, no_local_search=False, ) # Set default arguments for SciPy's local minimizer @@ -1339,14 +2041,15 @@ def __init__( bounds=bounds, maxiter=maxiter, jac=jac, + parallel=parallel, opt_kwargs=opt_kwargs, local_kwargs=local_kwargs, + seed=seed, + dtype=dtype, **kwargs, ) def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): - from scipy.optimize import dual_annealing - # Get the function arguments for the wrappers func_args_w = self.get_wrapper_arguments( func, @@ -1361,7 +2064,7 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): # Set the minimizer kwargs minimizer_kwargs = dict(jac=False, **self.local_kwargs) # Make boundary conditions - bounds = self.make_bounds(parameters, array=True, transformed=True) + bounds = self.make_bounds(parameters, use_array=True, transformed=True) # Do the dual simulated annealing sol = dual_annealing( self.func_vartrans, @@ -1383,8 +2086,11 @@ def update_arguments( bounds=None, maxiter=None, jac=None, + parallel=None, opt_kwargs=None, local_kwargs=None, + seed=None, + dtype=None, **kwargs, ): """ @@ -1392,49 +2098,50 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - bounds : VariableTransformation class + bounds: VariableTransformation class A class of the variable transformation of the hyperparameters. - maxiter : int + maxiter: int The maximum number of evaluations or iterations the global optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. 
- opt_kwargs : dict + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This is not implemented for this method. + opt_kwargs: dict A dictionary with the arguments and keywords given to SciPy's dual_annealing. - local_kwargs : dict + local_kwargs: dict A dictionary with the arguments and keywords given to SciPy's local minimizer. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ + super().update_arguments( + bounds=bounds, + maxiter=maxiter, + jac=jac, + parallel=False, + opt_kwargs=opt_kwargs, + local_kwargs=local_kwargs, + seed=seed, + dtype=dtype, + ) if bounds is not None: - from ..hpboundary.hptrans import VariableTransformation - if not isinstance(bounds, VariableTransformation): - raise Exception( + raise ValueError( "A variable transformation as bounds has to be used!" ) - self.bounds = bounds.copy() - if maxiter is not None: - self.maxiter = int(maxiter) - if jac is not None: - self.jac = jac - if opt_kwargs is not None: - self.opt_kwargs.update(opt_kwargs) - if local_kwargs is not None: - if "options" in local_kwargs: - local_no_options = { - key: value - for key, value in local_kwargs.items() - if key != "options" - } - self.local_kwargs.update(local_no_options) - self.local_kwargs["options"].update(local_kwargs["options"]) - else: - self.local_kwargs.update(local_kwargs) return self def func_vartrans(self, ti, fun, parameters, func_args=(), **kwargs): @@ -1450,13 +2157,13 @@ def reverse_trasformation(self, ti, parameters, **kwargs): Transform the variable transformed hyperparameters back to hyperparameter log-space. 
""" - ti = np.where( + ti = where( ti < 1.0, - np.where(ti > 0.0, ti, self.bounds.eps), + where(ti > 0.0, ti, self.bounds.eps), 1.00 - self.bounds.eps, ) t = self.make_hp(ti, parameters) - theta = self.bounds.reverse_trasformation(t, array=True) + theta = self.bounds.reverse_trasformation(t, use_array=True) return theta def transform_solution(self, sol, **kwargs): @@ -1464,8 +2171,11 @@ def transform_solution(self, sol, **kwargs): Retransform the variable transformed hyperparameters in the solution back to hyperparameter log-space. """ - sol["x"] = self.bounds.reverse_trasformation(sol["hp"], array=True) - sol["hp"] = self.bounds.reverse_trasformation(sol["hp"], array=False) + sol["x"] = self.bounds.reverse_trasformation(sol["hp"], use_array=True) + sol["hp"] = self.bounds.reverse_trasformation( + sol["hp"], + use_array=False, + ) return sol def get_wrapper_arguments( @@ -1495,208 +2205,3 @@ def get_wrapper_arguments( # Get the function arguments for the wrappers func_args_w = (fun, parameters, func_args) return func_args_w - - -class FactorizedOptimizer(GlobalOptimizer): - def __init__( - self, - line_optimizer=None, - bounds=None, - maxiter=5000, - ngrid=80, - calculate_init=False, - parallel=False, - **kwargs, - ): - """ - The factorized optimizer used for optimzing - the objective function wrt. the hyperparameters. - The factorized optimizer makes a 1D grid for each - hyperparameter from the boundary conditions. - The hyperparameters are then optimized with a line search optimizer. - The line search optimizer optimizes only one of the hyperparameters and - it therefore relies on a factorization method as - the objective function. - - Parameters: - line_optimizer : Line search optimizer class - A line search optimization method. - bounds : HPBoundaries class - A class of the boundary conditions of the hyperparameters. - maxiter : int - The maximum number of evaluations or iterations - the global optimizer can use. 
- ngrid : int - The number of grid points of the hyperparameter - that is optimized. - calculate_init : bool - Whether to calculate the initial given hyperparameters. - If it is parallelized, all CPUs will calculate this point. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. - """ - # The gradients of the function are unused by the global optimizer - self.jac = False - # Set default bounds - if bounds is None: - from ..hpboundary.hptrans import VariableTransformation - - bounds = VariableTransformation(bounds=None) - # Set default line optimizer - if line_optimizer is None: - from .linesearcher import GoldenSearch - - line_optimizer = GoldenSearch( - maxiter=int(maxiter), - parallel=parallel, - ) - # Set all the arguments - self.update_arguments( - line_optimizer=line_optimizer, - bounds=bounds, - maxiter=maxiter, - ngrid=ngrid, - calculate_init=calculate_init, - parallel=parallel, - **kwargs, - ) - - def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): - # Make an initial solution or use an empty solution - if self.calculate_init: - func_args = self.get_func_arguments( - parameters, - model, - X, - Y, - pdis, - jac=False, - **kwargs, - ) - sol = self.get_initial_solution(theta, func, func_args=func_args) - else: - sol = self.get_empty_solution() - # Make the lines of the hyperparameters - lines = np.array(self.make_lines(parameters, ngrid=self.ngrid)).T - # Optimize the hyperparameters with the line search - sol_s = self.run_line_opt( - func, - lines, - parameters, - model, - X, - Y, - pdis, - **kwargs, - ) - # Update the solution if it is better - sol = self.compare_solutions(sol, sol_s) - # Change the solution message - if sol["success"]: - sol["message"] = "Local optimization is converged." - else: - sol["message"] = "Local optimization is not converged." 
- return self.get_final_solution( - sol, - func, - parameters, - model, - X, - Y, - pdis, - ) - - def update_arguments( - self, - line_optimizer=None, - bounds=None, - maxiter=None, - ngrid=None, - calculate_init=None, - parallel=None, - **kwargs, - ): - """ - Update the optimizer with its arguments. - The existing arguments are used if they are not given. - - Parameters: - line_optimizer : Line search optimizer class - A line search optimization method. - bounds : HPBoundaries class - A class of the boundary conditions of the hyperparameters. - maxiter : int - The maximum number of evaluations or iterations - the global optimizer can use. - ngrid : int - The number of grid points of the hyperparameter - that is optimized. - calculate_init : bool - Whether to calculate the initial given hyperparameters. - If it is parallelized, all CPUs will calculate this point. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. - - Returns: - self: The updated object itself. - """ - if line_optimizer is not None: - self.line_optimizer = line_optimizer.copy() - if bounds is not None: - self.bounds = bounds.copy() - if maxiter is not None: - self.maxiter = int(maxiter) - if parallel is not None: - self.parallel = parallel - if ngrid is not None: - if self.parallel: - from ase.parallel import world - - self.ngrid = self.get_optimal_npoints(ngrid, world.size) - else: - self.ngrid = int(ngrid) - if calculate_init is not None: - self.calculate_init = calculate_init - return self - - def run_line_opt( - self, - func, - lines, - parameters, - model, - X, - Y, - pdis, - **kwargs, - ): - "Run the line search optimization." - return self.line_optimizer.run( - func, - lines, - parameters, - model, - X, - Y, - pdis, - **kwargs, - ) - - def get_arguments(self): - "Get the arguments of the class itself." 
- # Get the arguments given to the class in the initialization - arg_kwargs = dict( - line_optimizer=self.line_optimizer, - bounds=self.bounds, - maxiter=self.maxiter, - ngrid=self.ngrid, - calculate_init=self.calculate_init, - parallel=self.parallel, - ) - # Get the constants made within the class - constant_kwargs = dict() - # Get the objects made within the class - object_kwargs = dict() - return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/regression/gp/optimizers/linesearcher.py b/catlearn/regression/gp/optimizers/linesearcher.py index c9362400..94e8f710 100644 --- a/catlearn/regression/gp/optimizers/linesearcher.py +++ b/catlearn/regression/gp/optimizers/linesearcher.py @@ -1,56 +1,91 @@ from .localoptimizer import LocalOptimizer -import numpy as np +from numpy import ( + append, + argsort, + asarray, + concatenate, + empty, + exp, + floor, + full, + linspace, + nanargmin, + nanmax, + nanmin, + sqrt, + where, +) +from numpy.linalg import norm +from scipy.integrate import cumulative_trapezoid +from ase.parallel import world class LineSearchOptimizer(LocalOptimizer): + """ + The line search optimizer is used for optimzing + the objective function wrt. a single hyperparameter. + The LineSearchOptimizer does only work together with a GlobalOptimizer + that uses line searches (e.g. FactorizedOptimizer). + A line of the hyperparameter is required to run the line search. + """ + def __init__( self, maxiter=5000, + jac=False, + parallel=False, + seed=None, + dtype=float, tol=1e-5, optimize=True, multiple_min=True, theta_index=None, - parallel=False, xtol=None, ftol=None, **kwargs, ): """ - The line search optimizer is used for optimzing - the objective function wrt. a single hyperparameter. - The LineSearchOptimizer does only work together with a GlobalOptimizer - that uses line searches (e.g. FactorizedOptimizer). - A line of the hyperparameter is required to run the line search. + Initialize the line search optimizer. 
Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - theta_index : int or None + theta_index: int or None The index of the hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the length-scale. If theta_index=None and no length-scale, then theta_index=0. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. - xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. - ftol : float + ftol: float A tolerance criterion of the objective function for convergence. """ - # Line search optimizers cannot use gradients of the objective function - self.jac = False # Set the default theta_index self.theta_index = None # Set xtol and ftol to the tolerance if they are not given. 
@@ -58,11 +93,14 @@ def __init__( # Set all the arguments self.update_arguments( maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, tol=tol, optimize=optimize, multiple_min=multiple_min, theta_index=theta_index, - parallel=parallel, xtol=xtol, ftol=ftol, **kwargs, @@ -75,41 +113,53 @@ def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): The grid/line of the hyperparameter has to be given. Parameters: - func : ObjectiveFunction class object + func: ObjectiveFunction class object The objective function class that is used to calculate the value. - line : (ngrid,H) array + line: (ngrid,H) array An array with the grid points of the hyperparameters. Only one of the hyperparameters is used, which is given by theta_index. - parameters : (H) list of strings + parameters: (H) list of strings A list of names of the hyperparameters. - model : Model class object + model: Model class object The Machine Learning Model with kernel and prior that are optimized. - X : (N,D) array + X: (N,D) array Training features with N data points and D dimensions. - Y : (N,1) array or (N,D+1) array + Y: (N,1) array or (N,D+1) array Training targets with or without derivatives with N data points. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. Returns: - dict : A solution dictionary with objective function value, + dict: A solution dictionary with objective function value, optimized hyperparameters, success statement, and number of used evaluations. 
""" raise NotImplementedError() + def set_jac(self, jac=False, **kwargs): + # Line search optimizers cannot use gradients of the objective function + self.jac = False + return self + + def set_parallel(self, parallel=False, **kwargs): + self.parallel = parallel + return self + def update_arguments( self, maxiter=None, + jac=None, + parallel=None, + seed=None, + dtype=None, tol=None, optimize=None, multiple_min=None, theta_index=None, - parallel=None, xtol=None, ftol=None, **kwargs, @@ -119,47 +169,61 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - theta_index : int or None + theta_index: int or None The index of the hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the length-scale. If theta_index=None and no length-scale, then theta_index=0. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. - xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. 
- ftol : float + ftol: float A tolerance criterion of the objective function for convergence. Returns: self: The updated object itself. """ - if maxiter is not None: - self.maxiter = int(maxiter) - if tol is not None: - self.tol = tol + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + tol=tol, + ) if optimize is not None: self.optimize = optimize if multiple_min is not None: self.multiple_min = multiple_min if theta_index is not None: self.theta_index = int(theta_index) - if parallel is not None: - self.parallel = parallel if xtol is not None: self.xtol = xtol if ftol is not None: @@ -176,7 +240,7 @@ def find_minimas( **kwargs, ): """ - Find all the local minimums and their indicies or just + Find all the local minimums and their indices or just the global minimum and then check convergence. """ # Investigate multiple minimums @@ -200,12 +264,12 @@ def find_multiple_min( **kwargs, ): """ - Find all the local minimums and their indicies and + Find all the local minimums and their indices and then check convergence. 
""" # Find local minimas for middel part of line i_minimas = ( - np.where( + where( (fvalues[1:-1] < fvalues[:-2]) & (fvalues[2:] > fvalues[1:-1]) )[0] + 1 @@ -220,9 +284,9 @@ def find_multiple_min( i_minimas = i_minimas[i_keep] # Find local minimas for end parts of line if fvalues[0] - fvalues[1] < -self.ftol: - i_minimas = np.append([1], i_minimas) + i_minimas = append([1], i_minimas) if fvalues[-1] - fvalues[-2] < -self.ftol: - i_minimas = np.append(i_minimas, [len_l - 2]) + i_minimas = append(i_minimas, [len_l - 2]) # Check the distances in the local minimas are within the tolerance if len(i_minimas): i_keep = abs( @@ -230,9 +294,9 @@ def find_multiple_min( - xvalues[i_minimas - 1, theta_index] ) >= self.xtol * (1.0 + abs(xvalues[i_minimas, theta_index])) i_minimas = i_minimas[i_keep] - # Sort the indicies after function value sizes + # Sort the indices after function value sizes if len(i_minimas) > 1: - i_sort = np.argsort(fvalues[i_minimas]) + i_sort = argsort(fvalues[i_minimas]) i_minimas = i_minimas[i_sort] return i_minimas @@ -272,7 +336,7 @@ def find_single_min( - xvalues[i_minima - 1, theta_index] ) >= self.xtol * (1.0 + abs(xvalues[i_minima, theta_index])): i_minimas = [] - return np.array(i_minimas) + return asarray(i_minimas) def get_theta_index(self, parameters=[], **kwargs): "Get the theta_index." 
@@ -295,11 +359,14 @@ def get_arguments(self): # Get the arguments given to the class in the initialization arg_kwargs = dict( maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, tol=self.tol, optimize=self.optimize, multiple_min=self.multiple_min, theta_index=self.theta_index, - parallel=self.parallel, xtol=self.xtol, ftol=self.ftol, ) @@ -311,64 +378,14 @@ def get_arguments(self): class GoldenSearch(LineSearchOptimizer): - def __init__( - self, - maxiter=5000, - tol=1e-5, - optimize=True, - multiple_min=True, - theta_index=None, - parallel=False, - xtol=None, - ftol=None, - **kwargs, - ): - """ - The golden section search method is used as the line search optimizer. - The line search optimizer is used for - optimzing the objective function wrt. a single the hyperparameter. - The GoldenSearch does only work together with a GlobalOptimizer - that uses line searches (e.g. FactorizedOptimizer). - A line of the hyperparameter is required to run the line search. - - Parameters: - maxiter : int - The maximum number of evaluations or iterations - the optimizer can use. - tol : float - A tolerance criterion for convergence. - optimize : bool - Whether to optimize the line given by split it - into smaller intervals. - multiple_min : bool - Whether to optimize multiple minimums or just - optimize the lowest minimum. - theta_index : int or None - The index of the hyperparameter that is - optimized with the line search. - If theta_index=None, then it will use the index of - the length-scale. - If theta_index=None and no length-scale, then theta_index=0. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. - xtol : float - A tolerance criterion of the hyperparameter for convergence. - ftol : float - A tolerance criterion of the objective function - for convergence. 
- """ - super().__init__( - maxiter=maxiter, - tol=tol, - optimize=optimize, - multiple_min=multiple_min, - theta_index=theta_index, - parallel=parallel, - xtol=xtol, - ftol=ftol, - **kwargs, - ) + """ + The golden section search method is used as the line search optimizer. + The line search optimizer is used for optimzing the objective function + wrt. a single the hyperparameter. + The GoldenSearch does only work together with a GlobalOptimizer + that uses line searches (e.g. FactorizedOptimizer). + A line of the hyperparameter is required to run the line search. + """ def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): # Get the function arguments @@ -388,7 +405,7 @@ def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): line = line.reshape(len_l, -1) f_list = self.calculate_values(line, func, func_args=func_args) # Find the optimal value - i_min = np.nanargmin(f_list) + i_min = nanargmin(f_list) sol = { "fun": f_list[i_min], "x": line[i_min], @@ -397,8 +414,8 @@ def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): "nit": len_l, } # Check whether the object function is flat - if (np.nanmax(f_list) - f_list[i_min]) < self.ftol: - i = int(np.floor(0.3 * (len(line) - 1))) + if (nanmax(f_list) - f_list[i_min]) < self.ftol: + i = int(floor(0.3 * (len(line) - 1))) return { "fun": f_list[i], "x": line[i], @@ -442,7 +459,7 @@ def prepare_run_golden( # Get the function that evaluate the objective function fun = self.get_fun(func) for i_min in i_minimas: - # Find the indicies of the interval + # Find the indices of the interval x1 = i_min - 1 x4 = i_min + 1 # Get the function values of the endpoints of the interval @@ -480,14 +497,17 @@ def golden_search( maxiter=200, func_args=(), fbracket=None, - vec0=np.array([0.0]), - direc=np.array([1.0]), + vec0=[0.0], + direc=[1.0], direc_norm=None, **kwargs, ): "Perform a golden section search." 
+ # Make arrays + vec0 = asarray(vec0, dtype=self.dtype) + direc = asarray(direc, dtype=self.dtype) # Golden ratio - r = (np.sqrt(5) - 1) / 2 + r = (sqrt(5) - 1) / 2 c = 1 - r # Number of function evaluations nfev = 0 @@ -502,10 +522,10 @@ def golden_search( f1, f4 = fbracket # Direction vector norm if direc_norm is None: - direc_norm = np.linalg.norm(direc) + direc_norm = norm(direc) # Check if the maximum number of iterations have been used if maxiter < 3: - i_min = np.nanargmin([f1, f4]) + i_min = nanargmin([f1, f4]) sol = { "fun": [f1, f4][i_min], "x": [vec1, vec4][i_min], @@ -516,7 +536,7 @@ def golden_search( return sol # Check if the coordinate convergence criteria is already met if abs(x4 - x1) * direc_norm <= self.xtol: - i_min = np.nanargmin([f1, f4]) + i_min = nanargmin([f1, f4]) sol = { "fun": [f1, f4][i_min], "x": [vec1, vec4][i_min], @@ -537,9 +557,9 @@ def golden_search( # Perform the line search success = False while nfev < maxiter: - i_min = np.nanargmin(f_list) + i_min = nanargmin(f_list) # Check for convergence - if np.nanmax(f_list) - f_list[i_min] <= self.ftol * ( + if nanmax(f_list) - f_list[i_min] <= self.ftol * ( 1.0 + abs(f_list[i_min]) ) or abs(x_list[3] - x_list[0]) * direc_norm <= self.xtol * ( 1.0 + direc_norm * abs(x_list[1]) @@ -563,7 +583,7 @@ def golden_search( f_list[2] = fun(vec0 + direc * x_list[2], *func_args) nfev += 1 # Get the solution - i_min = np.nanargmin(f_list) + i_min = nanargmin(f_list) sol = { "fun": f_list[i_min], "x": vec0 + direc * (x_list[i_min]), @@ -575,63 +595,80 @@ def golden_search( class FineGridSearch(LineSearchOptimizer): + """ + The fine grid search method is used as the line search optimizer. + The line search optimizer is used for optimzing the objective function + wrt. a single the hyperparameter. + Finer grids are made for all minimums of the objective function. + The FineGridSearch does only work together with a GlobalOptimizer + that uses line searches (e.g. FactorizedOptimizer). 
+ A line of the hyperparameter is required to run the line search. + """ + def __init__( self, maxiter=5000, + jac=False, + parallel=False, + seed=None, + dtype=float, tol=1e-5, optimize=True, multiple_min=True, ngrid=80, loops=3, theta_index=None, - parallel=False, xtol=None, ftol=None, **kwargs, ): """ - The fine grid search method is used as the line search optimizer. - The line search optimizer is used for optimzing the objective function - wrt. a single the hyperparameter. - Finer grids are made for all minimums of the objective function. - The FineGridSearch does only work together with a GlobalOptimizer - that uses line searches (e.g. FactorizedOptimizer). - A line of the hyperparameter is required to run the line search. + Initialize the line search optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - ngrid : int + ngrid: int The number of grid points of the hyperparameter that is optimized. - loops : int + loops: int The number of loops where the grid points are made. 
- theta_index : int or None + theta_index: int or None The index of the hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the length-scale. If theta_index=None and no length-scale, then theta_index=0. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. - xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. - ftol : float + ftol: float A tolerance criterion of the objective function for convergence. """ - # Line search optimizers cannot use gradients of the objective function - self.jac = False # Set the default theta_index self.theta_index = None # Set xtol and ftol to the tolerance if they are not given. @@ -639,13 +676,16 @@ def __init__( # Set all the arguments self.update_arguments( maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, tol=tol, optimize=optimize, multiple_min=multiple_min, ngrid=ngrid, loops=loops, theta_index=theta_index, - parallel=parallel, xtol=xtol, ftol=ftol, **kwargs, @@ -666,8 +706,8 @@ def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): theta_index = self.get_theta_index(parameters) # Make empty solution and lists sol = self.get_empty_solution() - lines = np.empty((0, len(line[0]))) - f_lists = np.empty((0)) + lines = empty((0, len(line[0])), dtype=self.dtype) + f_lists = empty((0), dtype=self.dtype) # Get the solution from loops of the fine grid method sol = self.run_grid_loops( func, @@ -682,16 +722,40 @@ def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): ) return sol + def set_ngrid(self, ngrid=None, **kwargs): + """ + Set the number of grid points of the hyperparameter + that is optimized. + + Parameters: + ngrid: int + The number of grid points of the hyperparameter + that is optimized. + + Returns: + self: The updated object itself. 
+ """ + if self.parallel: + self.ngrid = int(int(ngrid / world.size) * world.size) + if self.ngrid == 0: + self.ngrid = world.size + else: + self.ngrid = int(ngrid) + return self + def update_arguments( self, maxiter=None, + jac=None, + parallel=None, + seed=None, + dtype=None, tol=None, optimize=None, multiple_min=None, ngrid=None, loops=None, theta_index=None, - parallel=None, xtol=None, ftol=None, **kwargs, @@ -701,67 +765,69 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - ngrid : int + ngrid: int The number of grid points of the hyperparameter that is optimized. - loops : int + loops: int The number of loops where the grid points are made. - theta_index : int or None + theta_index: int or None The index of the hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the length-scale. If theta_index=None and no length-scale, then theta_index=0. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. 
- xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. - ftol : float + ftol: float A tolerance criterion of the objective function for convergence. Returns: self: The updated object itself. """ - if maxiter is not None: - self.maxiter = int(maxiter) - if tol is not None: - self.tol = tol - if optimize is not None: - self.optimize = optimize - if multiple_min is not None: - self.multiple_min = multiple_min - if theta_index is not None: - self.theta_index = int(theta_index) - if parallel is not None: - self.parallel = parallel + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + tol=tol, + optimize=optimize, + multiple_min=multiple_min, + theta_index=theta_index, + xtol=xtol, + ftol=ftol, + ) if ngrid is not None: - if self.parallel: - from ase.parallel import world - - self.ngrid = int(int(ngrid / world.size) * world.size) - if self.ngrid == 0: - self.ngrid = world.size - else: - self.ngrid = int(ngrid) + self.set_ngrid(ngrid=ngrid) if loops is not None: self.loops = int(loops) - if xtol is not None: - self.xtol = xtol - if ftol is not None: - self.ftol = ftol return self def run_grid_loops( @@ -786,12 +852,12 @@ def run_grid_loops( line = line.reshape(len_l, -1) f_list = self.calculate_values(line, func, func_args=func_args) # Use previously calculated grid points - lines = np.append(lines, line, axis=0) - i_sort = np.argsort(lines[:, theta_index]) + lines = append(lines, line, axis=0) + i_sort = argsort(lines[:, theta_index]) lines = lines[i_sort] - f_lists = np.append(f_lists, f_list)[i_sort] + f_lists = append(f_lists, f_list)[i_sort] # Find the minimum value - i_min = np.nanargmin(f_lists) + i_min = nanargmin(f_lists) # Update the solution dictionary sol["nfev"] += len_l sol["nit"] += len_l @@ -845,7 +911,7 @@ def make_new_line( ): "Make a new line/grid for the minimums to optimize the hyperparameter." 
# Find the grid points that must be saved for later - i_d = np.array([[-1], [0], [1]], dtype=int) + i_d = asarray([[-1], [0], [1]], dtype=int) i_all = (i_minimas + i_d).T.reshape(-1) saved_lines = lines[i_all] saved_f_lists = f_lists[i_all] @@ -857,7 +923,7 @@ def make_new_line( i_minimas = i_minimas[: self.ngrid // 3] len_i = len(i_minimas) # Get the number of grid points for each minimum - di = np.full( + di = full( shape=len_i, fill_value=self.ngrid // len_i, dtype=int, @@ -866,16 +932,16 @@ def make_new_line( # if there are grid points to spare di[: int(self.ngrid % len_i)] += 1 # Make new line - newline = np.concatenate( + newline = concatenate( [ - np.linspace(lines[i - 1], lines[i + 1], di[j] + 2)[1:-1] + linspace(lines[i - 1], lines[i + 1], di[j] + 2)[1:-1] for j, i in enumerate(i_minimas) ] ) else: i_min = i_minimas[0] # Make new line - newline = np.linspace( + newline = linspace( lines[i_min - 1], lines[i_min + 1], self.ngrid + 2, @@ -887,13 +953,16 @@ def get_arguments(self): # Get the arguments given to the class in the initialization arg_kwargs = dict( maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, tol=self.tol, optimize=self.optimize, multiple_min=self.multiple_min, ngrid=self.ngrid, loops=self.loops, theta_index=self.theta_index, - parallel=self.parallel, xtol=self.xtol, ftol=self.ftol, ) @@ -905,9 +974,25 @@ def get_arguments(self): class TransGridSearch(FineGridSearch): + """ + The variable transformed grid search method is used + as the line search optimizer. + The line search optimizer is used for optimzing + the objective function wrt. a single the hyperparameter. + Grids are made by updating the variable transformation from + the objective function values. + The TransGridSearch does only work together with a GlobalOptimizer + that uses line searches (e.g. FactorizedOptimizer). + A line of the hyperparameter is required to run the line search. 
+ """ + def __init__( self, maxiter=5000, + jac=False, + parallel=False, + seed=None, + dtype=float, tol=1e-5, optimize=True, multiple_min=True, @@ -915,61 +1000,62 @@ def __init__( loops=3, use_likelihood=True, theta_index=None, - parallel=False, xtol=None, ftol=None, **kwargs, ): """ - The variable transformed grid search method is used - as the line search optimizer. - The line search optimizer is used for optimzing - the objective function wrt. a single the hyperparameter. - Grids are made by updating the variable transformation from - the objective function values. - The TransGridSearch does only work together with a GlobalOptimizer - that uses line searches (e.g. FactorizedOptimizer). - A line of the hyperparameter is required to run the line search. + Initialize the line search optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - ngrid : int + ngrid: int The number of grid points of the hyperparameter that is optimized. - loops : int + loops: int The number of loops where the grid points are made. 
- use_likelihood : bool + use_likelihood: bool Whether to use the objective function as a log-likelihood or not. If the use_likelihood=False, the objective function is scaled and shifted with the maximum value. - theta_index : int or None + theta_index: int or None The index of the hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the length-scale. If theta_index=None and no length-scale, then theta_index=0. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. - xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. - ftol : float + ftol: float A tolerance criterion of the objective function for convergence. """ - # Line search optimizers cannot use gradients of the objective function - self.jac = False # Set the default theta_index self.theta_index = None # Set xtol and ftol to the tolerance if they are not given. @@ -977,6 +1063,10 @@ def __init__( # Set all the arguments self.update_arguments( maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, tol=tol, optimize=optimize, multiple_min=multiple_min, @@ -984,7 +1074,6 @@ def __init__( loops=loops, use_likelihood=use_likelihood, theta_index=theta_index, - parallel=parallel, xtol=xtol, ftol=ftol, **kwargs, @@ -993,6 +1082,10 @@ def __init__( def update_arguments( self, maxiter=None, + jac=None, + parallel=None, + seed=None, + dtype=None, tol=None, optimize=None, multiple_min=None, @@ -1000,7 +1093,6 @@ def update_arguments( loops=None, use_likelihood=None, theta_index=None, - parallel=None, xtol=None, ftol=None, **kwargs, @@ -1010,74 +1102,74 @@ def update_arguments( The existing arguments are used if they are not given. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. 
+ The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - ngrid : int + ngrid: int The number of grid points of the hyperparameter that is optimized. - loops : int + loops: int The number of loops where the grid points are made. - use_likelihood : bool + use_likelihood: bool Whether to use the objective function as a log-likelihood or not. If the use_likelihood=False, the objective function is scaled and shifted with the maximum value. - theta_index : int or None + theta_index: int or None The index of the hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the length-scale. If theta_index=None and no length-scale, then theta_index=0. - parallel : bool - Whether to calculate the grid points in parallel - over multiple CPUs. - xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. - ftol : float + ftol: float A tolerance criterion of the objective function for convergence. Returns: self: The updated object itself. 
""" - if maxiter is not None: - self.maxiter = int(maxiter) - if tol is not None: - self.tol = tol - if optimize is not None: - self.optimize = optimize - if multiple_min is not None: - self.multiple_min = multiple_min - if theta_index is not None: - self.theta_index = int(theta_index) - if parallel is not None: - self.parallel = parallel - if ngrid is not None: - if self.parallel: - from ase.parallel import world - - self.ngrid = int(int(ngrid / world.size) * world.size) - if self.ngrid == 0: - self.ngrid = world.size - else: - self.ngrid = ngrid - if loops is not None: - self.loops = int(loops) + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + tol=tol, + optimize=optimize, + multiple_min=multiple_min, + theta_index=theta_index, + xtol=xtol, + ftol=ftol, + ngrid=ngrid, + loops=loops, + ) if use_likelihood is not None: self.use_likelihood = use_likelihood - if xtol is not None: - self.xtol = xtol - if ftol is not None: - self.ftol = ftol return self def make_new_line( @@ -1093,23 +1185,22 @@ def make_new_line( Make new line/grid points from the variable transformation of the objective function. 
""" - from scipy.integrate import cumulative_trapezoid # Change the function to likelihood or to a scaled function from 0 to 1 if self.use_likelihood: - fs = np.exp(-(f_lists - np.nanmin(f_lists))) + fs = exp(-(f_lists - nanmin(f_lists))) else: - fs = -(f_lists - np.nanmax(f_lists)) - fs = fs / np.nanmax(fs) + fs = -(f_lists - nanmax(f_lists)) + fs = fs / nanmax(fs) # Calculate the cumulative distribution function values on the grid cdf = cumulative_trapezoid(fs, x=lines[:, theta_index], initial=0.0) cdf = cdf / cdf[-1] cdf_r = cdf.reshape(-1, 1) # Make new grid points on the inverse cumulative distribution function - dl = np.finfo(float).eps - newlines = np.linspace(0.0 + dl, 1.0 - dl, self.ngrid) + dl = self.eps + newlines = linspace(0.0 + dl, 1.0 - dl, self.ngrid) # Find the intervals where the new grid points are located - i_new = np.where((cdf_r[:-1] <= newlines) & (newlines < cdf_r[1:]))[0] + i_new = where((cdf_r[:-1] <= newlines) & (newlines < cdf_r[1:]))[0] i_new_a = i_new + 1 # Calculate the linear interpolation for the intervals of interest slope = (lines[i_new_a] - lines[i_new]) / ( @@ -1127,6 +1218,10 @@ def get_arguments(self): # Get the arguments given to the class in the initialization arg_kwargs = dict( maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, tol=self.tol, optimize=self.optimize, multiple_min=self.multiple_min, @@ -1134,7 +1229,6 @@ def get_arguments(self): loops=self.loops, use_likelihood=self.use_likelihood, theta_index=self.theta_index, - parallel=self.parallel, xtol=self.xtol, ftol=self.ftol, ) diff --git a/catlearn/regression/gp/optimizers/localoptimizer.py b/catlearn/regression/gp/optimizers/localoptimizer.py index 87692b23..dad369a2 100644 --- a/catlearn/regression/gp/optimizers/localoptimizer.py +++ b/catlearn/regression/gp/optimizers/localoptimizer.py @@ -1,52 +1,104 @@ from .optimizer import Optimizer +from scipy.optimize import minimize class LocalOptimizer(Optimizer): - def 
__init__(self, maxiter=5000, jac=True, tol=1e-3, **kwargs): + """ + The local optimizer used for optimzing the objective function + wrt. the hyperparameters. + """ + + def __init__( + self, + maxiter=5000, + jac=True, + parallel=False, + seed=None, + dtype=float, + tol=1e-3, + **kwargs, + ): """ - The local optimizer used for optimzing the objective function - wrt. the hyperparameters. + Initialize the local optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. - tol : float + parallel: bool + Whether to use parallelization. + This is not implemented for this method. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. """ - # This optimizer can not be parallelized - self.parallel = False # Set all the arguments - self.update_arguments(maxiter=maxiter, jac=jac, tol=tol, **kwargs) + self.update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + tol=tol, + **kwargs, + ) def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): raise NotImplementedError() - def update_arguments(self, maxiter=None, jac=None, tol=None, **kwargs): + def update_arguments( + self, + maxiter=None, + jac=None, + parallel=None, + seed=None, + dtype=None, + tol=None, + **kwargs, + ): """ Update the optimizer with its arguments. The existing arguments are used if they are not given. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. 
- tol : float + parallel: bool + Whether to use parallelization. + This is not implemented for this method. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. Returns: self: The updated object itself. """ - if maxiter is not None: - self.maxiter = int(maxiter) - if jac is not None: - self.jac = jac + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + ) if tol is not None: self.tol = tol return self @@ -54,7 +106,14 @@ def update_arguments(self, maxiter=None, jac=None, tol=None, **kwargs): def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization - arg_kwargs = dict(maxiter=self.maxiter, jac=self.jac, tol=self.tol) + arg_kwargs = dict( + maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, + tol=self.tol, + ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class @@ -63,10 +122,20 @@ def get_arguments(self): class ScipyOptimizer(LocalOptimizer): + """ + The local optimizer used for optimzing the objective function + wrt. the hyperparameters. + This method uses the SciPy minimizers. + (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) + """ + def __init__( self, maxiter=5000, jac=True, + parallel=False, + seed=None, + dtype=float, tol=1e-8, method="l-bfgs-b", bounds=None, @@ -76,35 +145,40 @@ def __init__( **kwargs, ): """ - The local optimizer used for optimzing the objective function - wrt. the hyperparameters. - This method uses the SciPy minimizers. 
- (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) + Initialize the local optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. - tol : float + parallel: bool + Whether to use parallelization. + This is not implemented for this method. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - method : str + method: str The minimizer method used in SciPy. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. All global optimization methods are using boundary conditions. - use_bounds : bool + use_bounds: bool Whether to use the boundary conditions or not. Only some methods can use boundary conditions. - options : dict + options: dict Solver options used in the SciPy minimizer. - opt_kwargs : dict + opt_kwargs: dict Extra arguments used in the SciPy minimizer. 
""" - # This optimizer can not be parallelized - self.parallel = False # Set boundary conditions self.bounds = None # Set options @@ -115,6 +189,9 @@ def __init__( self.update_arguments( maxiter=maxiter, jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, tol=tol, method=method, bounds=bounds, @@ -125,8 +202,6 @@ def __init__( ) def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): - from scipy.optimize import minimize - # Get the objective function arguments func_args = self.get_func_arguments( parameters, @@ -138,7 +213,7 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): ) # Get bounds or set it to default argument if self.use_bounds: - bounds = self.make_bounds(parameters, array=True) + bounds = self.make_bounds(parameters, use_array=True) else: bounds = None # Minimize objective function with SciPy @@ -163,10 +238,38 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): pdis, ) + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype=dtype, **kwargs) + # Set the data type of the bounds + if self.bounds is not None and hasattr(self.bounds, "set_dtype"): + self.bounds.set_dtype(dtype=dtype, **kwargs) + return self + + def set_seed(self, seed=None, **kwargs): + super().set_seed(seed=seed, **kwargs) + # Set the random seed of the bounds + if self.bounds is not None and hasattr(self.bounds, "set_seed"): + self.bounds.set_seed(seed=seed, **kwargs) + return self + + def set_maxiter(self, maxiter, **kwargs): + super().set_maxiter(maxiter, **kwargs) + # Set the maximum number of iterations in the options + if self.method in ["nelder-mead"]: + self.options["maxfev"] = self.maxiter + elif self.method in ["l-bfgs-b", "tnc"]: + self.options["maxfun"] = self.maxiter + else: + self.options["maxiter"] = self.maxiter + return self + def update_arguments( self, maxiter=None, jac=None, + parallel=None, + seed=None, + dtype=None, tol=None, method=None, bounds=None, @@ -180,49 +283,47 @@ def update_arguments( The 
existing arguments are used if they are not given. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. - tol : float + parallel: bool + Whether to use parallelization. + This is not implemented for this method. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - method : str + method: str The minimizer method used in SciPy. - bounds : HPBoundaries class + bounds: HPBoundaries class A class of the boundary conditions of the hyperparameters. All global optimization methods are using boundary conditions. - use_bounds : bool + use_bounds: bool Whether to use the boundary conditions or not. Only some methods can use boundary conditions. - options : dict + options: dict Solver options used in the SciPy minimizer. - opt_kwargs : dict + opt_kwargs: dict Extra arguments used in the SciPy minimizer. Returns: self: The updated object itself. 
""" - if jac is not None: - self.jac = jac - if tol is not None: - self.tol = tol if method is not None: self.method = method.lower() # If method is updated then maxiter must be updated - if maxiter is None: + if maxiter is None and hasattr(self, "maxiter"): maxiter = self.maxiter if options is not None: self.options.update(options) - if maxiter is not None: - self.maxiter = int(maxiter) - if self.method in ["nelder-mead"]: - self.options["maxfev"] = self.maxiter - elif self.method in ["l-bfgs-b", "tnc"]: - self.options["maxfun"] = self.maxiter - else: - self.options["maxiter"] = self.maxiter if bounds is not None: self.bounds = bounds.copy() if use_bounds is not None: @@ -240,13 +341,22 @@ def update_arguments( self.use_bounds = False if opt_kwargs is not None: self.opt_kwargs.update(opt_kwargs) + # Set the arguments for the parent class + super().update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + tol=tol, + ) return self - def make_bounds(self, parameters, array=True, **kwargs): + def make_bounds(self, parameters, use_array=True, **kwargs): "Make the boundary conditions of the hyperparameters." return self.bounds.get_bounds( parameters=parameters, - array=array, + use_array=use_array, **kwargs, ) @@ -256,6 +366,9 @@ def get_arguments(self): arg_kwargs = dict( maxiter=self.maxiter, jac=self.jac, + parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, tol=self.tol, method=self.method, bounds=self.bounds, @@ -271,61 +384,16 @@ def get_arguments(self): class ScipyPriorOptimizer(ScipyOptimizer): - def __init__( - self, - maxiter=5000, - jac=True, - tol=1e-8, - method="l-bfgs-b", - bounds=None, - use_bounds=False, - options={}, - opt_kwargs={}, - **kwargs, - ): - """ - The local optimizer used for optimzing the objective function - wrt.the hyperparameters. - This method uses the SciPy minimizers. 
- (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) - If prior distributions of the hyperparameters are used, - it will start by include - the prior distributions and then restart with - excluded prior distributions. - - Parameters: - maxiter : int - The maximum number of evaluations or iterations - the optimizer can use. - jac : bool - Whether to use the gradient of the objective function - wrt. the hyperparameters. - tol : float - A tolerance criterion for convergence. - method : str - The minimizer method used in SciPy. - bounds : HPBoundaries class - A class of the boundary conditions of the hyperparameters. - All global optimization methods are using boundary conditions. - use_bounds : bool - Whether to use the boundary conditions or not. - Only some methods can use boundary conditions. - options : dict - Solver options used in the SciPy minimizer. - opt_kwargs : dict - Extra arguments used in the SciPy minimizer. - """ - super().__init__( - maxiter=maxiter, - jac=jac, - tol=tol, - method=method, - bounds=bounds, - use_bounds=use_bounds, - options=options, - opt_kwargs=opt_kwargs, - **kwargs, - ) + """ + The local optimizer used for optimzing the objective function + wrt.the hyperparameters. + This method uses the SciPy minimizers. + (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) + If prior distributions of the hyperparameters are used, + it will start by include + the prior distributions and then restart with + excluded prior distributions. 
+ """ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): # Get solution with the prior distributions @@ -354,59 +422,14 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): class ScipyGuessOptimizer(ScipyOptimizer): - def __init__( - self, - maxiter=5000, - jac=True, - tol=1e-8, - method="l-bfgs-b", - bounds=None, - use_bounds=False, - options={}, - opt_kwargs={}, - **kwargs, - ): - """ - The local optimizer used for optimzing the objective function - wrt. the hyperparameters. - This method uses the SciPy minimizers. - (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) - Use boundary conditions to give an extra guess of the hyperparameters - that also are optimized. - - Parameters: - maxiter : int - The maximum number of evaluations or iterations - the optimizer can use. - jac : bool - Whether to use the gradient of the objective function - wrt. the hyperparameters. - tol : float - A tolerance criterion for convergence. - method : str - The minimizer method used in SciPy. - bounds : HPBoundaries class - A class of the boundary conditions of the hyperparameters. - All global optimization methods are using boundary conditions. - use_bounds : bool - Whether to use the boundary conditions or not. - Only some methods can use boundary conditions. - options : dict - Solver options used in the SciPy minimizer. - opt_kwargs : dict - Extra arguments used in the SciPy minimizer. - """ - super().__init__( - maxiter=maxiter, - jac=jac, - tol=tol, - method=method, - bounds=bounds, - use_bounds=use_bounds, - options=options, - opt_kwargs=opt_kwargs, - **kwargs, - ) + """ + The local optimizer used for optimzing the objective function + wrt. the hyperparameters. + This method uses the SciPy minimizers. + (https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) + Use boundary conditions to give an extra guess of the hyperparameters + that also are optimized. 
+ """ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): # Optimize the initial hyperparameters @@ -415,7 +438,7 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): if self.bounds is None: return sol # Use the boundaries to give an educated guess of the hyperparmeters - theta_guess = self.guess_hp(parameters, array=True) + theta_guess = self.guess_hp(parameters, use_array=True) sol_ed = super().run( func, theta_guess, @@ -431,6 +454,10 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): sol["nit"] = 2 return sol - def guess_hp(self, parameters, array=True, **kwargs): + def guess_hp(self, parameters, use_array=True, **kwargs): "Make a guess of the hyperparameters from the boundary conditions." - return self.bounds.get_hp(parameters=parameters, array=array, **kwargs) + return self.bounds.get_hp( + parameters=parameters, + use_array=use_array, + **kwargs, + ) diff --git a/catlearn/regression/gp/optimizers/noisesearcher.py b/catlearn/regression/gp/optimizers/noisesearcher.py index 88787d7d..a1369f88 100644 --- a/catlearn/regression/gp/optimizers/noisesearcher.py +++ b/catlearn/regression/gp/optimizers/noisesearcher.py @@ -1,34 +1,66 @@ -import numpy as np +from numpy import nanargmin from .linesearcher import ( - LineSearchOptimizer, - GoldenSearch, FineGridSearch, + GoldenSearch, + LineSearchOptimizer, + LocalOptimizer, TransGridSearch, ) class NoiseGrid(LineSearchOptimizer): - def __init__(self, maxiter=5000, **kwargs): + """ + The grid method is used as the line search optimizer. + The grid of relative-noise hyperparameter values is calculated + with the objective function. + The lowest of objective function values of the single grid + is used as the optimum. + A line of the relative-noise hyperparameter is required to + run the line search. 
+ """ + + def __init__( + self, + maxiter=5000, + jac=False, + parallel=False, + seed=None, + dtype=float, + **kwargs, + ): """ - The grid method is used as the line search optimizer. - The grid of relative-noise hyperparameter values is calculated - with the objective function. - The lowest of objective function values of the single grid - is used as the optimum. - A line of the relative-noise hyperparameter is required to - run the line search. + Initialize the relative-noise search optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This optimizer can not be parallelized. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" - # This optimizer can not be parallelized - self.parallel = False - # Line search optimizers cannot use gradients of the objective function - self.jac = False # Set all the arguments - self.update_arguments(maxiter=maxiter, **kwargs) + self.update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + **kwargs, + ) def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): # Get the function arguments @@ -46,7 +78,7 @@ def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): line = line.reshape(len_l, -1) f_list = self.calculate_values(line, func, func_args=func_args) # Find the optimal value - i_min = np.nanargmin(f_list) + i_min = nanargmin(f_list) sol = { "fun": f_list[i_min], "x": line[i_min], @@ -56,21 +88,55 @@ def run(self, func, line, parameters, model, X, Y, pdis, **kwargs): } return sol - def update_arguments(self, maxiter=None, **kwargs): + def set_parallel(self, parallel=False, **kwargs): + # This optimizer can not be parallelized + self.parallel = False + return self + + def update_arguments( + self, + maxiter=None, + jac=None, + parallel=None, + seed=None, + dtype=None, + **kwargs, + ): """ Update the optimizer with its arguments. The existing arguments are used if they are not given. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This optimizer can not be parallelized. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
Returns: self: The updated object itself. """ - if maxiter is not None: - self.maxiter = int(maxiter) + super(LocalOptimizer, self).update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + ) return self def get_func_arguments( @@ -98,7 +164,13 @@ def calculate_values(self, thetas, func, func_args=(), **kwargs): def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization - arg_kwargs = dict(maxiter=self.maxiter) + arg_kwargs = dict( + maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, + ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class @@ -107,9 +179,21 @@ def get_arguments(self): class NoiseGoldenSearch(GoldenSearch): + """ + The golden section search method is used as the line search optimizer. + The line search optimizer is used for optimzing the objective function + wrt. the relative-noise hyperparameter. + A line of the relative-noise hyperparameter is required to + run the line search. + """ + def __init__( self, maxiter=5000, + jac=False, + parallel=False, + seed=None, + dtype=float, tol=1e-5, optimize=True, multiple_min=False, @@ -119,47 +203,54 @@ def __init__( **kwargs, ): """ - The golden section search method is used as the line search optimizer. - The line search optimizer is used for optimzing the objective function - wrt. the relative-noise hyperparameter. - A line of the relative-noise hyperparameter is required to - run the line search. + Initialize the relative-noise search optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. 
+ parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This optimizer can not be parallelized. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - theta_index : int or None + theta_index: int or None The index of the relative-noise hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the relative-noise. If theta_index=None and no relative-noise, then theta_index=0. - xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. - ftol : float + ftol: float A tolerance criterion of the objective function for convergence. """ - # This optimizer can not be parallelized - self.parallel = False - # Line search optimizers cannot use gradients of the objective function - self.jac = False - # Set the default theta_index - self.theta_index = None - # Set xtol and ftol to the tolerance if they are not given. - xtol, ftol = self.set_tols(tol, xtol=xtol, ftol=ftol) - # Set all the arguments - self.update_arguments( + super().__init__( maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, tol=tol, optimize=optimize, multiple_min=multiple_min, @@ -191,11 +282,34 @@ def calculate_values(self, thetas, func, func_args=(), **kwargs): "Calculate a list of values with a function." 
return func.get_all_eig_fun(thetas, *func_args) + def set_jac(self, jac=False, **kwargs): + # Line search optimizers cannot use gradients of the objective function + self.jac = False + return self + + def set_parallel(self, parallel=False, **kwargs): + # This optimizer can not be parallelized + self.parallel = False + return self + class NoiseFineGridSearch(FineGridSearch): + """ + The fine grid search method is used as the line search optimizer. + The line search optimizer is used for optimzing the objective function + wrt. the relative-noise hyperparameter. + Finer grids are made for all minimums of the objective function. + A line of the relative-noise hyperparameter is required to + run the line search. + """ + def __init__( self, maxiter=5000, + jac=False, + parallel=False, + seed=None, + dtype=float, tol=1e-5, optimize=True, multiple_min=False, @@ -207,48 +321,54 @@ def __init__( **kwargs, ): """ - The fine grid search method is used as the line search optimizer. - The line search optimizer is used for optimzing the objective function - wrt. the relative-noise hyperparameter. - Finer grids are made for all minimums of the objective function. - A line of the relative-noise hyperparameter is required to - run the line search. + Initialize the relative-noise search optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This optimizer can not be parallelized. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. 
+ If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - theta_index : int or None + theta_index: int or None The index of the relative-noise hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the relative-noise. If theta_index=None and no relative-noise, then theta_index=0. - xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. - ftol : float + ftol: float A tolerance criterion of the objective function for convergence. """ - # This optimizer can not be parallelized - self.parallel = False - # Line search optimizers cannot use gradients of the objective function - self.jac = False - # Set the default theta_index - self.theta_index = None - # Set xtol and ftol to the tolerance if they are not given. - xtol, ftol = self.set_tols(tol, xtol=xtol, ftol=ftol) - # Set all the arguments - self.update_arguments( + super().__init__( maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, tol=tol, optimize=optimize, multiple_min=multiple_min, @@ -282,11 +402,36 @@ def calculate_values(self, thetas, func, func_args=(), **kwargs): "Calculate a list of values with a function." return func.get_all_eig_fun(thetas, *func_args) + def set_jac(self, jac=False, **kwargs): + # Line search optimizers cannot use gradients of the objective function + self.jac = False + return self + + def set_parallel(self, parallel=False, **kwargs): + # This optimizer can not be parallelized + self.parallel = False + return self + class NoiseTransGridSearch(TransGridSearch): + """ + The variable transformed grid search method is used as + the line search optimizer. 
+ The line search optimizer is used for optimzing the objective function + wrt. the relative-noise hyperparameter. + Grids are made by updating the variable transformation from + the objective function values. + A line of the relative-noise hyperparameter is required to + run the line search. + """ + def __init__( self, maxiter=5000, + jac=False, + parallel=False, + seed=None, + dtype=float, tol=1e-5, optimize=True, multiple_min=False, @@ -299,60 +444,64 @@ def __init__( **kwargs, ): """ - The variable transformed grid search method is used as - the line search optimizer. - The line search optimizer is used for optimzing the objective function - wrt. the relative-noise hyperparameter. - Grids are made by updating the variable transformation from - the objective function values. - A line of the relative-noise hyperparameter is required to - run the line search. + Initialize the relative-noise search optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - tol : float + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + The line search optimizers cannot use gradients + of the objective function. + parallel: bool + Whether to calculate the grid points in parallel + over multiple CPUs. + This optimizer can not be parallelized. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. + tol: float A tolerance criterion for convergence. - optimize : bool + optimize: bool Whether to optimize the line given by split it into smaller intervals. - multiple_min : bool + multiple_min: bool Whether to optimize multiple minimums or just optimize the lowest minimum. - ngrid : int + ngrid: int The number of grid points of the hyperparameter that is optimized. 
- loops : int + loops: int The number of loops where the grid points are made. - use_likelihood : bool + use_likelihood: bool Whether to use the objective function as a log-likelihood or not. If the use_likelihood=False, the objective function is scaled and shifted with the maximum value. - theta_index : int or None + theta_index: int or None The index of the relative-noise hyperparameter that is optimized with the line search. If theta_index=None, then it will use the index of the relative-noise. If theta_index=None and no relative-noise, then theta_index=0. - xtol : float + xtol: float A tolerance criterion of the hyperparameter for convergence. - ftol : float + ftol: float A tolerance criterion of the objective function for convergence. """ - # This optimizer can not be parallelized - self.parallel = False - # Line search optimizers cannot use gradients of the objective function - self.jac = False - # Set the default theta_index - self.theta_index = None - # Set xtol and ftol to the tolerance if they are not given. - xtol, ftol = self.set_tols(tol, xtol=xtol, ftol=ftol) - # Set all the arguments - self.update_arguments( + super().__init__( maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, tol=tol, optimize=optimize, multiple_min=multiple_min, @@ -386,3 +535,13 @@ def get_fun(self, func, **kwargs): def calculate_values(self, thetas, func, func_args=(), **kwargs): "Calculate a list of values with a function." 
return func.get_all_eig_fun(thetas, *func_args) + + def set_jac(self, jac=False, **kwargs): + # Line search optimizers cannot use gradients of the objective function + self.jac = False + return self + + def set_parallel(self, parallel=False, **kwargs): + # This optimizer can not be parallelized + self.parallel = False + return self diff --git a/catlearn/regression/gp/optimizers/optimizer.py b/catlearn/regression/gp/optimizers/optimizer.py index 30c3a60d..a55deb75 100644 --- a/catlearn/regression/gp/optimizers/optimizer.py +++ b/catlearn/regression/gp/optimizers/optimizer.py @@ -1,25 +1,54 @@ +from numpy import argmin, array, asarray, empty, finfo, inf +from numpy.random import default_rng, Generator, RandomState from scipy.optimize import OptimizeResult -import numpy as np +from ase.parallel import world, broadcast class Optimizer: - def __init__(self, maxiter=5000, jac=True, **kwargs): + """ + The optimizer used for optimzing the objective function wrt. + the hyperparameters. + """ + + def __init__( + self, + maxiter=5000, + jac=True, + parallel=False, + seed=None, + dtype=float, + **kwargs, + ): """ - The optimizer used for optimzing the objective function - wrt. the hyperparameters. + Initialize the optimizer. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. + parallel: bool + Whether to use parallelization. + This is not implemented for this method. + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. 
""" - # This optimizer can not be parallelized - self.parallel = False # Set all the arguments - self.update_arguments(maxiter=maxiter, jac=jac, **kwargs) + self.update_arguments( + maxiter=maxiter, + jac=jac, + parallel=parallel, + seed=seed, + dtype=dtype, + **kwargs, + ) def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): """ @@ -27,51 +56,164 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): wrt. the hyperparameters. Parameters: - func : ObjectiveFunction class object + func: ObjectiveFunction class object The objective function class that is used to calculate the value. - theta : (H) array + theta: (H) array An array with the hyperparameter values. - parameters : (H) list of strings + parameters: (H) list of strings A list of names of the hyperparameters. - model : Model class object + model: Model class object The Machine Learning Model with kernel and prior that are optimized. - X : (N,D) array + X: (N,D) array Training features with N data points and D dimensions. - Y : (N,1) array or (N,D+1) array + Y: (N,1) array or (N,D+1) array Training targets with or without derivatives with N data points. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. Returns: - dict : A solution dictionary with objective function value, + dict: A solution dictionary with objective function value, optimized hyperparameters, success statement, and number of used evaluations. """ raise NotImplementedError() - def update_arguments(self, maxiter=None, jac=None, **kwargs): + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + # Set a small number to avoid division by zero + self.eps = 1.1 * finfo(self.dtype).eps + return self + + def set_seed(self, seed=None, **kwargs): + """ + Set the random seed. 
+ + Parameters: + seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + + Returns: + self: The instance itself. + """ + if seed is not None: + self.seed = seed + if isinstance(seed, int): + self.rng = default_rng(self.seed) + elif isinstance(seed, Generator) or isinstance(seed, RandomState): + self.rng = seed + else: + self.seed = None + self.rng = default_rng() + return self + + def set_maxiter(self, maxiter, **kwargs): + """ + Set the maximum number of iterations. + + Parameters: + maxiter: int + The maximum number of evaluations or iterations + the optimizer can use. + + Returns: + self: The updated object itself. + """ + self.maxiter = int(maxiter) + return self + + def set_jac(self, jac=True, **kwargs): + """ + Set whether to use the gradient of the objective function + wrt. the hyperparameters. + + Parameters: + jac: bool + Whether to use the gradient of the objective function + wrt. the hyperparameters. + + Returns: + self: The updated object itself. + """ + self.jac = jac + return self + + def set_parallel(self, parallel=False, **kwargs): + """ + Set whether to use parallelization. + + Parameters: + parallel: bool + Whether to use parallelization. + + Returns: + self: The updated object itself. + """ + # This optimizer can not be parallelized + self.parallel = False + return self + + def update_arguments( + self, + maxiter=None, + jac=None, + parallel=None, + seed=None, + dtype=None, + **kwargs, + ): """ Update the optimizer with its arguments. The existing arguments are used if they are not given. Parameters: - maxiter : int + maxiter: int The maximum number of evaluations or iterations the optimizer can use. - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. + parallel: bool + Whether to use parallelization. + This is not implemented for this method. 
+ seed: int (optional) + The random seed. + The seed can be an integer, RandomState, or Generator instance. + If not given, the default random number generator is used. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. Returns: self: The updated object itself. """ - if maxiter is not None: - self.maxiter = int(maxiter) if jac is not None: - self.jac = jac + self.set_jac(jac) + if parallel is not None or not hasattr(self, "parallel"): + self.set_parallel(parallel) + if maxiter is not None: + self.set_maxiter(maxiter) + # Set the seed + if seed is not None or not hasattr(self, "seed"): + self.set_seed(seed) + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype) return self def get_final_solution( @@ -113,20 +255,18 @@ def get_final_solution_parallel( **kwargs, ): "Get all final solutions from each function at each rank." - from ase.parallel import world, broadcast - size = world.size fun_sol = func.get_stored_solution() sol = func.get_solution(sol, parameters, model, X, Y, pdis) fun_sols = [broadcast(fun_sol["fun"], root=r) for r in range(size)] - rank_min = np.argmin(fun_sols) + rank_min = argmin(fun_sols) return broadcast(sol, root=rank_min) def get_empty_solution(self, **kwargs): "Get an empty solution without any function evaluations." sol = { - "fun": np.inf, - "x": np.array([]), + "fun": inf, + "x": empty(0, dtype=self.dtype), "success": False, "nfev": 0, "nit": 0, @@ -137,7 +277,7 @@ def get_empty_solution(self, **kwargs): def get_initial_solution(self, theta, func, func_args=(), **kwargs): "Get a solution with the evaluation of the initial hyperparameters." 
sol = { - "fun": np.inf, + "fun": inf, "x": theta, "success": False, "nfev": 1, @@ -172,22 +312,25 @@ def calculate_values(self, thetas, func, func_args=(), **kwargs): func_args=func_args, **kwargs, ) - return np.array([func.function(theta, *func_args) for theta in thetas]) + return asarray( + [func.function(theta, *func_args) for theta in thetas], + dtype=self.dtype, + ) def calculate_values_parallel(self, thetas, func, func_args=(), **kwargs): "Calculate a list of values with a function in parallel." - from ase.parallel import world, broadcast - rank, size = world.rank, world.size - f_list = np.array( + f_list = asarray( [ func.function(theta, *func_args) for t, theta in enumerate(thetas) if rank == t % size - ] + ], + dtype=self.dtype, ) - return np.array( - [broadcast(f_list, root=r) for r in range(size)] + return asarray( + [broadcast(f_list, root=r) for r in range(size)], + dtype=self.dtype, ).T.reshape(-1) def compare_solutions(self, sol1, sol2, **kwargs): @@ -210,7 +353,8 @@ def compare_solutions(self, sol1, sol2, **kwargs): def make_hp(self, theta, parameters, **kwargs): "Make hyperparameter dictionary from lists." - theta, parameters = np.array(theta), np.array(parameters) + theta = array(theta, dtype=self.dtype) + parameters = asarray(parameters) parameters_set = sorted(set(parameters)) hp = {para_s: theta[parameters == para_s] for para_s in parameters_set} return hp @@ -218,7 +362,13 @@ def make_hp(self, theta, parameters, **kwargs): def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(maxiter=self.maxiter, jac=self.jac) + arg_kwargs = dict( + maxiter=self.maxiter, + jac=self.jac, + parallel=self.parallel, + seed=self.seed, + dtype=self.dtype, + ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class @@ -250,46 +400,59 @@ def __repr__(self): class FunctionEvaluation(Optimizer): - def __init__(self, jac=True, **kwargs): + """ + A method used for evaluating the objective function for + the given hyperparameters. + """ + + def __init__(self, jac=True, parallel=False, dtype=float, **kwargs): """ - A method used for evaluating the objective function for - the given hyperparameters. + Initialize the function evaluation method. Parameters: - jac : bool + jac: bool Whether to use the gradient of the objective function wrt. the hyperparameters. + parallel: bool + Whether to use parallelization. + This is not implemented for this method. + dtype: type (optional) + The data type of the arrays. + If None, the default data type is used. """ - # This optimizer can not be parallelized - self.parallel = False # Set all the arguments - self.update_arguments(jac=jac, **kwargs) + self.update_arguments( + jac=jac, + parallel=parallel, + dtype=dtype, + **kwargs, + ) def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): """ Run the evaluation of the objective function wrt. the hyperparameters. Parameters: - func : ObjectiveFunction class object + func: ObjectiveFunction class object The objective function class that is used to calculate the value. - theta : (H) array + theta: (H) array An array with the hyperparameter values. - parameters : (H) list of strings + parameters: (H) list of strings A list of names of the hyperparameters. - model : Model class object + model: Model class object The Machine Learning Model with kernel and prior that are optimized. 
- X : (N,D) array + X: (N,D) array Training features with N data points and D dimensions. - Y : (N,1) array or (N,D+1) array + Y: (N,1) array or (N,D+1) array Training targets with or without derivatives with N data points. - pdis : dict + pdis: dict A dict of prior distributions for each hyperparameter type. Returns: - dict : A solution dictionary with objective function value, + dict: A solution dictionary with objective function value, hyperparameters, success statement, and number of used evaluations. """ @@ -313,27 +476,14 @@ def run(self, func, theta, parameters, model, X, Y, pdis, **kwargs): pdis, ) - def update_arguments(self, maxiter=None, jac=None, **kwargs): - """ - Update the class with its arguments. - The existing arguments are used if they are not given. - - Parameters: - jac : bool - Whether to use the gradient of the objective function - wrt. the hyperparameters. - - Returns: - self: The updated object itself. - """ - if jac is not None: - self.jac = jac - return self - def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization - arg_kwargs = dict(jac=self.jac) + arg_kwargs = dict( + jac=self.jac, + parallel=self.parallel, + dtype=self.dtype, + ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/pdistributions/gamma.py b/catlearn/regression/gp/pdistributions/gamma.py index 71292008..2bd86ed1 100644 --- a/catlearn/regression/gp/pdistributions/gamma.py +++ b/catlearn/regression/gp/pdistributions/gamma.py @@ -1,38 +1,62 @@ -import numpy as np +from numpy import array, asarray, exp, log, ndarray, sum as sum_, sqrt from .pdistributions import Prior_distribution from scipy.special import loggamma class Gamma_prior(Prior_distribution): - def __init__(self, a=1e-20, b=1e-20, **kwargs): + """ + Gamma prior distribution used for each type + of hyperparameters in log-space. 
+ The Gamma distribution is variable transformed from + linear- to log-space. + If the type of the hyperparameter is multi dimensional (H), + it is given in the axis=-1. + If multiple values (M) of the hyperparameter(/s) + are calculated simultaneously, it has to be in a (M,H) array. + """ + + def __init__(self, a=1e-20, b=1e-20, dtype=float, **kwargs): """ - Gamma prior distribution used for each type - of hyperparameters in log-space. - The Gamma distribution is variable transformed from - linear- to log-space. - If the type of the hyperparameter is multi dimensional (H), - it is given in the axis=-1. - If multiple values (M) of the hyperparameter(/s) - are calculated simultaneously, it has to be in a (M,H) array. + Initialization of the prior distribution. Parameters: a: float or (H) array The shape parameter. b: float or (H) array The scale parameter. + dtype: type + The data type of the arrays. """ - self.update_arguments(a=a, b=b, **kwargs) + self.update_arguments(a=a, b=b, dtype=dtype, **kwargs) def ln_pdf(self, x): - ln_pdf = self.lnpre + 2.0 * self.a * x - self.b * np.exp(2.0 * x) + ln_pdf = self.lnpre + 2.0 * self.a * x - self.b * exp(2.0 * x) if self.nosum: return ln_pdf - return np.sum(ln_pdf, axis=-1) + return sum_(ln_pdf, axis=-1) def ln_deriv(self, x): - return 2.0 * self.a - 2.0 * self.b * np.exp(2.0 * x) + return 2.0 * self.a - 2.0 * self.b * exp(2.0 * x) + + def calc_lnpre(self): + """ + Calculate the lnpre value. + This is used to calculate the ln_pdf value. 
+ """ + self.lnpre = log(2.0) + self.a * log(self.b) - loggamma(self.a) + return self.lnpre + + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + if hasattr(self, "a") and isinstance(self.a, ndarray): + self.a = asarray(self.a, dtype=self.dtype) + if hasattr(self, "b") and isinstance(self.b, ndarray): + self.b = asarray(self.b, dtype=self.dtype) + if hasattr(self, "lnpre"): + self.calc_lnpre() + return self - def update_arguments(self, a=None, b=None, **kwargs): + def update_arguments(self, a=None, b=None, dtype=None, **kwargs): """ Update the object with its arguments. The existing arguments are used if they are not given. @@ -42,21 +66,27 @@ def update_arguments(self, a=None, b=None, **kwargs): The shape parameter. b: float or (H) array The scale parameter. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. """ + # Set the arguments for the parent class + super().update_arguments( + dtype=dtype, + ) if a is not None: if isinstance(a, (float, int)): self.a = a else: - self.a = np.array(a).reshape(-1) + self.a = array(a, dtype=self.dtype).reshape(-1) if b is not None: if isinstance(b, (float, int)): self.b = b else: - self.b = np.array(b).reshape(-1) - self.lnpre = np.log(2.0) + self.a * np.log(self.b) - loggamma(self.a) + self.b = array(b, dtype=self.dtype).reshape(-1) + self.calc_lnpre() if isinstance(self.a, (float, int)) and isinstance( self.b, (float, int) ): @@ -66,14 +96,16 @@ def update_arguments(self, a=None, b=None, **kwargs): return self def mean_var(self, mean, var): - mean, var = np.exp(mean), np.exp(2.0 * np.sqrt(var)) + mean = (exp(mean),) + var = exp(2.0 * sqrt(var)) a = mean**2.0 / var if a == 0: a = 1 return self.update_arguments(a=a, b=mean / var) def min_max(self, min_v, max_v): - min_v, max_v = np.exp(min_v), np.exp(max_v) + min_v = exp(min_v) + max_v = exp(max_v) mean = 0.5 * (min_v + max_v) var = 0.5 * (max_v - min_v) ** 2 return self.update_arguments(a=mean**2 / var, b=mean / 
var) @@ -81,7 +113,7 @@ def min_max(self, min_v, max_v): def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization - arg_kwargs = dict(a=self.a, b=self.b) + arg_kwargs = dict(a=self.a, b=self.b, dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/pdistributions/gen_normal.py b/catlearn/regression/gp/pdistributions/gen_normal.py index 0881f8f9..1c2cc77b 100644 --- a/catlearn/regression/gp/pdistributions/gen_normal.py +++ b/catlearn/regression/gp/pdistributions/gen_normal.py @@ -1,16 +1,20 @@ -import numpy as np +from numpy import array, asarray, log, ndarray, sum as sum_, sqrt from .pdistributions import Prior_distribution class Gen_normal_prior(Prior_distribution): - def __init__(self, mu=0.0, s=10.0, v=2, **kwargs): + """ + Independent Generalized Normal prior distribution used for each type + of hyperparameters in log-space. + If the type of the hyperparameter is multi dimensional (H), + it is given in the axis=-1. + If multiple values (M) of the hyperparameter(/s) + are calculated simultaneously, it has to be in a (M,H) array. + """ + + def __init__(self, mu=0.0, s=10.0, v=2, dtype=float, **kwargs): """ - Independent Generalized Normal prior distribution used for each type - of hyperparameters in log-space. - If the type of the hyperparameter is multi dimensional (H), - it is given in the axis=-1. - If multiple values (M) of the hyperparameter(/s) - are calculated simultaneously, it has to be in a (M,H) array. + Initialization of the prior distribution. Parameters: mu: float or (H) array @@ -19,25 +23,37 @@ def __init__(self, mu=0.0, s=10.0, v=2, **kwargs): The scale of the generalized normal distribution. v: float or (H) array The shape or magnitude of the generalized normal distribution. + dtype: type + The data type of the arrays. 
""" - self.update_arguments(mu=mu, s=s, v=v, **kwargs) + self.update_arguments(mu=mu, s=s, v=v, dtype=dtype, **kwargs) def ln_pdf(self, x): - lnpdf = ( + ln_pdf = ( -(((x - self.mu) / self.s) ** (2 * self.v)) - - np.log(self.s) - + np.log(0.52) + - log(self.s) + + log(0.52) ) if self.nosum: - return lnpdf - return np.sum(lnpdf, axis=-1) + return ln_pdf + return sum_(ln_pdf, axis=-1) def ln_deriv(self, x): return (-(2.0 * self.v) * ((x - self.mu) ** (2 * self.v - 1))) / ( self.s ** (2 * self.v) ) - def update_arguments(self, mu=None, s=None, v=None, **kwargs): + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + if hasattr(self, "mu") and isinstance(self.mu, ndarray): + self.mu = asarray(self.mu, dtype=self.dtype) + if hasattr(self, "std") and isinstance(self.std, ndarray): + self.std = asarray(self.std, dtype=self.dtype) + if hasattr(self, "v") and isinstance(self.v, ndarray): + self.v = asarray(self.v, dtype=self.dtype) + return self + + def update_arguments(self, mu=None, s=None, v=None, dtype=None, **kwargs): """ Update the object with its arguments. The existing arguments are used if they are not given. @@ -49,25 +65,31 @@ def update_arguments(self, mu=None, s=None, v=None, **kwargs): The scale of the generalized normal distribution. v: float or (H) array The shape or magnitude of the generalized normal distribution. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. 
""" + # Set the arguments for the parent class + super().update_arguments( + dtype=dtype, + ) if mu is not None: if isinstance(mu, (float, int)): self.mu = mu else: - self.mu = np.array(mu).reshape(-1) + self.mu = array(mu, dtype=self.dtype).reshape(-1) if s is not None: if isinstance(s, (float, int)): self.s = s else: - self.s = np.array(s).reshape(-1) + self.s = array(s, dtype=self.dtype).reshape(-1) if v is not None: if isinstance(v, (float, int)): self.v = v else: - self.v = np.array(v).reshape(-1) + self.v = array(v, dtype=self.dtype).reshape(-1) if ( isinstance(self.mu, (float, int)) and isinstance(self.s, (float, int)) @@ -79,19 +101,19 @@ def update_arguments(self, mu=None, s=None, v=None, **kwargs): return self def mean_var(self, mean, var): - return self.update_arguments(mu=mean, s=np.sqrt(var / 0.32)) + return self.update_arguments(mu=mean, s=sqrt(var / 0.32)) def min_max(self, min_v, max_v): mu = (max_v + min_v) / 2.0 return self.update_arguments( mu=mu, - s=np.sqrt(2.0 / 0.32) * (max_v - mu), + s=sqrt(2.0 / 0.32) * (max_v - mu), ) def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(mu=self.mu, s=self.s, v=self.v) + arg_kwargs = dict(mu=self.mu, s=self.s, v=self.v, dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/pdistributions/invgamma.py b/catlearn/regression/gp/pdistributions/invgamma.py index 26d5224d..1a10b61e 100644 --- a/catlearn/regression/gp/pdistributions/invgamma.py +++ b/catlearn/regression/gp/pdistributions/invgamma.py @@ -1,38 +1,62 @@ -import numpy as np +from numpy import array, asarray, exp, log, ndarray, sum as sum_, sqrt from .pdistributions import Prior_distribution from scipy.special import loggamma class Invgamma_prior(Prior_distribution): - def __init__(self, a=1e-20, b=1e-20, **kwargs): + """ + Inverse-Gamma prior distribution used for each type + of hyperparameters in log-space. + The inverse-gamma distribution is variable transformed from + linear- to log-space. + If the type of the hyperparameter is multi dimensional (H), + it is given in the axis=-1. + If multiple values (M) of the hyperparameter(/s) + are calculated simultaneously, it has to be in a (M,H) array. + """ + + def __init__(self, a=1e-20, b=1e-20, dtype=float, **kwargs): """ - Inverse-Gamma prior distribution used for each type - of hyperparameters in log-space. - The inverse-gamma distribution is variable transformed from - linear- to log-space. - If the type of the hyperparameter is multi dimensional (H), - it is given in the axis=-1. - If multiple values (M) of the hyperparameter(/s) - are calculated simultaneously, it has to be in a (M,H) array. + Initialization of the prior distribution. Parameters: a: float or (H) array The shape parameter. b: float or (H) array The scale parameter. + dtype: type + The data type of the arrays. 
""" - self.update_arguments(a=a, b=b, **kwargs) + self.update_arguments(a=a, b=b, dtype=dtype, **kwargs) def ln_pdf(self, x): - ln_pdf = self.lnpre - 2.0 * self.a * x - self.b * np.exp(-2.0 * x) + ln_pdf = self.lnpre - 2.0 * self.a * x - self.b * exp(-2.0 * x) if self.nosum: return ln_pdf - return np.sum(ln_pdf, axis=-1) + return sum_(ln_pdf, axis=-1) def ln_deriv(self, x): - return -2.0 * self.a + 2.0 * self.b * np.exp(-2.0 * x) + return -2.0 * self.a + 2.0 * self.b * exp(-2.0 * x) + + def calc_lnpre(self): + """ + Calculate the lnpre value. + This is used to calculate the ln_pdf value. + """ + self.lnpre = log(2.0) + self.a * log(self.b) - loggamma(self.a) + return self.lnpre + + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + if hasattr(self, "a") and isinstance(self.a, ndarray): + self.a = asarray(self.a, dtype=self.dtype) + if hasattr(self, "b") and isinstance(self.b, ndarray): + self.b = asarray(self.b, dtype=self.dtype) + if hasattr(self, "lnpre"): + self.calc_lnpre() + return self - def update_arguments(self, a=None, b=None, **kwargs): + def update_arguments(self, a=None, b=None, dtype=None, **kwargs): """ Update the object with its arguments. The existing arguments are used if they are not given. @@ -42,21 +66,27 @@ def update_arguments(self, a=None, b=None, **kwargs): The shape parameter. b: float or (H) array The scale parameter. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. 
""" + # Set the arguments for the parent class + super().update_arguments( + dtype=dtype, + ) if a is not None: if isinstance(a, (float, int)): self.a = a else: - self.a = np.array(a).reshape(-1) + self.a = array(a, dtype=self.dtype).reshape(-1) if b is not None: if isinstance(b, (float, int)): self.b = b else: - self.b = np.array(b).reshape(-1) - self.lnpre = np.log(2.0) + self.a * np.log(self.b) - loggamma(self.a) + self.b = array(b, dtype=self.dtype).reshape(-1) + self.calc_lnpre() if isinstance(self.a, (float, int)) and isinstance( self.b, (float, int) ): @@ -66,12 +96,13 @@ def update_arguments(self, a=None, b=None, **kwargs): return self def mean_var(self, mean, var): - mean, var = np.exp(mean), np.exp(2.0 * np.sqrt(var)) - min_v = mean - np.sqrt(var) * 2.0 + mean = exp(mean) + var = exp(2.0 * sqrt(var)) + min_v = mean - sqrt(var) * 2.0 return self.update_arguments(a=min_v, b=min_v) def min_max(self, min_v, max_v): - b = np.exp(2.0 * min_v) + b = exp(2.0 * min_v) return self.update_arguments(a=b, b=b) def copy(self): @@ -80,7 +111,7 @@ def copy(self): def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(a=self.a, b=self.b) + arg_kwargs = dict(a=self.a, b=self.b, dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/pdistributions/normal.py b/catlearn/regression/gp/pdistributions/normal.py index 4789b66d..631e5536 100644 --- a/catlearn/regression/gp/pdistributions/normal.py +++ b/catlearn/regression/gp/pdistributions/normal.py @@ -1,39 +1,53 @@ -import numpy as np +from numpy import array, asarray, log, ndarray, pi, sum as sum_, sqrt from .pdistributions import Prior_distribution class Normal_prior(Prior_distribution): - def __init__(self, mu=0.0, std=10.0, **kwargs): + """ + Independent Normal or Gaussian prior distribution used for each type + of hyperparameters in log-space. + If the type of the hyperparameter is multi dimensional (H), + it is given in the axis=-1. + If multiple values (M) of the hyperparameter(/s) + are calculated simultaneously, it has to be in a (M,H) array. + """ + + def __init__(self, mu=0.0, std=10.0, dtype=float, **kwargs): """ - Independent Normal prior distribution used for each type - of hyperparameters in log-space. - If the type of the hyperparameter is multi dimensional (H), - it is given in the axis=-1. - If multiple values (M) of the hyperparameter(/s) - are calculated simultaneously, it has to be in a (M,H) array. + Initialization of the prior distribution. Parameters: mu: float or (H) array The mean of the normal distribution. std: float or (H) array The standard deviation of the normal distribution. + dtype: type + The data type of the arrays. 
""" - self.update_arguments(mu=mu, std=std, **kwargs) + self.update_arguments(mu=mu, std=std, dtype=dtype, **kwargs) def ln_pdf(self, x): ln_pdf = ( - -np.log(self.std) - - 0.5 * np.log(2.0 * np.pi) + -log(self.std) + - 0.5 * log(2.0 * pi) - 0.5 * ((x - self.mu) / self.std) ** 2 ) if self.nosum: return ln_pdf - return np.sum(ln_pdf, axis=-1) + return sum_(ln_pdf, axis=-1) def ln_deriv(self, x): return -(x - self.mu) / self.std**2 - def update_arguments(self, mu=None, std=None, **kwargs): + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + if hasattr(self, "mu") and isinstance(self.mu, ndarray): + self.mu = asarray(self.mu, dtype=self.dtype) + if hasattr(self, "std") and isinstance(self.std, ndarray): + self.std = asarray(self.std, dtype=self.dtype) + return self + + def update_arguments(self, mu=None, std=None, dtype=None, **kwargs): """ Update the object with its arguments. The existing arguments are used if they are not given. @@ -43,20 +57,26 @@ def update_arguments(self, mu=None, std=None, **kwargs): The mean of the normal distribution. std: float or (H) array The standard deviation of the normal distribution. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. 
""" + # Set the arguments for the parent class + super().update_arguments( + dtype=dtype, + ) if mu is not None: if isinstance(mu, (float, int)): self.mu = mu else: - self.mu = np.array(mu).reshape(-1) + self.mu = array(mu, dtype=self.dtype).reshape(-1) if std is not None: if isinstance(std, (float, int)): self.std = std else: - self.std = np.array(std).reshape(-1) + self.std = array(std, dtype=self.dtype).reshape(-1) if isinstance(self.mu, (float, int)) and isinstance( self.std, (float, int) ): @@ -66,16 +86,16 @@ def update_arguments(self, mu=None, std=None, **kwargs): return self def mean_var(self, mean, var): - return self.update_arguments(mu=mean, std=np.sqrt(var)) + return self.update_arguments(mu=mean, std=sqrt(var)) def min_max(self, min_v, max_v): mu = 0.5 * (min_v + max_v) - return self.update_arguments(mu=mu, std=np.sqrt(2.0) * (max_v - mu)) + return self.update_arguments(mu=mu, std=sqrt(2.0) * (max_v - mu)) def get_arguments(self): "Get the arguments of the class itself." # Get the arguments given to the class in the initialization - arg_kwargs = dict(mu=self.mu, std=self.std) + arg_kwargs = dict(mu=self.mu, std=self.std, dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/pdistributions/pdistributions.py b/catlearn/regression/gp/pdistributions/pdistributions.py index f2ef668f..915562a8 100644 --- a/catlearn/regression/gp/pdistributions/pdistributions.py +++ b/catlearn/regression/gp/pdistributions/pdistributions.py @@ -1,17 +1,25 @@ -import numpy as np +from numpy import exp class Prior_distribution: - def __init__(self, **kwargs): + """ + Prior probability distribution used for each type + of hyperparameters in log-space. + If the type of the hyperparameter is multi dimensional (H), + it is given in the axis=-1. + If multiple values (M) of the hyperparameter(/s) + are calculated simultaneously, it has to be in a (M,H) array. 
+ """ + + def __init__(self, dtype=float, **kwargs): """ - Prior probability distribution used for each type - of hyperparameters in log-space. - If the type of the hyperparameter is multi dimensional (H), - it is given in the axis=-1. - If multiple values (M) of the hyperparameter(/s) - are calculated simultaneously, it has to be in a (M,H) array. + Initialization of the prior distribution. + + Parameters: + dtype: type + The data type of the arrays. """ - self.update_arguments(**kwargs) + self.update_arguments(dtype=dtype, **kwargs) def pdf(self, x): """ @@ -33,7 +41,7 @@ def pdf(self, x): (M) array: M values of the probability density function if M different values is given. """ - return np.exp(self.ln_pdf(x)) + return exp(self.ln_pdf(x)) def deriv(self, x): "The derivative of the probability density function as respect to x." @@ -68,14 +76,36 @@ def ln_deriv(self, x): """ raise NotImplementedError() - def update_arguments(self, **kwargs): + def set_dtype(self, dtype, **kwargs): + """ + Set the data type of the arrays. + + Parameters: + dtype: type + The data type of the arrays. + + Returns: + self: The updated object itself. + """ + # Set the data type + self.dtype = dtype + return self + + def update_arguments(self, dtype=None, **kwargs): """ Update the object with its arguments. The existing arguments are used if they are not given. + Parameters: + dtype: type + The data type of the arrays. + Returns: self: The updated object itself. """ + # Set the data type + if dtype is not None or not hasattr(self, "dtype"): + self.set_dtype(dtype=dtype) return self def mean_var(self, mean, var): @@ -95,7 +125,7 @@ def min_max(self, min_v, max_v): def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict() + arg_kwargs = dict(dtype=self.dtype) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/pdistributions/uniform.py b/catlearn/regression/gp/pdistributions/uniform.py index 7b876f38..303184ec 100644 --- a/catlearn/regression/gp/pdistributions/uniform.py +++ b/catlearn/regression/gp/pdistributions/uniform.py @@ -1,16 +1,30 @@ -import numpy as np +from numpy import ( + array, + asarray, + log, + inf, + nan_to_num, + ndarray, + sum as sum_, + sqrt, + where, +) from .pdistributions import Prior_distribution class Uniform_prior(Prior_distribution): - def __init__(self, start=-18.0, end=18.0, prob=1.0, **kwargs): + """ + Uniform prior distribution used for each type + of hyperparameters in log-space. + If the type of the hyperparameter is multi dimensional (H), + it is given in the axis=-1. + If multiple values (M) of the hyperparameter(/s) + are calculated simultaneously, it has to be in a (M,H) array. + """ + + def __init__(self, start=-18.0, end=18.0, prob=1.0, dtype=float, **kwargs): """ - Uniform prior distribution used for each type - of hyperparameters in log-space. - If the type of the hyperparameter is multi dimensional (H), - it is given in the axis=-1. - If multiple values (M) of the hyperparameter(/s) - are calculated simultaneously, it has to be in a (M,H) array. + Initialization of the prior distribution. Parameters: start: float or (H) array @@ -21,24 +35,49 @@ def __init__(self, start=-18.0, end=18.0, prob=1.0, **kwargs): the hyperparameter in log-space. prob: float or (H) array The non-zero prior distribution value. + dtype: type + The data type of the arrays. 
""" - self.update_arguments(start=start, end=end, prob=prob, **kwargs) + self.update_arguments( + start=start, + end=end, + prob=prob, + dtype=dtype, + **kwargs, + ) def ln_pdf(self, x): - ln_0 = -np.log(np.nan_to_num(np.inf)) - ln_pdf = np.where( + ln_0 = -log(nan_to_num(inf)) + ln_pdf = where( x >= self.start, - np.where(x <= self.end, np.log(self.prob), ln_0), + where(x <= self.end, log(self.prob), ln_0), ln_0, ) if self.nosum: return ln_pdf - return np.sum(ln_pdf, axis=-1) + return sum_(ln_pdf, axis=-1) def ln_deriv(self, x): return 0.0 * x - def update_arguments(self, start=None, end=None, prob=None, **kwargs): + def set_dtype(self, dtype, **kwargs): + super().set_dtype(dtype, **kwargs) + if hasattr(self, "start") and isinstance(self.start, ndarray): + self.start = asarray(self.start, dtype=self.dtype) + if hasattr(self, "end") and isinstance(self.end, ndarray): + self.end = asarray(self.end, dtype=self.dtype) + if hasattr(self, "prob") and isinstance(self.prob, ndarray): + self.prob = asarray(self.prob, dtype=self.dtype) + return self + + def update_arguments( + self, + start=None, + end=None, + prob=None, + dtype=None, + **kwargs, + ): """ Update the object with its arguments. The existing arguments are used if they are not given. @@ -52,25 +91,31 @@ def update_arguments(self, start=None, end=None, prob=None, **kwargs): the hyperparameter in log-space. prob: float or (H) array The non-zero prior distribution value. + dtype: type + The data type of the arrays. Returns: self: The updated object itself. 
""" + # Set the arguments for the parent class + super().update_arguments( + dtype=dtype, + ) if start is not None: if isinstance(start, (float, int)): self.start = start else: - self.start = np.array(start).reshape(-1) + self.start = array(start, dtype=self.dtype).reshape(-1) if end is not None: if isinstance(end, (float, int)): self.end = end else: - self.end = np.array(end).reshape(-1) + self.end = array(end, dtype=self.dtype).reshape(-1) if prob is not None: if isinstance(prob, (float, int)): self.prob = prob else: - self.prob = np.array(prob).reshape(-1) + self.prob = array(prob, dtype=self.dtype).reshape(-1) if ( isinstance(self.start, (float, int)) and isinstance(self.end, (float, int)) @@ -82,7 +127,7 @@ def update_arguments(self, start=None, end=None, prob=None, **kwargs): return self def mean_var(self, mean, var): - std = np.sqrt(var) + std = sqrt(var) return self.update_arguments( start=mean - 4.0 * std, end=mean + 4.0 * std, @@ -99,7 +144,12 @@ def min_max(self, min_v, max_v): def get_arguments(self): "Get the arguments of the class itself." 
# Get the arguments given to the class in the initialization - arg_kwargs = dict(start=self.start, end=self.start, prob=self.prob) + arg_kwargs = dict( + start=self.start, + end=self.start, + prob=self.prob, + dtype=self.dtype, + ) # Get the constants made within the class constant_kwargs = dict() # Get the objects made within the class diff --git a/catlearn/regression/gp/pdistributions/update_pdis.py b/catlearn/regression/gp/pdistributions/update_pdis.py index 1a0373ff..35623dd7 100644 --- a/catlearn/regression/gp/pdistributions/update_pdis.py +++ b/catlearn/regression/gp/pdistributions/update_pdis.py @@ -1,4 +1,16 @@ -def update_pdis(model, parameters, X, Y, bounds=None, pdis=None, **kwargs): +from ..hpboundary.strict import StrictBoundaries + + +def update_pdis( + model, + parameters, + X, + Y, + bounds=None, + pdis=None, + dtype=float, + **kwargs, +): """ Update given prior distribution of hyperparameters from educated guesses in log space. @@ -9,9 +21,7 @@ def update_pdis(model, parameters, X, Y, bounds=None, pdis=None, **kwargs): # Make boundary conditions for updating the prior distributions if bounds is None: # Use strict educated guesses for the boundary conditions if not given - from ..hpboundary.strict import StrictBoundaries - - bounds = StrictBoundaries(log=True, use_prior_mean=True) + bounds = StrictBoundaries(log=True, use_prior_mean=True, dtype=dtype) # Update boundary conditions to the data bounds.update_bounds(model, X, Y, parameters) # Make prior distributions for hyperparameters from boundary conditions diff --git a/catlearn/structures/__init__.py b/catlearn/structures/__init__.py new file mode 100644 index 00000000..ec65aa55 --- /dev/null +++ b/catlearn/structures/__init__.py @@ -0,0 +1,4 @@ +from .structure import Structure + + +__all__ = ["Structure"] diff --git a/catlearn/optimize/neb/__init__.py b/catlearn/structures/neb/__init__.py similarity index 92% rename from catlearn/optimize/neb/__init__.py rename to 
catlearn/structures/neb/__init__.py index bc7a4840..8e5d0c32 100644 --- a/catlearn/optimize/neb/__init__.py +++ b/catlearn/structures/neb/__init__.py @@ -3,7 +3,6 @@ from .ewneb import EWNEB from .avgewneb import AvgEWNEB from .maxewneb import MaxEWNEB -from .nebimage import NEBImage from .interpolate_band import interpolate, make_interpolation __all__ = [ diff --git a/catlearn/structures/neb/avgewneb.py b/catlearn/structures/neb/avgewneb.py new file mode 100644 index 00000000..18c3cd49 --- /dev/null +++ b/catlearn/structures/neb/avgewneb.py @@ -0,0 +1,33 @@ +from numpy import where +from .ewneb import EWNEB + + +class AvgEWNEB(EWNEB): + """ + The average energy-weighted Nudged Elastic Band method implementation. + The energy-weighted method uses energy weighting to calculate the + spring constants. + The average weigting for both ends of the spring are used + instead of the forward energy weighting. + """ + + def get_spring_constants(self, **kwargs): + # Get the spring constants + energies = self.get_energies() + # Get the reference energy + if self.use_minimum: + e0 = min([energies[0], energies[-1]]) + else: + e0 = max([energies[0], energies[-1]]) + # Get the maximum energy + emax = energies.max() + # Calculate the weighted spring constants + k_l = self.k * self.kl_scale + if e0 < emax: + a = (emax - energies) / (emax - e0) + a = where(a < 1.0, a, 1.0) + a = 0.5 * (a[1:] + a[:-1]) + k = ((1.0 - a) * self.k) + (a * k_l) + else: + k = k_l + return k diff --git a/catlearn/structures/neb/ewneb.py b/catlearn/structures/neb/ewneb.py new file mode 100644 index 00000000..08bd133b --- /dev/null +++ b/catlearn/structures/neb/ewneb.py @@ -0,0 +1,125 @@ +from numpy import max as max_, where +from ase.parallel import world +from .improvedneb import ImprovedTangentNEB + + +class EWNEB(ImprovedTangentNEB): + """ + The energy-weighted Nudged Elastic Band method implementation. + The energy-weighted method uses energy weighting to calculate the spring + constants. 
+ See: + https://doi.org/10.1021/acs.jctc.1c00462 + """ + + def __init__( + self, + images, + k=0.1, + kl_scale=0.1, + use_minimum=False, + climb=False, + remove_rotation_and_translation=False, + mic=True, + use_image_permutation=False, + save_properties=False, + parallel=False, + comm=world, + **kwargs + ): + """ + Initialize the NEB instance. + + Parameters: + images: List of ASE Atoms instances + The ASE Atoms instances used as the images of the initial path + that is optimized. + k: List of floats or float + The (Nimg-1) spring forces acting between each image. + In the energy-weighted Nudged Elastic Band method, this spring + constants are the upper spring constants. + kl_scale: float + The scaling factor for the lower spring constants. + use_minimum: bool + Whether to use the minimum energy as the reference energy + for the spring constants. + If False, the maximum energy is used. + climb: bool + Whether to use climbing image in the NEB. + See: + https://doi.org/10.1063/1.1329672 + remove_rotation_and_translation: bool + Whether to remove rotation and translation in interpolation + and when predicting forces. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + use_image_permutation: bool + Whether to permute images to minimize the path length. + It assumes a greedy algorithm to find the minimum path length + by selecting the next image that is closest to the previous + image. + It is only used in the initialization of the NEB. + save_properties: bool + Whether to save the properties by making a copy of the images. + parallel: bool + Whether to run the calculations in parallel. + comm: ASE communicator instance + The communicator instance for parallelization. 
+ """ + super().__init__( + images, + k=k, + climb=climb, + remove_rotation_and_translation=remove_rotation_and_translation, + mic=mic, + use_image_permutation=use_image_permutation, + save_properties=save_properties, + parallel=parallel, + comm=comm, + **kwargs + ) + self.kl_scale = kl_scale + self.use_minimum = use_minimum + + def get_spring_constants(self, **kwargs): + # Get the energies + energies = self.get_energies() + # Get the reference energy + if self.use_minimum: + e0 = min([energies[0], energies[-1]]) + else: + e0 = max([energies[0], energies[-1]]) + # Get the maximum energy + emax = energies.max() + # Calculate the weighted spring constants + k_l = self.k * self.kl_scale + if e0 < emax: + used_energies = max_([energies[1:], energies[:-1]], axis=0) + a = (emax - used_energies) / (emax - e0) + k = where(a < 1.0, (1.0 - a) * self.k + a * k_l, k_l) + else: + k = k_l + return k + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + images=self.images, + k=self.k, + kl_scale=self.kl_scale, + use_minimum=self.use_minimum, + climb=self.climb, + remove_rotation_and_translation=self.rm_rot_trans, + mic=self.mic, + use_image_permutation=self.use_image_permutation, + save_properties=self.save_properties, + parallel=self.parallel, + comm=self.comm, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs diff --git a/catlearn/structures/neb/improvedneb.py b/catlearn/structures/neb/improvedneb.py new file mode 100644 index 00000000..6df03014 --- /dev/null +++ b/catlearn/structures/neb/improvedneb.py @@ -0,0 +1,56 @@ +from numpy import einsum, empty, sqrt +from numpy.linalg import norm +from .orgneb import OriginalNEB + + +class ImprovedTangentNEB(OriginalNEB): + """ + The improved tangent Nudged Elastic Band method implementation. 
+ The improved tangent method uses energy weighting to calculate the tangent. + + See: + https://doi.org/10.1063/1.1323224 + """ + + def get_parallel_forces(self, tangent, pos_p, pos_m, **kwargs): + # Get the spring constants + k = self.get_spring_constants() + # Calculate the parallel forces + forces_parallel = k[1:] * sqrt(einsum("ijk,ijk->i", pos_p, pos_p)) + forces_parallel -= k[:-1] * sqrt(einsum("ijk,ijk->i", pos_m, pos_m)) + forces_parallel = forces_parallel.reshape(-1, 1, 1) * tangent + return forces_parallel + + def get_tangent(self, pos_p, pos_m, **kwargs): + tangent = empty((int(self.nimages - 2), self.natoms, 3)) + energies = self.get_energies() + for i in range(1, self.nimages - 1): + if energies[i + 1] > energies[i] and energies[i] > energies[i - 1]: + tangent[i - 1] = pos_p[i - 1] + elif ( + energies[i + 1] < energies[i] and energies[i] < energies[i - 1] + ): + tangent[i - 1] = pos_m[i - 1] + elif energies[i + 1] > energies[i - 1]: + energy_dif = [ + abs(energies[i + 1] - energies[i]), + abs(energies[i - 1] - energies[i]), + ] + tangent[i - 1] = pos_p[i - 1] * max(energy_dif) + tangent[i - 1] += pos_m[i - 1] * min(energy_dif) + elif energies[i + 1] < energies[i - 1]: + energy_dif = [ + abs(energies[i + 1] - energies[i]), + abs(energies[i - 1] - energies[i]), + ] + tangent[i - 1] = pos_p[i - 1] * min(energy_dif) + tangent[i - 1] += pos_m[i - 1] * max(energy_dif) + else: + tangent[i - 1] = pos_p[i - 1] / norm(pos_p[i - 1]) + tangent[i - 1] += pos_m[i - 1] / norm(pos_m[i - 1]) + # Normalization of tangent + tangent_norm = sqrt(einsum("ijk,ijk->i", tangent, tangent)).reshape( + -1, 1, 1 + ) + tangent = tangent / tangent_norm + return tangent diff --git a/catlearn/structures/neb/interpolate_band.py b/catlearn/structures/neb/interpolate_band.py new file mode 100644 index 00000000..7038146e --- /dev/null +++ b/catlearn/structures/neb/interpolate_band.py @@ -0,0 +1,622 @@ +from numpy import ndarray +from numpy.linalg import norm +from numpy.random import 
default_rng +from ase.io import read +from ase.optimize import FIRE +from ase.build import minimize_rotation_and_translation +from .improvedneb import ImprovedTangentNEB +from ...regression.gp.calculator.copy_atoms import copy_atoms +from ...regression.gp.fingerprint.geometry import mic_distance + + +def interpolate( + start, + end, + ts=None, + n_images=15, + method="linear", + mic=True, + remove_rotation_and_translation=False, + **interpolation_kwargs, +): + """ + Make a NEB interpolation between the start and end structure. + A transition state structure can be given to guide the NEB interpolation. + + Parameters: + start: ASE Atoms instance + The starting structure for the NEB interpolation. + end: ASE Atoms instance + The ending structure for the NEB interpolation. + ts: ASE Atoms instance (optional) + An intermediate state the NEB interpolation should go through. + Then, the method should be one of the following: 'linear', 'idpp', + 'rep', 'born', or 'ends'. + n_images: int + The number of images in the NEB interpolation. + method: str or list of ASE Atoms instances + The method to use for the NEB interpolation. If a list of + ASE Atoms instances is given, then the interpolation will be + made between the start and end structure using the images in + the list. If a string is given, then it should be one of the + following: 'linear', 'idpp', 'rep', or 'ends'. The string can + also be the name of a trajectory file. In that case, the + interpolation will be made using the images in the trajectory + file. The trajectory file should contain the start and end + structure. + mic: bool + If True, then the minimum-image convention is used for the + interpolation. If False, then the images are not constrained + to the minimum-image convention. + remove_rotation_and_translation: bool + If True, then the rotation and translation of the end + structure is removed before the interpolation is made. 
+ interpolation_kwargs: dict + Additional keyword arguments to pass to the interpolation + methods. + """ + # Copy the start and end structures + start = copy_atoms(start) + end = copy_atoms(end) + # The rotation and translation should be removed the end structure + # is optimized compared to start structure + if remove_rotation_and_translation: + start.center() + end.center() + minimize_rotation_and_translation(start, end) + # If the transition state is not given then make a regular interpolation + if ts is None: + images = make_interpolation( + start, + end, + n_images=n_images, + method=method, + mic=mic, + remove_rotation_and_translation=remove_rotation_and_translation, + **interpolation_kwargs, + ) + return images + # Copy the transition state structure + ts = copy_atoms(ts) + # Check if the method is compatible with the interpolation for the TS + if not ( + isinstance(method, str) + and method in ["linear", "idpp", "rep", "born", "ends"] + ): + raise ValueError( + "The method should be one of the following: " + "'linear', 'idpp', 'rep', 'born', or 'ends." 
+ ) + # Get the interpolated path from the start structure to the TS structure + images = make_interpolation( + start, + ts, + n_images=n_images, + method=method, + mic=mic, + remove_rotation_and_translation=remove_rotation_and_translation, + **interpolation_kwargs, + ) + # Get the cumulative distance from the start to the TS structure + dis_st = get_images_distance(images) + # Get the interpolated path from the TS structure to the end structure + images = make_interpolation( + ts, + end, + n_images=n_images, + method=method, + mic=mic, + remove_rotation_and_translation=remove_rotation_and_translation, + **interpolation_kwargs, + ) + # Get the cumulative distance from the TS to the end structure + dis_et = get_images_distance(images) + # Calculate the number of images from start to the TS from the distance + n_images_st = int(n_images * dis_st / (dis_st + dis_et)) + n_images_st = 2 if n_images_st < 2 else n_images_st + # Get the interpolated path from the start structure to + # the TS structure with the correct number of images + images1 = make_interpolation( + start, + ts, + n_images=n_images_st, + method=method, + mic=mic, + remove_rotation_and_translation=remove_rotation_and_translation, + **interpolation_kwargs, + ) + # Get the interpolated path from the TS structure to + # the end structure with the corrct number of images + images2 = make_interpolation( + ts, + end, + n_images=int(n_images - n_images_st + 1), + method=method, + mic=mic, + remove_rotation_and_translation=remove_rotation_and_translation, + **interpolation_kwargs, + )[1:] + return list(images1) + list(images2) + + +def make_interpolation( + start, + end, + n_images=15, + method="linear", + mic=True, + **interpolation_kwargs, +): + """ + Make the NEB interpolation path. + The method can be one of the following: 'linear', 'idpp', 'rep', + 'born', or 'ends'. 
If a list of ASE Atoms instances is given, + then the interpolation will be made between the start and end + structure using the images in the list. If a string is given, + then it should be the name of a trajectory file. In that case, + the interpolation will be made using the images in the trajectory + file. The trajectory file should contain the start and end structure. + + Parameters: + start: ASE Atoms instance + The starting structure for the NEB interpolation. + end: ASE Atoms instance + The ending structure for the NEB interpolation. + n_images: int + The number of images in the NEB interpolation. + method: str or list of ASE Atoms instances + The method to use for the NEB interpolation. If a list of + ASE Atoms instances is given, then the interpolation will be + made between the start and end structure using the images in + the list. If a string is given, then it should be one of the + following: 'linear', 'idpp', 'rep', or 'ends'. The string can + also be the name of a trajectory file. In that case, the + interpolation will be made using the images in the trajectory + file. The trajectory file should contain the start and end + structure. + mic: bool + If True, then the minimum-image convention is used for the + interpolation. If False, then the images are not constrained + to the minimum-image convention. + interpolation_kwargs: dict + Additional keyword arguments to pass to the interpolation + methods. + + Returns: + list of ASE Atoms instances + The list of images with the interpolation between + the initial and final state. + """ + # Use a premade interpolation path + if isinstance(method, (list, ndarray)): + # Check if the number of images in the method is equal to n_images + if len(method) != n_images: + raise ValueError( + "The number of images in the method should be " + "equal to n_images." 
+ ) + images = [copy_atoms(image) for image in method[1:-1]] + images = [copy_atoms(start)] + images + [copy_atoms(end)] + elif isinstance(method, str) and method.lower() not in [ + "linear", + "idpp", + "rep", + "born", + "ends", + ]: + # Import interpolation from a trajectory file + images = read(method, "-{}:".format(n_images)) + images = [copy_atoms(start)] + images[1:-1] + [copy_atoms(end)] + else: + # Make path by the NEB methods interpolation + images = [start.copy() for _ in range(1, n_images - 1)] + images = [copy_atoms(start)] + images + [copy_atoms(end)] + if method.lower() == "ends": + images = make_end_interpolations( + images, + mic=mic, + **interpolation_kwargs, + ) + else: + images = make_linear_interpolation( + images, + mic=mic, + **interpolation_kwargs, + ) + if method.lower() == "idpp": + images = make_idpp_interpolation( + images, + mic=mic, + **interpolation_kwargs, + ) + elif method.lower() == "rep": + images = make_rep_interpolation( + images, + mic=mic, + **interpolation_kwargs, + ) + elif method.lower() == "born": + images = make_born_interpolation( + images, + mic=mic, + **interpolation_kwargs, + ) + return images + + +def make_linear_interpolation( + images, + mic=False, + use_perturbation=False, + d_perturb=0.02, + seed=1, + **kwargs, +): + """ + Make the linear interpolation from initial to final state. + + Parameters: + images: list of ASE Atoms instances + The list of images to interpolate between. + mic: bool + If True, then the minimum-image convention is used for the + interpolation. If False, then the images are not constrained + to the minimum-image convention. + use_perturbation: bool + If True, then the images are perturbed with a Gaussian noise + with a standard deviation of d_perturb. + d_perturb: float + The standard deviation of the Gaussian noise used to perturb + the images if use_perturbation is True. + seed: int (optional) + The random seed used to generate the Gaussian noise if + use_perturbation is True. 
+ If seed is None, then the default random number generator + is used. + + Returns: + list of ASE Atoms instances + The list of images with the interpolation between + the initial and final state. + """ + # Get the position of initial state + pos0 = images[0].get_positions() + # Get the distance to the final state + dist_vec = images[-1].get_positions() - pos0 + # Calculate the minimum-image convention if mic=True + if mic: + _, dist_vec = mic_distance( + dist_vec, + cell=images[0].get_cell(), + pbc=images[0].pbc, + use_vector=True, + ) + # Calculate the distance moved for each image + dist_vec = dist_vec / float(len(images) - 1) + # Make random generator if perturbation is used + if use_perturbation: + rng = default_rng(seed) + # Set the positions + for i in range(1, len(images) - 1): + # Get the position of the image + pos = pos0 + (i * dist_vec) + # Add perturbation if requested + if use_perturbation: + pos += rng.normal(0.0, d_perturb, size=pos.shape) + # Set the position of the image + images[i].set_positions(pos) + return images + + +def make_idpp_interpolation( + images, + mic=False, + fmax=1.0, + steps=100, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + local_opt=FIRE, + local_kwargs={}, + **kwargs, +): + """ + Make the IDPP interpolation from initial to final state + from NEB optimization. + + Parameters: + images: list of ASE Atoms instances + The list of images to interpolate between. + mic: bool + If True, then the minimum-image convention is used for the + interpolation. If False, then the images are not constrained + to the minimum-image convention. + fmax: float + The maximum force for the optimization. + steps: int + The number of optimization steps. + neb_method: class + The NEB method to use for the optimization. + The default is ImprovedTangentNEB. + neb_kwargs: dict + The keyword arguments for the NEB method. + local_opt: ASE optimizer object + The local optimizer object to use for the optimization. + The default is FIRE. 
+ local_kwargs: dict + The keyword arguments for the local optimizer. + + Returns: + list of ASE Atoms instances + The list of images with the interpolation between + the initial and final state. + """ + + from ...regression.gp.baseline import IDPP + + # Get all distances in the system + dist0 = images[0].get_all_distances(mic=mic) + # Calculate the differences in the distances in the system for IDPP + dist = (images[-1].get_all_distances(mic=mic) - dist0) / float( + len(images) - 1 + ) + # Use IDPP as calculator + for i, image in enumerate(images[1:-1]): + target = dist0 + (i + 1) * dist + image.calc = IDPP(target=target, mic=mic) + # Make default NEB + neb = neb_method(images, **neb_kwargs) + # Set local optimizer arguments + local_kwargs_default = dict(trajectory="idpp.traj", logfile="idpp.log") + if issubclass(local_opt, FIRE): + local_kwargs_default.update( + dict(dt=0.05, a=1.0, astart=1.0, fa=0.999, maxstep=0.2) + ) + local_kwargs_default.update(local_kwargs) + # Optimize NEB path with IDPP + with local_opt(neb, **local_kwargs_default) as opt: + opt.run(fmax=fmax, steps=steps) + return images + + +def make_rep_interpolation( + images, + mic=False, + fmax=1.0, + steps=100, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + local_opt=FIRE, + local_kwargs={}, + calc_kwargs={}, + trajectory="rep.traj", + logfile="rep.log", + **kwargs, +): + """ + Make a repulsive potential to get the interpolation from NEB optimization. + + Parameters: + images: list of ASE Atoms instances + The list of images to interpolate between. + mic: bool + If True, then the minimum-image convention is used for the + interpolation. If False, then the images are not constrained + to the minimum-image convention. + fmax: float + The maximum force for the optimization. + steps: int + The number of optimization steps. + neb_method: class + The NEB method to use for the optimization. + The default is ImprovedTangentNEB. + neb_kwargs: dict + The keyword arguments for the NEB method. 
+ local_opt: ASE optimizer object + The local optimizer object to use for the optimization. + The default is FIRE. + local_kwargs: dict + The keyword arguments for the local optimizer. + calc_kwargs: dict + The keyword arguments for the repulsive potential calculator. + trajectory: str (optional) + The name of the trajectory file to save the optimization path. + If None, then the trajectory is not saved. + logfile: str (optional) + The name of the log file to save the optimization output. + If None, then the log file is not saved. + + Returns: + list of ASE Atoms instances + The list of images with the interpolation between + the initial and final state. + """ + from ...regression.gp.baseline import RepulsionCalculator + + # Use Repulsive potential as calculator + for image in images[1:-1]: + image.calc = RepulsionCalculator(mic=mic, **calc_kwargs) + # Make default NEB + neb = neb_method(images, **neb_kwargs) + # Set local optimizer arguments + local_kwargs_default = dict(trajectory=trajectory, logfile=logfile) + if issubclass(local_opt, FIRE): + local_kwargs_default.update( + dict(dt=0.05, a=1.0, astart=1.0, fa=0.999, maxstep=0.2) + ) + local_kwargs_default.update(local_kwargs) + # Optimize NEB path with repulsive potential + with local_opt(neb, **local_kwargs_default) as opt: + opt.run(fmax=fmax, steps=steps) + return images + + +def make_born_interpolation( + images, + mic=False, + fmax=1.0, + steps=100, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + local_opt=FIRE, + local_kwargs={}, + calc_kwargs={}, + trajectory="born.traj", + logfile="born.log", + **kwargs, +): + """ + Make a Born repulsive potential to get the interpolation from NEB + optimization. + + Parameters: + images: list of ASE Atoms instances + The list of images to interpolate between. + mic: bool + If True, then the minimum-image convention is used for the + interpolation. If False, then the images are not constrained + to the minimum-image convention. 
+ fmax: float + The maximum force for the optimization. + steps: int + The number of optimization steps. + neb_method: class + The NEB method to use for the optimization. + The default is ImprovedTangentNEB. + neb_kwargs: dict + The keyword arguments for the NEB method. + local_opt: ASE optimizer object + The local optimizer object to use for the optimization. + The default is FIRE. + local_kwargs: dict + The keyword arguments for the local optimizer. + calc_kwargs: dict + The keyword arguments for the Born repulsive potential calculator. + trajectory: str (optional) + The name of the trajectory file to save the optimization path. + If None, then the trajectory is not saved. + logfile: str (optional) + The name of the log file to save the optimization output. + If None, then the log file is not saved. + + Returns: + list of ASE Atoms instances + The list of images with the interpolation between + the initial and final state. + """ + from ...regression.gp.baseline import BornRepulsionCalculator + + # Use Repulsive potential as calculator + for image in images[1:-1]: + image.calc = BornRepulsionCalculator(mic=mic, **calc_kwargs) + # Make default NEB + neb = neb_method(images, **neb_kwargs) + # Set local optimizer arguments + local_kwargs_default = dict(trajectory=trajectory, logfile=logfile) + if issubclass(local_opt, FIRE): + local_kwargs_default.update( + dict(dt=0.05, a=1.0, astart=1.0, fa=0.999, maxstep=0.2) + ) + local_kwargs_default.update(local_kwargs) + # Optimize NEB path with repulsive potential + with local_opt(neb, **local_kwargs_default) as opt: + opt.run(fmax=fmax, steps=steps) + return images + + +def make_end_interpolations( + images, + mic=False, + trust_dist=0.2, + use_perturbation=False, + d_perturb=0.02, + seed=1, + **kwargs, +): + """ + Make the linear interpolation from initial to final state, + but place the images at the initial and final states with + the maximum distance as trust_dist. 
+ + Parameters: + images: list of ASE Atoms instances + The list of images to interpolate between. + mic: bool + If True, then the minimum-image convention is used for the + interpolation. If False, then the images are not constrained + to the minimum-image convention. + trust_dist: float + The maximum distance between the initial and final state. + If the distance between the initial and final state is smaller + than trust_dist, then the images are placed at the initial and + final states with the maximum distance as trust_dist. + use_perturbation: bool + If True, then the images are perturbed with a Gaussian noise + with a standard deviation of d_perturb. + d_perturb: float + The standard deviation of the Gaussian noise used to perturb + the images if use_perturbation is True. + seed: int (optional) + The random seed used to generate the Gaussian noise if + use_perturbation is True. + If seed is None, then the default random number generator + is used. + + Returns: + list of ASE Atoms instances + The list of images with the interpolation between + the initial and final state. 
+ """ + # Get the number of images + n_images = len(images) + # Get the position of initial state + pos0 = images[0].get_positions() + # Get the distance to the final state + dist_vec = images[-1].get_positions() - pos0 + # Calculate the minimum-image convention if mic=True + if mic: + _, dist_vec = mic_distance( + dist_vec, + cell=images[0].get_cell(), + pbc=images[0].pbc, + use_vector=True, + ) + # Calculate the scaled distance + scale_dist = 2.0 * trust_dist / norm(dist_vec) + # Check if the distance is within the trust distance + if scale_dist >= 1.0: + return make_linear_interpolation( + images, + mic=mic, + use_perturbation=use_perturbation, + d_perturb=d_perturb, + seed=seed, + **kwargs, + ) + # Calculate the distance moved for each image + dist_vec = dist_vec * (scale_dist / float(n_images - 1)) + # Get the position of final state + posn = images[-1].get_positions() + # Make random generator if perturbation is used + if use_perturbation: + rng = default_rng(seed) + # Set the positions + nfirst = int(0.5 * (n_images - 1)) + for i in range(1, n_images - 1): + if i <= nfirst: + pos = pos0 + (i * dist_vec) + else: + pos = posn - ((n_images - 1 - i) * dist_vec) + # Add perturbation if requested + if use_perturbation: + pos += rng.normal(0.0, d_perturb, size=pos.shape) + # Set the position of the image + images[i].set_positions(pos) + return images + + +def get_images_distance(images): + "Get the cumulative distacnce of the images." 
+ dis = 0.0 + for i in range(len(images) - 1): + dis += norm(images[i + 1].get_positions() - images[i].get_positions()) + return dis diff --git a/catlearn/structures/neb/maxewneb.py b/catlearn/structures/neb/maxewneb.py new file mode 100644 index 00000000..dd31b8ea --- /dev/null +++ b/catlearn/structures/neb/maxewneb.py @@ -0,0 +1,120 @@ +from numpy import where +from ase.parallel import world +from .improvedneb import ImprovedTangentNEB + + +class MaxEWNEB(ImprovedTangentNEB): + """ + The maximum energy-weighted Nudged Elastic Band method implementation. + The energy-weighted method uses energy weighting to calculate the + spring constants. + The maximum energy subtracted by the energy difference (dE) is used as + the reference energy for the spring constants. + """ + + def __init__( + self, + images, + k=0.1, + kl_scale=0.1, + dE=0.01, + climb=False, + remove_rotation_and_translation=False, + mic=True, + use_image_permutation=False, + save_properties=False, + parallel=False, + comm=world, + **kwargs + ): + """ + Initialize the NEB instance. + + Parameters: + images: List of ASE Atoms instances + The ASE Atoms instances used as the images of the initial path + that is optimized. + k: List of floats or float + The (Nimg-1) spring forces acting between each image. + In the energy-weighted Nudged Elastic Band method, this spring + constants are the upper spring constants. + kl_scale: float + The scaling factor for the lower spring constants. + dE: float + The energy difference between the maximum energy + and the used reference energy. + climb: bool + Whether to use climbing image in the NEB. + See: + https://doi.org/10.1063/1.1329672 + remove_rotation_and_translation: bool + Whether to remove rotation and translation in interpolation + and when predicting forces. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + use_image_permutation: bool + Whether to permute images to minimize the path length. 
+ It assumes a greedy algorithm to find the minimum path length + by selecting the next image that is closest to the previous + image. + It is only used in the initialization of the NEB. + save_properties: bool + Whether to save the properties by making a copy of the images. + parallel: bool + Whether to run the calculations in parallel. + comm: ASE communicator instance + The communicator instance for parallelization. + """ + super().__init__( + images, + k=k, + climb=climb, + remove_rotation_and_translation=remove_rotation_and_translation, + mic=mic, + use_image_permutation=use_image_permutation, + save_properties=save_properties, + parallel=parallel, + comm=comm, + **kwargs + ) + self.kl_scale = kl_scale + self.dE = dE + + def get_spring_constants(self, **kwargs): + # Get the spring constants + energies = self.get_energies() + # Get the maximum energy + emax = energies.max() + # Calculate the reference energy + e0 = emax - self.dE + # Calculate the weighted spring constants + k_l = self.k * self.kl_scale + if e0 < emax: + a = (emax - energies[:-1]) / (emax - e0) + k = where(a < 1.0, (1.0 - a) * self.k + a * k_l, k_l) + else: + k = k_l + return k + + def get_arguments(self): + "Get the arguments of the class itself." 
+        # Get the arguments given to the class in the initialization
+        arg_kwargs = dict(
+            images=self.images,
+            k=self.k,
+            kl_scale=self.kl_scale,
+            dE=self.dE,
+            climb=self.climb,
+            remove_rotation_and_translation=self.rm_rot_trans,
+            mic=self.mic,
+            use_image_permutation=self.use_image_permutation,
+            save_properties=self.save_properties,
+            parallel=self.parallel,
+            comm=self.comm,
+        )
+        # Get the constants made within the class
+        constant_kwargs = dict()
+        # Get the objects made within the class
+        object_kwargs = dict()
+        return arg_kwargs, constant_kwargs, object_kwargs
diff --git a/catlearn/structures/neb/orgneb.py b/catlearn/structures/neb/orgneb.py
new file mode 100644
index 00000000..c1f00aa3
--- /dev/null
+++ b/catlearn/structures/neb/orgneb.py
@@ -0,0 +1,629 @@
+from numpy import (
+    arange,
+    argmax,
+    array,
+    asarray,
+    einsum,
+    empty,
+    full,
+    nanmax,
+    ones,
+    sqrt,
+    vdot,
+    zeros,
+)
+from ase.calculators.singlepoint import SinglePointCalculator
+from ase.build import minimize_rotation_and_translation
+from ase.parallel import world, broadcast
+import warnings
+from ..structure import Structure
+from ...regression.gp.fingerprint.geometry import mic_distance
+from ...regression.gp.calculator.copy_atoms import compare_atoms
+
+
+class OriginalNEB:
+    """
+    The original Nudged Elastic Band method implementation for the tangent
+    and parallel force.
+
+    See:
+    https://doi.org/10.1142/9789812839664_0016
+    """
+
+    def __init__(
+        self,
+        images,
+        k=0.1,
+        climb=False,
+        remove_rotation_and_translation=False,
+        mic=True,
+        use_image_permutation=False,
+        save_properties=False,
+        parallel=False,
+        comm=world,
+        **kwargs,
+    ):
+        """
+        Initialize the NEB instance.
+
+        Parameters:
+            images: List of ASE Atoms instances
+                The ASE Atoms instances used as the images of the initial path
+                that is optimized.
+            k: List of floats or float
+                The (Nimg-1) spring forces acting between each image.
+            climb: bool
+                Whether to use climbing image in the NEB.
+ See: + https://doi.org/10.1063/1.1329672 + remove_rotation_and_translation: bool + Whether to remove rotation and translation in interpolation + and when predicting forces. + mic: bool + Minimum Image Convention (Shortest distances when + periodic boundary conditions are used). + use_image_permutation: bool + Whether to permute images to minimize the path length. + It assumes a greedy algorithm to find the minimum path length + by selecting the next image that is closest to the previous + image. + It is only used in the initialization of the NEB. + save_properties: bool + Whether to save the properties by making a copy of the images. + parallel: bool + Whether to run the calculations in parallel. + comm: ASE communicator instance + The communicator instance for parallelization. + + """ + # Check that the endpoints are the same + self.check_images(images) + # Set images + if save_properties: + self.images = [Structure(image) for image in images] + else: + self.images = images + self.nimages = len(images) + self.natoms = len(images[0]) + # Set the spring constant + if isinstance(k, (int, float)): + self.k = full(self.nimages - 1, k) + else: + self.k = k.copy() + # Set the parameters + self.climb = climb + self.rm_rot_trans = remove_rotation_and_translation + self.mic = mic + self.save_properties = save_properties + self.use_image_permutation = use_image_permutation + # Set the parallelization + self.parallel = parallel + if parallel: + self.parallel_setup(comm) + if (self.nimages - 2) % self.size != 0: + if self.rank == 0: + warnings.warn( + "The number of moving images are not chosen " + "optimal for the number of processors when running in " + "parallel!" + ) + else: + self.remove_parallel_setup() + # Find the minimum path length if requested + self.permute_images() + # Set the properties + self.reset() + + def check_images( + self, + images, + properties_to_check=["atoms", "cell", "pbc"], + ): + "Check that the images are the same structures." 
+ ends_equal = compare_atoms( + images[0], + images[-1], + properties_to_check=properties_to_check, + ) + ends_move_equal = compare_atoms( + images[0], + images[1], + properties_to_check=properties_to_check, + ) + if not (ends_equal and ends_move_equal): + raise ValueError("The images are not the same structures.") + return self + + def interpolate(self, method="linear", mic=True, **kwargs): + """ + Make an interpolation between the start and end structure. + + Parameters: + method: str + The method used for performing the interpolation. + The optional methods is {linear, idpp, ends}. + mic: bool + Whether to use the minimum-image convention. + + Returns: + self: The instance itself. + """ + from .interpolate_band import interpolate + + self.images = interpolate( + self.images[0], + self.images[-1], + n_images=self.nimages, + method=method, + mic=mic, + remove_rotation_and_translation=self.rm_rot_trans, + **kwargs, + ) + return self + + def get_positions(self): + """ + Get the positions of all the moving images in one array. + + Returns: + ((Nimg-2)*Natoms,3) array: Coordinates of all atoms in + all the moving images. + """ + positions = array( + [image.get_positions() for image in self.images[1:-1]] + ) + return positions.reshape(-1, 3) + + def set_positions(self, positions, **kwargs): + """ + Set the positions of all the images in one array. + + Parameters: + positions: ((Nimg-2)*Natoms,3) array + Coordinates of all atoms in all the moving images. + """ + self.reset() + for i, image in enumerate(self.images[1:-1]): + posi = i * self.natoms + posip = (i + 1) * self.natoms + image.set_positions(positions[posi:posip]) + pass + + def get_potential_energy(self, **kwargs): + """ + Get the potential energy of the NEB as the sum of energies. + + Returns: + float: Sum of energies of moving images. + """ + return (self.get_energies(**kwargs)[1:-1]).sum() + + def get_forces(self, **kwargs): + """ + Get the forces of the NEB as the stacked forces of the moving images. 
+ + Returns: + ((Nimg-2)*Natoms,3) array: Forces of all the atoms in + all the moving images. + """ + # Remove rotation and translation + if self.rm_rot_trans: + for i in range(1, self.nimages): + minimize_rotation_and_translation( + self.images[i - 1], + self.images[i], + ) + # Get the forces for each image + forces = self.calculate_forces(**kwargs) + # Get change in the coordinates to the previous and later image + position_plus, position_minus = self.get_position_diff() + # Calculate the tangent to the moving images + tangent = self.get_tangent(position_plus, position_minus) + # Calculate the parallel forces between images + parallel_forces = self.get_parallel_forces( + tangent, + position_plus, + position_minus, + ) + # Calculate the perpendicular forces + perpendicular_forces = self.get_perpendicular_forces(tangent, forces) + # Calculate the full force + forces_new = parallel_forces + perpendicular_forces + # Calculate the force of the climbing image + if self.climb: + forces_new = self.get_climb_forces(forces_new, forces, tangent) + return forces_new.reshape(-1, 3) + + def get_x(self): + return self.get_positions().ravel() + + def set_x(self, x): + self.set_positions(x.reshape(-1, 3)) + + def get_gradient(self): + return self.get_forces().ravel() + + def get_value(self, *args, **kwargs): + return self.get_potential_energy(*args, **kwargs) + + def gradient_norm(self, gradient): + forces = gradient.reshape(-1, 3) + return sqrt(einsum("ij,ij->i", forces, forces)).max() + + def ndofs(self): + "Number of degrees of freedom in the NEB." + return 3 * len(self) + + def get_image_positions(self): + """ + Get the positions of the images. + + Returns: + ((Nimg),Natoms,3) array: The positions for all atoms in + all the images. + """ + return asarray([image.get_positions() for image in self.images]) + + def get_climb_forces(self, forces_new, forces, tangent, **kwargs): + "Get the forces of the climbing image." 
+ i_max = argmax(self.get_energies()[1:-1]) + forces_parallel = 2.0 * vdot(forces[i_max], tangent[i_max]) + forces_parallel = forces_parallel * tangent[i_max] + forces_new[i_max] = forces[i_max] - forces_parallel + return forces_new + + def calculate_forces(self, **kwargs): + "Calculate the forces for all the images separately." + if self.real_forces is None: + self.calculate_properties() + return self.real_forces[1:-1].copy() + + def get_energies(self, **kwargs): + "Get the individual energy for each image." + if self.energies is None: + self.calculate_properties() + return self.energies + + def calculate_properties(self, **kwargs): + "Calculate the energy and forces for each image." + # Initialize the arrays + self.real_forces = zeros((self.nimages, self.natoms, 3)) + self.energies = zeros((self.nimages)) + # Get the energy of the fixed images + self.energies[0] = self.images[0].get_potential_energy() + self.energies[-1] = self.images[-1].get_potential_energy() + # Check if the calculation is done in parallel + if self.parallel: + return self.calculate_properties_parallel(**kwargs) + # Calculate the energy and forces for each image + for i, image in enumerate(self.images[1:-1]): + self.real_forces[i + 1] = image.get_forces() + self.energies[i + 1] = image.get_potential_energy() + return self.energies, self.real_forces + + def calculate_properties_parallel(self, **kwargs): + "Calculate the energy and forces for each image in parallel." 
+ # Calculate the energy and forces for each image + for i, image in enumerate(self.images[1:-1]): + if self.rank == (i % self.size): + self.real_forces[i + 1] = image.get_forces() + self.energies[i + 1] = image.get_potential_energy() + # Broadcast the results + for i in range(1, self.nimages - 1): + root = (i - 1) % self.size + self.energies[i], self.real_forces[i] = broadcast( + (self.energies[i], self.real_forces[i]), + root=root, + comm=self.comm, + ) + return self.energies, self.real_forces + + def emax(self, **kwargs): + "Get maximum energy of the moving images." + return nanmax(self.get_energies(**kwargs)[1:-1]) + + def get_parallel_forces(self, tangent, pos_p, pos_m, **kwargs): + "Get the parallel forces between the images." + # Get the spring constants + k = self.get_spring_constants() + k = k.reshape(-1, 1, 1) + # Calculate the parallel forces + forces_parallel = (k[1:] * pos_p) - (k[:-1] * pos_m) + forces_parallel = (forces_parallel * tangent).sum(axis=(1, 2)) + forces_parallel = forces_parallel.reshape(-1, 1, 1) * tangent + return forces_parallel + + def get_perpendicular_forces(self, tangent, forces, **kwargs): + "Get the perpendicular forces to the images." + f_parallel = (forces * tangent).sum(axis=(1, 2)) + f_parallel = f_parallel.reshape(-1, 1, 1) * tangent + return forces - f_parallel + + def get_position_diff(self): + """ + Get the change in the coordinates relative to + the previous and later image. + """ + positions = self.get_image_positions() + position_diff = positions[1:] - positions[:-1] + pbc = self.get_pbc() + if self.mic and pbc.any(): + cell = self.get_cell() + _, position_diff = mic_distance( + position_diff, + cell=cell, + pbc=pbc, + use_vector=True, + ) + return position_diff[1:], position_diff[:-1] + + def get_tangent(self, pos_p, pos_m, **kwargs): + "Calculate the tangent to the moving images." 
+ # Normalization factors + pos_m_norm = sqrt(einsum("ijk,ijk->i", pos_m, pos_m)).reshape(-1, 1, 1) + pos_p_norm = sqrt(einsum("ijk,ijk->i", pos_p, pos_p)).reshape(-1, 1, 1) + # Normalization of tangent + tangent_m = pos_m / pos_m_norm + tangent_p = pos_p / pos_p_norm + # Sum them + tangent = tangent_m + tangent_p + # Normalization of tangent + tangent_norm = sqrt(einsum("ijk,ijk->i", tangent, tangent)).reshape( + -1, 1, 1 + ) + tangent = tangent / tangent_norm + return tangent + + def get_spring_constants(self, **kwargs): + "Get the spring constants for the images." + return self.k + + def get_path_length(self, **kwargs): + "Get the path length of the NEB." + # Get the distances between the images + pos_p, pos_m = self.get_position_diff() + # Calculate the path length + path_len = sqrt(einsum("ijk,ijk->i", pos_p, pos_p)).sum() + path_len += sqrt(einsum("ij,ij->", pos_m[0], pos_m[0])) + return path_len + + def permute_images(self, **kwargs): + """ + Set the minimum path length by minimizing the distance between + the images by permuting the images. + """ + # Check if there are enough images to optimize + if self.nimages <= 3 or not self.use_image_permutation: + return self + # Find the minimum path length + selected_indices = self.find_minimum_path_length(**kwargs) + # Set the images to the selected indices + self.images = [self.images[i] for i in selected_indices] + # Reset energies and forces + self.reset() + return self + + def find_minimum_path_length(self, **kwargs): + """ + Find the minimum path length by minimizing the distance between + the images. 
+ """ + # Get the positions of the images + positions = self.get_image_positions() + # Get the periodic boundary conditions + pbc = self.get_pbc() + cell = self.get_cell() + use_mic = self.mic and pbc.any() + if not use_mic: + positions = positions.reshape(self.nimages, -1) + # Set the indices for the selected images + indices = arange(self.nimages, dtype=int) + selected_indices = empty(self.nimages, dtype=int) + selected_indices[0] = 0 + selected_indices[-1] = self.nimages - 1 + i_f = 1 + i_b = self.nimages - 2 + i_min_f = 0 + i_min_b = self.nimages - 1 + is_forward = True + i_min = i_min_f + # Create a boolean array to keep track of available images + available = ones(self.nimages, dtype=bool) + available[0] = available[-1] = False + # Loop until all images are selected + while available.any(): + candidates = indices[available] + # Calculate the distance vectors from the current images + dist = positions[candidates] - positions[i_min, None] + if use_mic: + dist = [ + mic_distance( + dis, + cell=cell, + pbc=pbc, + use_vector=False, + )[0] + for dis in dist + ] + dist = asarray(dist) + # Calculate the distances + dist = sqrt(einsum("ij,ij->i", dist, dist)) + # Find the minimum distance from the current images + i_min = dist.argmin() + if is_forward: + # Find the minimum distance from the start image + i_min_f = candidates[i_min] + selected_indices[i_f] = i_min_f + available[i_min_f] = False + i_f += 1 + i_min = i_min_b + else: + # Find the minimum distance from the end image + i_min_b = candidates[i_min] + selected_indices[i_b] = i_min_b + available[i_min_b] = False + i_b -= 1 + i_min = i_min_f + # Switch the direction for the next iteration + is_forward = not is_forward + return selected_indices + + def reset(self): + "Reset the stored properties." + self.energies = None + self.real_forces = None + return self + + def parallel_setup(self, comm, **kwargs): + "Setup the parallelization." 
+ if comm is None: + self.comm = world + else: + self.comm = comm + self.rank = self.comm.rank + self.size = self.comm.size + return self + + def remove_parallel_setup(self): + "Remove the parallelization by removing the communicator." + self.comm = None + self.rank = 0 + self.size = 1 + return self + + def get_residual(self, **kwargs): + "Get the residual of the NEB." + forces = self.get_forces() + return sqrt(einsum("ij,ij->i", forces, forces)).max() + + def set_calculator(self, calculators, copy_calc=False, **kwargs): + """ + Set the calculators for all the images. + + Parameters: + calculators: List of ASE Calculators or ASE Calculator + The calculator used for all the images if a list is given. + If a single calculator is given, it is used for all images. + """ + self.reset() + if isinstance(calculators, (list, tuple)): + if len(calculators) != self.nimages - 2: + raise ValueError( + "The number of calculators must be " + "equal to the number of moving images." + ) + for i, image in enumerate(self.images[1:-1]): + if copy_calc: + image.calc = calculators[i].copy() + else: + image.calc = calculators[i] + else: + for image in self.images[1:-1]: + if copy_calc: + image.calc = calculators.copy() + else: + image.calc = calculators + return self + + @property + def calc(self): + """ + The calculator objects. 
+ """ + return [image.calc for image in self.images[1:-1]] + + @calc.setter + def calc(self, calculators): + return self.set_calculator(calculators) + + def converged(self, forces, fmax): + forces = forces.reshape(-1, 3) + return sqrt(einsum("ij,ij->i", forces, forces)).max() < fmax + + def is_neb(self): + return True + + def __ase_optimizable__(self): + return self + + def __len__(self): + return int(self.nimages - 2) * self.natoms + + def freeze_results_on_image(self, atoms, **results_to_include): + atoms.calc = SinglePointCalculator(atoms=atoms, **results_to_include) + return atoms + + def iterimages(self): + # Allows trajectory to convert NEB into several images + for i, atoms in enumerate(self.images): + if i == 0 or i == self.nimages - 1: + yield atoms + else: + atoms = atoms.copy() + atoms = self.freeze_results_on_image( + atoms, + energy=self.energies[i], + forces=self.real_forces[i], + ) + yield atoms + + def get_pbc(self): + """ + Get the periodic boundary conditions of the images. + + Returns: + (3,) array: The periodic boundary conditions of the images. + """ + return asarray(self.images[0].get_pbc()) + + def get_cell(self): + """ + Get the cell of the images. + + Returns: + (3,3) array: The cell of the images. + """ + return asarray(self.images[0].get_cell()) + + def get_arguments(self): + "Get the arguments of the class itself." + # Get the arguments given to the class in the initialization + arg_kwargs = dict( + images=self.images, + k=self.k, + climb=self.climb, + remove_rotation_and_translation=self.rm_rot_trans, + mic=self.mic, + use_image_permutation=self.use_image_permutation, + save_properties=self.save_properties, + parallel=self.parallel, + comm=self.comm, + ) + # Get the constants made within the class + constant_kwargs = dict() + # Get the objects made within the class + object_kwargs = dict() + return arg_kwargs, constant_kwargs, object_kwargs + + def copy(self): + "Copy the object." 
+ # Get all arguments + arg_kwargs, constant_kwargs, object_kwargs = self.get_arguments() + # Make a clone + clone = self.__class__(**arg_kwargs) + # Check if constants have to be saved + if len(constant_kwargs.keys()): + for key, value in constant_kwargs.items(): + clone.__dict__[key] = value + # Check if objects have to be saved + if len(object_kwargs.keys()): + for key, value in object_kwargs.items(): + clone.__dict__[key] = value.copy() + return clone + + def __repr__(self): + arg_kwargs = self.get_arguments()[0] + str_kwargs = ",".join( + [f"{key}={value}" for key, value in arg_kwargs.items()] + ) + return "{}({})".format(self.__class__.__name__, str_kwargs) diff --git a/catlearn/structures/structure.py b/catlearn/structures/structure.py new file mode 100644 index 00000000..e9970e66 --- /dev/null +++ b/catlearn/structures/structure.py @@ -0,0 +1,191 @@ +from numpy import einsum, sqrt +from ase import Atoms +from ..regression.gp.calculator.copy_atoms import copy_atoms + + +class Structure(Atoms): + def __init__(self, atoms, *args, **kwargs): + self.atoms = atoms + self.__dict__.update(atoms.__dict__) + if atoms.calc is not None and len(atoms.calc.results): + self.store_results() + else: + self.reset() + + def set_positions(self, *args, **kwargs): + self.atoms.set_positions(*args, **kwargs) + self.reset() + return + + def set_scaled_positions(self, *args, **kwargs): + self.atoms.set_scaled_positions(*args, **kwargs) + self.reset() + return + + def set_cell(self, *args, **kwargs): + self.atoms.set_cell(*args, **kwargs) + self.reset() + return + + def set_pbc(self, *args, **kwargs): + self.atoms.set_pbc(*args, **kwargs) + self.reset() + return + + def set_initial_charges(self, *args, **kwargs): + self.atoms.set_initial_charges(*args, **kwargs) + self.reset() + return + + def set_initial_magnetic_moments(self, *args, **kwargs): + self.atoms.set_initial_magnetic_moments(*args, **kwargs) + self.reset() + return + + def set_momenta(self, *args, **kwargs): + 
self.atoms.set_momenta(*args, **kwargs) + self.reset() + return + + def set_velocities(self, *args, **kwargs): + self.atoms.set_velocities(*args, **kwargs) + self.reset() + return + + def get_property(self, name, allow_calculation=True, **kwargs): + """ + Get or calculate the requested property. + + Parameters: + name : str + The name of the requested property. + allow_calculation : bool + Whether the property is allowed to be calculated. + + Returns: + float or list: The requested property. + """ + if self.is_saved: + if name in self.results: + output = self.atoms_saved.calc.get_property( + name, + atoms=self.atoms_saved, + allow_calculation=True, + **kwargs, + ) + return output + output = self.atoms.calc.get_property( + name, + atoms=self.atoms, + allow_calculation=allow_calculation, + **kwargs, + ) + self.store_results() + return output + + def get_forces(self, *args, **kwargs): + if self.is_saved: + if "force" in self.results: + return self.atoms_saved.get_forces(*args, **kwargs) + forces = self.atoms.get_forces(*args, **kwargs) + self.store_results() + return forces + + def get_potential_energy(self, *args, **kwargs): + if self.is_saved: + if "energy" in self.results: + return self.atoms_saved.get_potential_energy(*args, **kwargs) + energy = self.atoms.get_potential_energy(*args, **kwargs) + self.store_results() + return energy + + def get_x(self): + return self.get_positions().ravel() + + def set_x(self, x): + self.set_positions(x.reshape(-1, 3)) + + def get_gradient(self): + return self.get_forces().ravel() + + def get_value(self, *args, **kwargs): + return self.get_potential_energy(*args, **kwargs) + + def gradient_norm(self, gradient): + forces = gradient.reshape(-1, 3) + return sqrt(einsum("ij,ij->i", forces, forces)).max() + + def get_uncertainty(self, *args, **kwargs): + if self.is_saved: + if "uncertainty" in self.results: + unc = self.atoms_saved.calc.get_uncertainty( + self.atoms_saved, + *args, + **kwargs, + ) + return unc + unc = 
self.atoms.calc.get_uncertainty( + self.atoms, + *args, + **kwargs, + ) + self.store_results() + return unc + + def converged(self, forces, fmax): + forces = forces.reshape(-1, 3) + return sqrt(einsum("ij,ij->i", forces, forces)).max() < fmax + + def is_neb(self): + return False + + def __ase_optimizable__(self): + return self + + def set_calculator(self, calc, copy_calc=False, **kwargs): + if copy_calc: + self.atoms.calc = calc.copy() + else: + self.atoms.calc = calc + self.reset() + return + + @property + def calc(self): + """ + The calculator objects. + """ + if self.is_saved: + return self.atoms_saved.calc + return self.atoms.calc + + @calc.setter + def calc(self, calc): + return self.set_calculator(calc) + + def copy(self): + return self.atoms.copy() + + def get_structure(self): + return self.atoms + + def get_atoms(self): + return self.get_structure() + + def get_saved_structure(self): + return self.atoms_saved + + def reset(self): + self.atoms_saved = self.atoms.copy() + self.results = {} + self.is_saved = False + return self + + def store_results(self, **kwargs): + """ + Store the calculated results. 
+ """ + self.atoms_saved = copy_atoms(self.atoms) + self.results = self.atoms_saved.calc.results.copy() + self.is_saved = True + return self.atoms_saved diff --git a/catlearn/tools/__init__.py b/catlearn/tools/__init__.py new file mode 100644 index 00000000..d6014786 --- /dev/null +++ b/catlearn/tools/__init__.py @@ -0,0 +1,3 @@ +from .plot import plot_minimize, plot_neb, plot_neb_fit_mlcalc, plot_all_neb + +__all__ = ["plot_minimize", "plot_neb", "plot_neb_fit_mlcalc", "plot_all_neb"] diff --git a/catlearn/tools/plot.py b/catlearn/tools/plot.py new file mode 100644 index 00000000..e87eb61c --- /dev/null +++ b/catlearn/tools/plot.py @@ -0,0 +1,503 @@ +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.cm as cm +from ase.io import read +from ase.parallel import world +from ..structures.neb import ImprovedTangentNEB + + +def plot_minimize( + pred_atoms, + eval_atoms, + use_uncertainty=True, + ax=None, + loc=0, + **kwargs, +): + """ + Plot the predicted and evaluated atoms in a 2D plot. + + Parameters: + pred_atoms: ASE atoms instance + The predicted atoms. + eval_atoms: ASE atoms instance + The evaluated atoms. + use_uncertainty: bool + If True, use the uncertainty of the atoms. + ax: matplotlib axis instance + The axis to plot the NEB images. + loc: int + The location of the legend. 
+ + Returns: + ax: matplotlib axis instance + """ + # Make figure if it is not given + if ax is None: + _, ax = plt.subplots() + # Only plot the atoms on the master rank + if world.rank != 0: + return ax + # Get the energies of the predicted atoms + if isinstance(pred_atoms, str): + pred_atoms = read(pred_atoms, ":") + pred_energies = [get_true_predicted_energy(atoms) for atoms in pred_atoms] + # Get the uncertainties of the atoms if requested + uncertainties = None + if use_uncertainty: + uncertainties = np.array( + [get_uncertainty(atoms) for atoms in pred_atoms] + ) + # Get the energies of the evaluated atoms + if isinstance(eval_atoms, str): + eval_atoms = read(eval_atoms, ":") + eval_energies = [atoms.get_potential_energy() for atoms in eval_atoms] + # Get the reference energy + e_ref = eval_energies[0] + # Make the energies relative to the first energy + pred_energies = np.array(pred_energies) - e_ref + eval_energies = np.array(eval_energies) - e_ref + # Make x values + x_values = np.arange(1, len(eval_energies) + 1) + x_trunc = -len(pred_energies) + x_pred = x_values[x_trunc:] + # Plot the energies of the atoms + ax.plot(x_pred, pred_energies, "o-", color="red", label="Predicted") + ax.plot(x_values, eval_energies, "o-", color="black", label="Evaluated") + # Plot the uncertainties of the atoms if requested + if uncertainties is not None: + ax.fill_between( + x_pred, + pred_energies - uncertainties, + pred_energies + uncertainties, + color="red", + alpha=0.3, + ) + ax.fill_between( + x_pred, + pred_energies - 2.0 * uncertainties, + pred_energies + 2.0 * uncertainties, + color="red", + alpha=0.2, + ) + # Make labels + ax.set_xlabel("Iteration") + ax.set_ylabel("Potential energy / [eV]") + ax.legend(loc=loc) + return ax + + +def get_neb_data( + images, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + climb=False, + use_uncertainty=False, + use_projection=False, +): + """ + Get the NEB data for plotting. 
+ + Parameters: + images: list of ASE atoms instances + The images of the NEB calculation. + neb_method: class + The NEB method to use. + neb_kwargs: dict + The keyword arguments for the NEB method. + climb: bool + If True, use the climbing image method. + use_uncertainty: bool + If True, use the uncertainty of the images. + use_projection: bool + If True, use the projection of the derivatives on the tangent. + """ + # Default values for the neb method + used_neb_kwargs = dict( + k=3.0, + remove_rotation_and_translation=False, + save_properties=True, + mic=True, + ) + used_neb_kwargs.update(neb_kwargs) + # Initialize the NEB method + neb = neb_method(images, climb=climb, **used_neb_kwargs) + # Get the energies of the images + energies = [get_true_predicted_energy(image) for image in images] + energies = np.array(energies) - energies[0] + # Get the uncertainties of the images if requested + uncertainties = None + if use_uncertainty: + uncertainties = [get_uncertainty(image) for image in images[1:-1]] + uncertainties = np.concatenate([[0.0], uncertainties, [0.0]]) + # Get the distances between the images + pos_p, pos_m = neb.get_position_diff() + distances = np.linalg.norm(pos_p, axis=(1, 2)) + distances = np.concatenate([[0.0], [np.linalg.norm(pos_m[0])], distances]) + distances = np.cumsum(distances) + # Use projection of the derivatives on the tangent + if use_projection: + # Get the forces + forces = [images[0].get_forces()] + forces = forces + [ + get_true_predicted_forces(image) for image in images[1:-1] + ] + forces = forces + [images[-1].get_forces()] + forces = np.array(forces) + # Get the tangent + tangent = neb.get_tangent(pos_p, pos_m) + tangent = np.concatenate([[pos_m[0]], tangent, [pos_p[0]]], axis=0) + tangent_norm = np.linalg.norm(tangent, axis=(1, 2)).reshape(-1, 1, 1) + tangent = tangent / tangent_norm + # Get the projection of the derivatives on the tangent + deriv_proj = -np.sum(forces * tangent, axis=(1, 2)) + else: + deriv_proj = None + return 
neb, distances, energies, uncertainties, deriv_proj + + +def plot_neb( + images, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + climb=True, + use_uncertainty=True, + use_projection=True, + proj_len_scale=0.4, + ax=None, + **kwargs, +): + """ + Plot the NEB images in a 2D plot. + + Parameters: + images: list of ASE atoms instances + The images of the NEB calculation. + neb_method: class + The NEB method to use. + neb_kwargs: dict + The keyword arguments for the NEB method. + climb: bool + If True, use the climbing image method. + use_uncertainty: bool + If True, use the uncertainty of the images. + use_projection: bool + If True, use the projection of the derivatives on the tangent. + proj_len_scale: float + The scale of the projection length. + It is only used if use_projection is True. + ax: matplotlib axis instance + The axis to plot the NEB images. + + Returns: + ax: matplotlib axis instance + """ + # Make figure if it is not given + if ax is None: + _, ax = plt.subplots() + # Only plot the atoms on the master rank + if world.rank != 0: + return ax + # Get data from NEB + _, distances, energies, uncertainties, deriv_proj = get_neb_data( + images, + neb_method=neb_method, + neb_kwargs=neb_kwargs, + climb=climb, + use_uncertainty=use_uncertainty, + use_projection=use_projection, + ) + # Plot the NEB images + ax.plot(distances, energies, "o-", color="black") + if uncertainties is not None: + ax.errorbar( + distances, + energies, + yerr=uncertainties, + color="black", + capsize=3, + ) + ax.errorbar( + distances, + energies, + yerr=2.0 * uncertainties, + color="black", + capsize=1.5, + ) + # Plot the projection of the derivatives + if use_projection: + # Get length of projection + proj_len = proj_len_scale * distances[-1] / len(images) + for i, deriv in enumerate(deriv_proj): + dist = distances[i] + energy = energies[i] + x_range = [dist - proj_len, dist + proj_len] + y_range = [energy - deriv * proj_len, energy + deriv * proj_len] + ax.plot( + x_range, + 
y_range, + color="red", + ) + # Make labels + ax.set_xlabel("Distance / [Ã…]") + ax.set_ylabel("Potential energy / [eV]") + title = "Reaction energy = {:.3f} eV \n".format(energies[-1]) + title += "Activation energy = {:.3f} eV".format(energies.max()) + ax.set_title(title) + return ax + + +def plot_neb_fit_mlcalc( + images, + mlcalc, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + climb=True, + use_uncertainty=True, + distance_step=0.01, + include_noise=True, + ax=None, + **kwargs, +): + """ + Plot the NEB images in a 2D plot with the ML calculator predictions. + + Parameters: + images: list of ASE atoms instances + The images of the NEB calculation. + mlcalc: ML calculator instance + The ML calculator to use for the predictions. + neb_method: class + The NEB method to use. + neb_kwargs: dict + The keyword arguments for the NEB method. + climb: bool + If True, use the climbing image method. + use_uncertainty: bool + If True, use the uncertainty of the predictions. + distance_step: float + The step size for the distance between the images. + include_noise: bool + Whether to include noise in the uncertainty from the model. + ax: matplotlib axis instance + The axis to plot the NEB images. 
+ + Returns: + ax: matplotlib axis instance + """ + # Make figure if it is not given + if ax is None: + _, ax = plt.subplots() + # Only plot the atoms on the master rank + if world.rank != 0: + return ax + # Get data from NEB + neb, distances, energies, _, _ = get_neb_data( + images, + neb_method=neb_method, + neb_kwargs=neb_kwargs, + climb=climb, + use_uncertainty=False, + use_projection=False, + ) + # Get the reference energy + e0 = images[0].get_potential_energy() + # Update whether to include noise in uncertainty prediction + mlcalc = mlcalc.update_mlmodel_arguments(include_noise=include_noise) + # Get the first image + image = images[0].copy() + image.info["results"] = {} + image.calc = mlcalc + pos0 = image.get_positions() + # Get the distances between the images + pos_p, pos_m = neb.get_position_diff() + displacements = np.append([pos_m[0]], pos_p, axis=0) + # Get the curve positions, energies, and uncertainties + cum_distance = 0.0 + pred_distance = [] + pred_energies = [] + uncertainties = [] + for i, disp in enumerate(displacements): + # Get the distance between the points on the curve + dist = np.linalg.norm(disp) + scalings = np.arange(0.0, dist, distance_step) / dist + scalings = np.append(scalings, [1.0]) + if i != 0: + scalings = scalings[1:] + for scaling in scalings: + # Calculate the position for the point on the curve + pred_pos = pos0 + scaling * disp + pred_distance.append(cum_distance + scaling * dist) + image.set_positions(pred_pos) + # Get the curve energy + energy = get_true_predicted_energy(image) + pred_energies.append(energy) + # Get the curve uncertainty + if use_uncertainty: + unc = image.calc.get_uncertainty(image) + uncertainties.append(unc) + pos0 += disp + cum_distance += dist + # Make numpy arrays + pred_distance = np.array(pred_distance) + pred_energies = np.array(pred_energies) - e0 + uncertainties = np.array(uncertainties) + # Plot the NEB images + ax.plot(distances, energies, "o", color="black") + ax.plot(pred_distance, 
pred_energies, "-", color="red") + if len(uncertainties): + ax.fill_between( + pred_distance, + pred_energies - uncertainties, + pred_energies + uncertainties, + color="red", + alpha=0.3, + ) + ax.fill_between( + pred_distance, + pred_energies - 2.0 * uncertainties, + pred_energies + 2.0 * uncertainties, + color="red", + alpha=0.2, + ) + # Make labels + ax.set_xlabel("Distance / [Ã…]") + ax.set_ylabel("Potential energy / [eV]") + title = "Reaction energy = {:.3f} eV \n".format(energies[-1]) + title += "Activation energy = {:.3f} eV".format(pred_energies.max()) + ax.set_title(title) + return ax + + +def plot_all_neb( + neb_traj, + n_images, + neb_method=ImprovedTangentNEB, + neb_kwargs={}, + ax=None, + cmap=cm.jet, + alpha=0.7, + **kwargs, +): + """ + Plot all the NEB images in a 2D plot. + + Parameters: + neb_traj: list of ASE atoms instances or str + The NEB trajectories of the NEB calculation. + It can be a list of all ASE atoms instances for all NEB bands. + It can also be a string to the file containing the NEB + trajectories. + n_images: int + The number of images in each NEB band. + neb_method: class + The NEB method to use. + neb_kwargs: dict + The keyword arguments for the NEB method. + ax: matplotlib axis instance + The axis to plot the NEB images. + cmap: matplotlib colormap + The colormap to use for the NEB bands. + alpha: float + The transparency of the NEB bands except for the last. 
+
+    Returns:
+        ax: matplotlib axis instance
+    """
+    # Make figure if it is not given
+    if ax is None:
+        _, ax = plt.subplots()
+    # Only plot the atoms on the master rank
+    if world.rank != 0:
+        return ax
+    # Calculate the number of NEB bands
+    if isinstance(neb_traj, str):
+        neb_traj = read(neb_traj, ":")
+    n_neb = len(neb_traj) // n_images
+    # Plot all NEB bands
+    for i in range(n_neb):
+        # Get the images of the NEB band
+        ni = i * n_images
+        ni1 = (i + 1) * n_images
+        images = neb_traj[ni:ni1]
+        # Get data from NEB band
+        _, distances, energies, _, _ = get_neb_data(
+            images,
+            neb_method=neb_method,
+            neb_kwargs=neb_kwargs,
+            climb=False,
+            use_uncertainty=False,
+            use_projection=False,
+        )
+        # Get the color
+        if n_neb == 1:
+            color = cmap(1)
+        else:
+            color = cmap(i / (n_neb - 1))
+        # Get the transparency
+        if i + 1 == n_neb:
+            alpha = 1.0
+        # Plot the NEB images
+        ax.plot(distances, energies, "o-", color=color, alpha=alpha)
+    # Add colorbar
+    if n_neb == 1:
+        colors = cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(0, 1))
+    else:
+        colors = cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(0, n_neb - 1))
+    cbar = plt.colorbar(colors, ax=ax)
+    cbar.set_label("NEB band index")
+    # Make labels
+    ax.set_xlabel("Distance / [Å]")
+    ax.set_ylabel("Potential energy / [eV]")
+    return ax
+
+
+def get_true_predicted_energy(atoms, **kwargs):
+    """
+    Get the true predicted energy of the atoms.
+    Since the BOCalculator will return the predicted energy and
+    the uncertainty times the kappa value, this should be avoided.
+    """
+    energy = atoms.get_potential_energy()
+    if (
+        hasattr(atoms.calc, "results")
+        and "predicted energy" in atoms.calc.results
+    ):
+        energy = atoms.calc.results["predicted energy"]
+    elif (
+        "results" in atoms.info and "predicted energy" in atoms.info["results"]
+    ):
+        energy = atoms.info["results"]["predicted energy"]
+    return energy
+
+
+def get_uncertainty(atoms, **kwargs):
+    """
+    Get the uncertainty of the atoms.
+ """ + if hasattr(atoms.calc, "results") and "uncertainty" in atoms.calc.results: + uncertainty = atoms.calc.results["uncertainty"] + elif "results" in atoms.info and "uncertainty" in atoms.info["results"]: + uncertainty = atoms.info["results"]["uncertainty"] + else: + uncertainty = atoms.calc.get_uncertainty(atoms) + return uncertainty + + +def get_true_predicted_forces(atoms, **kwargs): + """ + Get the true predicted forces of the atoms. + Since the BOCalculator will return the predicted forces and + the uncertainty times the kappa value, this should be avoided. + """ + forces = atoms.get_forces() + if ( + hasattr(atoms.calc, "results") + and "predicted forces" in atoms.calc.results + ): + forces = atoms.calc.results["predicted forces"] + elif ( + "results" in atoms.info and "predicted forces" in atoms.info["results"] + ): + forces = atoms.info["results"]["predicted forces"] + return forces diff --git a/setup.py b/setup.py index b33c7c7a..0591e9de 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,9 @@ packages=find_packages(), python_requires=">=3.8", install_requires=["numpy>=1.20.3", "scipy>=1.8.0", "ase>=3.22.1"], - extras_require={"optional": ["mpi4py>=3.0.3", "dscribe>=2.1"]}, + extras_require={ + "optional": ["mpi4py>=3.0.3", "dscribe>=2.1", "matplotlib>=3.8"] + }, test_suite="tests", tests_require=["unittest"], keywords=["python", "gaussian process", "machine learning", "regression"], diff --git a/tests/functions.py b/tests/functions.py index 1d05f41d..f1e144f7 100644 --- a/tests/functions.py +++ b/tests/functions.py @@ -1,63 +1,70 @@ -import numpy as np +from numpy import argmax, array, concatenate, cos, linspace, sin, sqrt +from numpy.linalg import norm +from numpy.random import default_rng, Generator, RandomState +from ase import Atoms +from ase.calculators.emt import EMT + + +def get_rng(seed): + "Get the random number generator." 
+ if isinstance(seed, int) or seed is None: + rng = default_rng(seed) + elif isinstance(seed, Generator): + rng = seed + elif isinstance(seed, RandomState): + rng = seed + return rng def create_func(gridsize=200, seed=1): "Generate the data set from a trial function" - np.random.seed(seed) - x = np.linspace(-40, 100, gridsize).reshape(-1, 1) - f = 3 * (np.sin((x / 20) ** 2) - 3 * np.sin(0.6 * x / 20) + 17) + rng = get_rng(seed) + x = linspace(-40, 100, gridsize).reshape(-1, 1) + f = 3 * (sin((x / 20) ** 2) - 3 * sin(0.6 * x / 20) + 17) g = 3 * ( - (2 * x / (20**2)) * np.cos((x / 20) ** 2) - - 3 * (0.6 / 20) * np.cos(0.6 * x / 20) + (2 * x / (20**2)) * cos((x / 20) ** 2) + - 3 * (0.6 / 20) * cos(0.6 * x / 20) ) - i_perm = np.random.permutation(list(range(len(x)))) + i_perm = rng.permutation(list(range(len(x)))) return x[i_perm], f[i_perm], g[i_perm] def create_h2_atoms(gridsize=200, seed=1): "Generate the trial data set of H2 ASE atoms with EMT" - from ase import Atoms - from ase.calculators.emt import EMT - - z_list = np.linspace(0.2, 4.0, gridsize) + rng = get_rng(seed) + z_list = linspace(0.2, 4.0, gridsize) atoms_list = [] energies, forces = [], [] for z in z_list: - h2 = Atoms("H2", positions=np.array([[0.0, 0.0, 0.0], [z, 0.0, 0.0]])) + h2 = Atoms("H2", positions=array([[0.0, 0.0, 0.0], [z, 0.0, 0.0]])) h2.center(vacuum=10.0) h2.calc = EMT() energies.append(h2.get_potential_energy()) forces.append(h2.get_forces().reshape(-1)) atoms_list.append(h2) - np.random.seed(seed) - i_perm = np.random.permutation(list(range(len(atoms_list)))) + i_perm = rng.permutation(list(range(len(atoms_list)))) atoms_list = [atoms_list[i] for i in i_perm] return ( atoms_list, - np.array(energies).reshape(-1, 1)[i_perm], - np.array(forces)[i_perm], + array(energies).reshape(-1, 1)[i_perm], + array(forces)[i_perm], ) def make_train_test_set(x, f, g, tr=20, te=20, use_derivatives=True): "Genterate the training and test sets" x_tr, f_tr, g_tr = x[:tr], f[:tr], g[:tr] - x_te, f_te, g_te = 
x[tr : tr + te], f[tr : tr + te], g[tr : tr + te] + t_all = tr + te + x_te, f_te, g_te = x[tr:t_all], f[tr:t_all], g[tr:t_all] if use_derivatives: - f_tr = np.concatenate( - [f_tr.reshape(tr, 1), g_tr.reshape(tr, -1)], - axis=1, - ) - f_te = np.concatenate( - [f_te.reshape(te, 1), g_te.reshape(te, -1)], - axis=1, - ) + f_tr = concatenate([f_tr.reshape(tr, 1), g_tr.reshape(tr, -1)], axis=1) + f_te = concatenate([f_te.reshape(te, 1), g_te.reshape(te, -1)], axis=1) return x_tr, f_tr, x_te, f_te def calculate_rmse(ytest, ypred): "Calculate the Root-mean squarred error" - return np.sqrt(np.mean((ypred - ytest) ** 2)) + return sqrt(((ypred - ytest) ** 2).mean()) def check_minima( @@ -175,11 +182,11 @@ def check_fmax(atoms, calc, fmax=0.05): atoms_c = atoms.copy() atoms_c.calc = calc forces = atoms_c.get_forces() - return np.linalg.norm(forces, axis=1).max() < fmax + return norm(forces, axis=1).max() < fmax def check_image_fmax(images, calc, fmax=0.05): "Check images from NEB has a saddle point." energies = [image.get_potential_energy() for image in images] - i_max = np.argmax(energies) + i_max = argmax(energies) return check_fmax(images[i_max], calc, fmax=fmax) diff --git a/tests/test_adsorption.py b/tests/test_adsorption.py new file mode 100644 index 00000000..5d3824f4 --- /dev/null +++ b/tests/test_adsorption.py @@ -0,0 +1,90 @@ +import unittest +from .functions import get_slab_ads, check_fmax + + +class TestAdsorption(unittest.TestCase): + """ + Test if the Adsorption works and give the right output. + """ + + def test_adsorption_init(self): + "Test if the Adsorption can be initialized." 
+ import numpy as np + from catlearn.activelearning.adsorption import AdsorptionAL + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the initial and final states + slab, ads = get_slab_ads() + # Make the boundary conditions for the global search + bounds = np.array( + [ + [0.0, 1.0], + [0.0, 1.0], + [0.5, 0.95], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + ] + ) + # Initialize Adsorption AL + AdsorptionAL( + slab=slab, + adsorbate=ads, + ase_calc=EMT(), + unc_convergence=0.025, + bounds=bounds, + min_data=4, + verbose=False, + seed=seed, + ) + + def test_adsorption_run(self): + "Test if the Adsorption can run and converge." + import numpy as np + from catlearn.activelearning.adsorption import AdsorptionAL + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the initial and final states + slab, ads = get_slab_ads() + # Make the boundary conditions for the global search + bounds = np.array( + [ + [0.0, 0.5], + [0.0, 0.5], + [0.5, 0.95], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + ] + ) + # Initialize Adsorption AL + ads_al = AdsorptionAL( + slab=slab, + adsorbate=ads, + ase_calc=EMT(), + unc_convergence=0.025, + bounds=bounds, + min_data=4, + verbose=False, + seed=seed, + ) + # Test if the Adsorption AL can be run + ads_al.run( + fmax=0.05, + steps=50, + max_unc=0.3, + ml_steps=4000, + ) + # Check that Adsorption AL converged + self.assertTrue(ads_al.converged() is True) + # Check that Adsorption AL give a minimum + atoms = ads_al.get_best_structures() + self.assertTrue(check_fmax(atoms, EMT(), fmax=0.05)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_gp_baseline.py b/tests/test_gp_baseline.py index 71839b50..88f1b777 100644 --- a/tests/test_gp_baseline.py +++ b/tests/test_gp_baseline.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_h2_atoms, 
make_train_test_set @@ -24,15 +23,18 @@ def test_predict(self): ) from catlearn.regression.gp.baseline import ( BaselineCalculator, + BornRepulsionCalculator, RepulsionCalculator, MieCalculator, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_h2_atoms(gridsize=50, seed=1) + x, f, g = create_h2_atoms(gridsize=50, seed=seed) # Whether to learn from the derivatives use_derivatives = True - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, _, x_te, f_te = make_train_test_set( x, f, g, @@ -43,29 +45,27 @@ def test_predict(self): # Make the hyperparameter fitter optimizer = ScipyOptimizer( maxiter=500, - jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-8, ) hpfitter = HyperparameterFitter( func=LogLikelihood(), optimizer=optimizer, + round_hp=3, ) # Define the list of baseline objects that are tested baseline_list = [ BaselineCalculator(), - RepulsionCalculator(r_scale=0.7), + BornRepulsionCalculator(), + RepulsionCalculator(), MieCalculator(), ] # Make a list of the error values that the test compares to - error_list = [0.00165, 1.93820, 3.33650] + error_list = [0.47624, 0.47624, 5.03338, 0.38677] # Test the baseline objects for index, baseline in enumerate(baseline_list): with self.subTest(baseline=baseline): # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, kernel=SE( use_derivatives=use_derivatives, @@ -75,16 +75,14 @@ def test_predict(self): ) # Make the fingerprint fp = Cartesian( - reduce_dimensions=True, use_derivatives=use_derivatives, ) # Set up the database database = Database( fingerprint=fp, - reduce_dimensions=True, use_derivatives=use_derivatives, - negative_forces=True, use_fingerprint=True, + round_targets=5, ) # Define the machine learning model mlmodel = MLModel( @@ -93,15 +91,14 @@ def test_predict(self): optimize=True, baseline=baseline, ) - # Set random seed 
to give the same results every time - np.random.seed(1) - # Construct the machine learning calculator and add the data + # Construct the machine learning calculator mlcalc = MLCalculator( mlmodel=mlmodel, - calculate_uncertainty=True, - calculate_forces=True, - verbose=False, + round_pred=5, ) + # Set the random seed for the calculator + mlcalc.set_seed(seed=seed) + # Add the training data to the calculator mlcalc.add_training(x_tr) # Test if the right number of training points is added self.assertTrue(mlcalc.get_training_set_size() == 10) @@ -115,7 +112,7 @@ def test_predict(self): atoms.get_forces() # Test the prediction energy error for a single test system error = abs(f_te.item(0) - energy) - self.assertTrue(abs(error - error_list[index]) < 1e-4) + self.assertTrue(abs(error - error_list[index]) < 1e-2) if __name__ == "__main__": diff --git a/tests/test_gp_calc.py b/tests/test_gp_calc.py index 970b82e1..88cd8306 100644 --- a/tests/test_gp_calc.py +++ b/tests/test_gp_calc.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_h2_atoms, make_train_test_set @@ -30,24 +29,29 @@ def test_predict(self): ) from catlearn.regression.gp.calculator import MLModel, MLCalculator + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_h2_atoms(gridsize=50, seed=1) + x, f, g = create_h2_atoms(gridsize=50, seed=seed) # Whether to learn from the derivatives use_derivatives = True - x_tr, f_tr, x_te, f_te = make_train_test_set( - x, f, g, tr=10, te=1, use_derivatives=use_derivatives + x_tr, _, x_te, f_te = make_train_test_set( + x, + f, + g, + tr=10, + te=1, + use_derivatives=use_derivatives, ) # Make the hyperparameter fitter optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-8, ) hpfitter = HyperparameterFitter( func=LogLikelihood(), optimizer=optimizer, + round_hp=3, ) # Set the maximum number of points to use for the reduced databases npoints = 8 
@@ -58,88 +62,84 @@ def test_predict(self): ( DatabaseDistance, True, - dict(npoints=npoints, initial_indicies=[0]), + dict(npoints=npoints, initial_indices=[0]), ), ( DatabaseDistance, True, - dict(npoints=npoints, initial_indicies=[]), + dict(npoints=npoints, initial_indices=[]), ), ( DatabaseHybrid, True, - dict(npoints=npoints, initial_indicies=[0]), + dict(npoints=npoints, initial_indices=[0]), ), - (DatabaseHybrid, True, dict(npoints=npoints, initial_indicies=[])), - (DatabaseMin, True, dict(npoints=npoints, initial_indicies=[0])), - (DatabaseMin, True, dict(npoints=npoints, initial_indicies=[])), + (DatabaseHybrid, True, dict(npoints=npoints, initial_indices=[])), + (DatabaseMin, True, dict(npoints=npoints, initial_indices=[0])), + (DatabaseMin, True, dict(npoints=npoints, initial_indices=[])), ( DatabaseRandom, True, - dict(npoints=npoints, initial_indicies=[0]), + dict(npoints=npoints, initial_indices=[0]), ), - (DatabaseRandom, True, dict(npoints=npoints, initial_indicies=[])), - (DatabaseLast, True, dict(npoints=npoints, initial_indicies=[0])), - (DatabaseLast, True, dict(npoints=npoints, initial_indicies=[])), + (DatabaseRandom, True, dict(npoints=npoints, initial_indices=[])), + (DatabaseLast, True, dict(npoints=npoints, initial_indices=[0])), + (DatabaseLast, True, dict(npoints=npoints, initial_indices=[])), ( DatabaseRestart, True, - dict(npoints=npoints, initial_indicies=[0]), + dict(npoints=npoints, initial_indices=[0]), ), ( DatabaseRestart, True, - dict(npoints=npoints, initial_indicies=[]), + dict(npoints=npoints, initial_indices=[]), ), ( DatabasePointsInterest, True, dict( - npoints=npoints, initial_indicies=[0], point_interest=x_te + npoints=npoints, initial_indices=[0], point_interest=x_te ), ), ( DatabasePointsInterest, True, - dict( - npoints=npoints, initial_indicies=[], point_interest=x_te - ), + dict(npoints=npoints, initial_indices=[], point_interest=x_te), ), ( DatabasePointsInterestEach, True, dict( - npoints=npoints, 
initial_indicies=[0], point_interest=x_te + npoints=npoints, initial_indices=[0], point_interest=x_te ), ), ( DatabasePointsInterestEach, True, - dict( - npoints=npoints, initial_indicies=[], point_interest=x_te - ), + dict(npoints=npoints, initial_indices=[], point_interest=x_te), ), ] # Make a list of the error values that the test compares to error_list = [ - 0.00166, - 0.00166, - 0.00359, - 0.00359, - 0.00003, - 0.00003, - 0.000002, - 0.000002, - 0.000018, - 0.00003, - 0.01270, - 0.02064, - 0.00655, - 0.00102, - 0.000002, - 0.000002, - 0.000002, - 0.000002, + 0.47624, + 0.47624, + 5.19594, + 5.19594, + 1.95852, + 5.19594, + 0.71664, + 0.71664, + 0.89497, + 5.23694, + 1.13717, + 3.52768, + 7.38153, + 9.47098, + 0.38060, + 0.38060, + 0.38060, + 0.38060, ] # Test the database objects for index, (data, use_fingerprint, data_kwarg) in enumerate( @@ -152,7 +152,7 @@ def test_predict(self): ): # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, kernel=SE( use_derivatives=use_derivatives, @@ -162,16 +162,14 @@ def test_predict(self): ) # Make the fingerprint fp = Cartesian( - reduce_dimensions=True, use_derivatives=use_derivatives, ) # Set up the database database = data( fingerprint=fp, - reduce_dimensions=True, use_derivatives=use_derivatives, - negative_forces=True, use_fingerprint=use_fingerprint, + round_targets=5, **data_kwarg ) # Define the machine learning model @@ -180,16 +178,15 @@ def test_predict(self): database=database, optimize=True, baseline=None, - verbose=False, ) - # Set random seed to give the same results every time - np.random.seed(1) - # Construct the machine learning calculator and add the data + # Construct the machine learning calculator mlcalc = MLCalculator( mlmodel=mlmodel, - calculate_uncertainty=True, - calculate_forces=True, + round_pred=5, ) + # Set the random seed for the calculator + mlcalc.set_seed(seed=seed) + # Add the 
training data to the calculator mlcalc.add_training(x_tr) # Test if the right number of training points is added if index in [0, 1]: @@ -218,7 +215,93 @@ def test_predict(self): atoms.get_forces() # Test the prediction energy error for a single test system error = abs(f_te.item(0) - energy) - self.assertTrue(abs(error - error_list[index]) < 1e-4) + self.assertTrue(abs(error - error_list[index]) < 1e-2) + + def test_bayesian_calc(self): + "Test if the GP bayesian calculator can predict energy and forces." + from catlearn.regression.gp.models import GaussianProcess + from catlearn.regression.gp.kernel import SE + from catlearn.regression.gp.optimizers import ScipyOptimizer + from catlearn.regression.gp.objectivefunctions.gp import LogLikelihood + from catlearn.regression.gp.hpfitter import HyperparameterFitter + from catlearn.regression.gp.fingerprint import Cartesian + from catlearn.regression.gp.calculator import Database + from catlearn.regression.gp.calculator import MLModel, BOCalculator + + # Set random seed to give the same results every time + seed = 1 + # Create the data set + x, f, g = create_h2_atoms(gridsize=50, seed=seed) + # Whether to learn from the derivatives + use_derivatives = True + x_tr, _, x_te, f_te = make_train_test_set( + x, + f, + g, + tr=10, + te=1, + use_derivatives=use_derivatives, + ) + # Make the hyperparameter fitter + optimizer = ScipyOptimizer( + maxiter=500, + jac=True, + ) + hpfitter = HyperparameterFitter( + func=LogLikelihood(), + optimizer=optimizer, + round_hp=3, + ) + # Make the fingerprint + use_fingerprint = True + fp = Cartesian( + use_derivatives=use_derivatives, + ) + # Set up the database + database = Database( + fingerprint=fp, + use_derivatives=use_derivatives, + use_fingerprint=use_fingerprint, + round_targets=5, + ) + # Construct the Gaussian process + gp = GaussianProcess( + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), + use_derivatives=use_derivatives, + kernel=SE( + use_derivatives=use_derivatives, + 
use_fingerprint=use_fingerprint, + ), + hpfitter=hpfitter, + ) + # Define the machine learning model + mlmodel = MLModel( + model=gp, + database=database, + optimize=True, + baseline=None, + ) + # Construct the machine learning calculator + mlcalc = BOCalculator( + mlmodel=mlmodel, + kappa=2.0, + round_pred=5, + ) + # Set the random seed for the calculator + mlcalc.set_seed(seed=seed) + # Add the training data to the calculator + mlcalc.add_training(x_tr) + # Train the machine learning calculator + mlcalc.train_model() + # Use a single test system for calculating the energy + # and forces with the machine learning calculator + atoms = x_te[0].copy() + atoms.calc = mlcalc + energy = atoms.get_potential_energy() + atoms.get_forces() + # Test the prediction energy error for a single test system + error = abs(f_te.item(0) - energy) + self.assertTrue(abs(error - 1.05160) < 1e-2) if __name__ == "__main__": diff --git a/tests/test_gp_ensemble.py b/tests/test_gp_ensemble.py index 3bed9d01..033386fd 100644 --- a/tests/test_gp_ensemble.py +++ b/tests/test_gp_ensemble.py @@ -18,8 +18,10 @@ def test_variance_ensemble(self): from catlearn.regression.gp.ensemble import EnsembleClustering from catlearn.regression.gp.ensemble.clustering import K_means + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -32,15 +34,18 @@ def test_variance_ensemble(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Construct the clustering object - clustering = K_means(k=4, maxiter=20, tol=1e-3, metric="euclidean") + clustering = K_means( + n_clusters=4, + maxiter=20, + ) # Define the list of whether to use variance as the ensemble method var_list = [False, True] # 
Make a list of the error values that the test compares to - error_list = [3.90019, 1.73281] + error_list = [4.61443, 0.48256] for index, use_variance_ensemble in enumerate(var_list): with self.subTest(use_variance_ensemble=use_variance_ensemble): # Construct the ensemble model @@ -50,11 +55,11 @@ def test_variance_ensemble(self): use_variance_ensemble=use_variance_ensemble, ) # Set random seed to give the same results every time - np.random.seed(1) + enmodel.set_seed(seed=seed) # Train the machine learning model enmodel.train(x_tr, f_tr) # Predict the energies - ypred, var, var_deriv = enmodel.predict( + ypred, _, _ = enmodel.predict( x_te, get_variance=False, get_derivatives=False, @@ -75,13 +80,16 @@ def test_clustering(self): K_means, K_means_auto, K_means_number, + K_means_enumeration, FixedClustering, RandomClustering, RandomClustering_number, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -94,34 +102,38 @@ def test_clustering(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Define the list of clustering objects that are tested clustering_list = [ - K_means(k=4, maxiter=20, tol=1e-3, metric="euclidean"), + K_means(n_clusters=4, maxiter=20), K_means_auto( min_data=6, max_data=12, maxiter=20, - tol=1e-3, - metric="euclidean", ), K_means_number( data_number=12, maxiter=20, - tol=1e-3, - metric="euclidean", ), + K_means_enumeration(data_number=12), FixedClustering( centroids=np.array([[-30.0], [60.0]]), - metric="euclidean", ), RandomClustering(n_clusters=4, equal_size=True), RandomClustering_number(data_number=12), ] # Make a list of the error values that the test compares to - error_list = [1.73289, 1.75136, 1.73401, 
1.74409, 1.88037, 0.61394] + error_list = [ + 0.48256, + 0.63066, + 0.62649, + 0.91445, + 0.62650, + 0.70163, + 0.67975, + ] # Test the baseline objects for index, clustering in enumerate(clustering_list): with self.subTest(clustering=clustering): @@ -132,11 +144,11 @@ def test_clustering(self): use_variance_ensemble=True, ) # Set random seed to give the same results every time - np.random.seed(1) + enmodel.set_seed(seed=seed) # Train the machine learning model enmodel.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = enmodel.predict( + ypred, _, _ = enmodel.predict( x_te, get_variance=True, get_derivatives=False, @@ -162,8 +174,10 @@ def test_variance_ensemble(self): from catlearn.regression.gp.ensemble import EnsembleClustering from catlearn.regression.gp.ensemble.clustering import K_means + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -176,15 +190,15 @@ def test_variance_ensemble(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Construct the clustering object - clustering = K_means(k=4, maxiter=20, tol=1e-3, metric="euclidean") + clustering = K_means(n_clusters=4, maxiter=20) # Define the list of whether to use variance as the ensemble method var_list = [False, True] # Make a list of the error values that the test compares to - error_list = [3.66417, 0.17265] + error_list = [4.51161, 0.37817] for index, use_variance_ensemble in enumerate(var_list): with self.subTest(use_variance_ensemble=use_variance_ensemble): # Construct the ensemble model @@ -194,11 +208,11 @@ def test_variance_ensemble(self): use_variance_ensemble=use_variance_ensemble, ) # Set random seed to give the same results 
every time - np.random.seed(1) + enmodel.set_seed(seed=seed) # Train the machine learning model enmodel.train(x_tr, f_tr) # Predict the energies - ypred, var, var_deriv = enmodel.predict( + ypred, _, _ = enmodel.predict( x_te, get_variance=False, get_derivatives=False, @@ -219,13 +233,16 @@ def test_clustering(self): K_means, K_means_auto, K_means_number, + K_means_enumeration, FixedClustering, RandomClustering, RandomClustering_number, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -238,30 +255,33 @@ def test_clustering(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Define the list of clustering objects that are tested clustering_list = [ - K_means(k=4, maxiter=20, tol=1e-3, metric="euclidean"), + K_means(n_clusters=4, maxiter=20), K_means_auto( min_data=6, max_data=12, maxiter=20, - tol=1e-3, - metric="euclidean", - ), - K_means_number( - data_number=12, maxiter=20, tol=1e-3, metric="euclidean" - ), - FixedClustering( - centroids=np.array([[-30.0], [60.0]]), metric="euclidean" ), + K_means_number(data_number=12, maxiter=20), + K_means_enumeration(data_number=12), + FixedClustering(centroids=np.array([[-30.0], [60.0]])), RandomClustering(n_clusters=4, equal_size=True), RandomClustering_number(data_number=12), ] # Make a list of the error values that the test compares to - error_list = [0.17265, 0.15492, 0.14095, 0.16393, 0.59046, 0.24236] + error_list = [ + 0.37817, + 0.38854, + 0.38641, + 0.52753, + 0.38640, + 0.47864, + 0.36700, + ] # Test the baseline objects for index, clustering in enumerate(clustering_list): with self.subTest(clustering=clustering): @@ -272,11 +292,11 @@ def test_clustering(self): 
use_variance_ensemble=True, ) # Set random seed to give the same results every time - np.random.seed(1) + enmodel.set_seed(seed=seed) # Train the machine learning model enmodel.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = enmodel.predict( + ypred, _, _ = enmodel.predict( x_te, get_variance=True, get_derivatives=False, diff --git a/tests/test_gp_fp.py b/tests/test_gp_fp.py index 5531d948..e82dac64 100644 --- a/tests/test_gp_fp.py +++ b/tests/test_gp_fp.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_h2_atoms, make_train_test_set, calculate_rmse @@ -18,32 +17,35 @@ def test_predict_var(self): from catlearn.regression.gp.kernel import SE from catlearn.regression.gp.fingerprint import ( Cartesian, + Distances, InvDistances, InvDistances2, - SortedDistances, + SortedInvDistances, SumDistances, SumDistancesPower, MeanDistances, MeanDistancesPower, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_h2_atoms(gridsize=50, seed=1) + x, f, g = create_h2_atoms(gridsize=50, seed=seed) # Whether to learn from the derivatives use_derivatives = False # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, kernel=SE(use_derivatives=use_derivatives, use_fingerprint=True), ) # Define the list of fingerprint objects that are tested fp_kwarg_list = [ Cartesian(reduce_dimensions=True, use_derivatives=use_derivatives), - InvDistances( + Distances( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), InvDistances( reduce_dimensions=True, @@ -53,45 +55,47 @@ def test_predict_var(self): InvDistances2( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), - SortedDistances( + SortedInvDistances( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, 
+ periodic_softmax=True, ), SumDistances( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), SumDistancesPower( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, + power=4, ), MeanDistances( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), MeanDistancesPower( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=False, + power=4, ), ] # Make a list of the error values that the test compares to error_list = [ - 1.35605, - 0.65313, - 0.65313, - 0.77297, - 0.65313, - 0.65313, - 0.45222, - 0.65313, - 0.45222, + 23.51556, + 22.50691, + 10.00542, + 56.04324, + 10.00542, + 6.712740, + 13.49250, + 20.04389, + 1.880300, ] # Test the fingerprint objects for index, fp in enumerate(fp_kwarg_list): @@ -106,12 +110,12 @@ def test_predict_var(self): te=10, use_derivatives=use_derivatives, ) - # Set random seed to give the same results every time - np.random.seed(1) + # Set the random seed + gp.set_seed(seed=seed) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=False, @@ -137,32 +141,35 @@ def test_predict_var(self): from catlearn.regression.gp.kernel import SE from catlearn.regression.gp.fingerprint import ( Cartesian, + Distances, InvDistances, InvDistances2, - SortedDistances, + SortedInvDistances, SumDistances, SumDistancesPower, MeanDistances, MeanDistancesPower, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_h2_atoms(gridsize=50, seed=1) + x, f, g = create_h2_atoms(gridsize=50, seed=seed) # Whether to True from the derivatives use_derivatives = True # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), 
use_derivatives=use_derivatives, kernel=SE(use_derivatives=use_derivatives, use_fingerprint=True), ) # Define the list of fingerprint objects that are tested fp_kwarg_list = [ Cartesian(reduce_dimensions=True, use_derivatives=use_derivatives), - InvDistances( + Distances( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), InvDistances( reduce_dimensions=True, @@ -172,45 +179,47 @@ def test_predict_var(self): InvDistances2( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), - SortedDistances( + SortedInvDistances( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), SumDistances( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), SumDistancesPower( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, + power=4, ), MeanDistances( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=True, ), MeanDistancesPower( reduce_dimensions=True, use_derivatives=use_derivatives, - mic=True, + periodic_softmax=False, + power=4, ), ] # Make a list of the error values that the test compares to error_list = [ - 22.75648, - 9.90152, - 9.90152, - 4.62743, - 9.90152, - 9.90152, - 8.60277, - 9.90152, - 8.60277, + 37.64770, + 39.70638, + 69.16602, + 58.86160, + 69.16602, + 73.85387, + 69.11083, + 63.00867, + 70.55665, ] # Test the fingerprint objects for index, fp in enumerate(fp_kwarg_list): @@ -225,12 +234,12 @@ def test_predict_var(self): te=10, use_derivatives=use_derivatives, ) - # Set random seed to give the same results every time - np.random.seed(1) + # Set random seed + gp.set_seed(seed=seed) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=False, diff --git 
a/tests/test_gp_hpfitter.py b/tests/test_gp_hpfitter.py index cab04d74..208cf5cd 100644 --- a/tests/test_gp_hpfitter.py +++ b/tests/test_gp_hpfitter.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, check_minima @@ -23,11 +22,13 @@ def test_hpfitters_noderiv(self): FBPMGP, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -38,10 +39,6 @@ def test_hpfitters_noderiv(self): # Make the optimizer optimizer = ScipyOptimizer( maxiter=500, - jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Define the list of hyperparameter fitter objects that are tested hpfitter_list = [ @@ -61,19 +58,19 @@ def test_hpfitters_noderiv(self): opt_tr_size=10, optimizer=optimizer, ), - FBPMGP(Q=None, n_test=50, ngrid=80, bounds=None), + FBPMGP(Q=None, n_test=50, ngrid=80), ] # Test the hyperparameter fitter objects for index, hpfitter in enumerate(hpfitter_list): with self.subTest(hpfitter=hpfitter): # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -109,11 +106,13 @@ def test_hpfitters_deriv(self): FBPMGP, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -124,10 +123,6 @@ def 
test_hpfitters_deriv(self): # Make the optimizer optimizer = ScipyOptimizer( maxiter=500, - jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Define the list of hyperparameter fitter objects that are tested hpfitter_list = [ @@ -147,19 +142,19 @@ def test_hpfitters_deriv(self): opt_tr_size=10, optimizer=optimizer, ), - FBPMGP(Q=None, n_test=50, ngrid=80, bounds=None), + FBPMGP(Q=None, n_test=50, ngrid=80), ] # Test the hyperparameter fitter objects for index, hpfitter in enumerate(hpfitter_list): with self.subTest(hpfitter=hpfitter): # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, diff --git a/tests/test_gp_means.py b/tests/test_gp_means.py index 6cfbc47b..3f43264f 100644 --- a/tests/test_gp_means.py +++ b/tests/test_gp_means.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, calculate_rmse @@ -23,8 +22,10 @@ def test_means_noderiv(self): Prior_first, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -45,22 +46,22 @@ def test_means_noderiv(self): Prior_first, ] # Make a list of the error values that the test compares to - error_list = [3.14787, 1.75102, 1.77025, 1.95093, 1.65956, 1.74474] + error_list = [2.61859, 0.89152, 0.91990, 1.21032, 0.61772, 0.91545] # Test the prior mean objects for index, prior in enumerate(priors): with self.subTest(prior=prior): # Construct the Gaussian process gp = GaussianProcess( prior=prior(), - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], 
prefactor=[0.0]), use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=False, @@ -85,8 +86,10 @@ def test_means_deriv(self): Prior_first, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -107,22 +110,22 @@ def test_means_deriv(self): Prior_first, ] # Make a list of the error values that the test compares to - error_list = [0.09202, 0.13723, 0.13594, 0.12673, 0.14712, 0.13768] + error_list = [1.14773, 0.40411, 0.41732, 0.54772, 0.26334, 0.41526] # Test the prior mean objects for index, prior in enumerate(priors): with self.subTest(prior=prior): # Construct the Gaussian process gp = GaussianProcess( prior=prior(), - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=False, diff --git a/tests/test_gp_objectivefunctions.py b/tests/test_gp_objectivefunctions.py index 111acf93..0dd45bff 100644 --- a/tests/test_gp_objectivefunctions.py +++ b/tests/test_gp_objectivefunctions.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, check_minima @@ -25,11 +24,13 @@ def test_local(self): GPE, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - 
x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -51,9 +52,6 @@ def test_local(self): optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Test the objective function objects for obj_func in obj_list: @@ -65,12 +63,12 @@ def test_local(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -112,13 +110,18 @@ def test_line_search_scale(self): FactorizedLogLikelihoodSVD, FactorizedGPP, ) - from catlearn.regression.gp.hpboundary import HPBoundaries + from catlearn.regression.gp.hpboundary import ( + HPBoundaries, + VariableTransformation, + ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -126,18 +129,24 @@ def test_line_search_scale(self): te=1, use_derivatives=use_derivatives, ) + # Make the default boundaries for the hyperparameters + default_bounds = VariableTransformation() # Make fixed boundary conditions for one of the tests fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], + prefactor=[[0.0, 2.0]], ), log=True, ) # Make the optimizers line_optimizer = FineGridSearch( - tol=1e-5, loops=3, ngrid=80, optimize=True, multiple_min=False + tol=1e-5, + loops=3, + 
ngrid=80, + optimize=True, + multiple_min=False, ) optimizer = FactorizedOptimizer( line_optimizer=line_optimizer, @@ -148,7 +157,7 @@ def test_line_search_scale(self): # Define the list of objective function objects that are tested obj_list = [ ( - None, + default_bounds, FactorizedLogLikelihood( modification=False, ngrid=250, @@ -156,7 +165,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedLogLikelihood( modification=True, ngrid=250, @@ -164,7 +173,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedLogLikelihood( modification=False, ngrid=80, @@ -172,7 +181,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedLogLikelihood( modification=False, ngrid=80, @@ -188,7 +197,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedLogLikelihoodSVD( modification=False, ngrid=250, @@ -196,7 +205,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedGPP( modification=False, ngrid=250, @@ -215,12 +224,12 @@ def test_line_search_scale(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, diff --git a/tests/test_gp_optimizer.py b/tests/test_gp_optimizer.py index f6c3e6dd..fa6f5e03 100644 --- a/tests/test_gp_optimizer.py +++ b/tests/test_gp_optimizer.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, check_minima @@ -16,11 +15,13 @@ def test_function(self): from catlearn.regression.gp.objectivefunctions.gp import LogLikelihood from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, 
f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -35,12 +36,12 @@ def test_function(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -51,7 +52,7 @@ def test_function(self): verbose=False, ) # Test the solution is correct - self.assertTrue(abs(sol["fun"] - 393.422) < 1e-2) + self.assertTrue(abs(sol["fun"] - 197.54480) < 1e-2) def test_local_jac(self): """ @@ -65,11 +66,13 @@ def test_local_jac(self): ) from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -81,9 +84,6 @@ def test_local_jac(self): optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( @@ -92,12 +92,12 @@ def test_local_jac(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -128,11 +128,13 @@ def test_local_nojac(self): from catlearn.regression.gp.objectivefunctions.gp import LogLikelihood 
from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -144,9 +146,6 @@ def test_local_nojac(self): optimizer = ScipyOptimizer( maxiter=500, jac=False, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( @@ -155,12 +154,12 @@ def test_local_nojac(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -189,11 +188,13 @@ def test_local_prior(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.pdistributions import Normal_prior + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -204,10 +205,6 @@ def test_local_prior(self): # Make the optimizer optimizer = ScipyPriorOptimizer( maxiter=500, - jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( @@ -216,7 +213,7 @@ def test_local_prior(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) @@ -226,7 
+223,7 @@ def test_local_prior(self): noise=Normal_prior(mu=-4.0, std=2.0), ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -256,11 +253,13 @@ def test_local_ed_guess(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.hpboundary.strict import StrictBoundaries + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -284,12 +283,12 @@ def test_local_ed_guess(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -325,11 +324,13 @@ def test_random(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -341,9 +342,6 @@ def test_random(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer optimizer = RandomSamplingOptimizer( @@ -360,7 +358,7 @@ def test_random(self): ) # Define test list of arguments for the random sampling optimizer bounds_list = [ - VariableTransformation(bounds=None), + VariableTransformation(), EducatedBoundaries(), HPBoundaries(bounds_dict=bounds_dict), ] @@ 
-375,12 +373,12 @@ def test_random(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -416,11 +414,13 @@ def test_grid(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -432,20 +432,17 @@ def test_grid(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer kwargs opt_kwargs = dict(maxiter=500, n_each_dim=5, parallel=False) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() bounds_ed = EducatedBoundaries() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], + prefactor=[[0.0, 2.0]], ), log=True, ) @@ -495,12 +492,12 @@ def test_grid(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -536,11 +533,13 @@ def test_line(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = 
create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -552,20 +551,17 @@ def test_line(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer kwargs opt_kwargs = dict(maxiter=500, n_each_dim=10, loops=3, parallel=False) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() bounds_ed = EducatedBoundaries() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], + prefactor=[[0.0, 2.0]], ), log=True, ) @@ -615,12 +611,12 @@ def test_line(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -648,11 +644,13 @@ def test_basin(self): from catlearn.regression.gp.objectivefunctions.gp import LogLikelihood from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -683,12 +681,12 @@ def test_basin(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time 
- np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -720,11 +718,13 @@ def test_annealling(self): EducatedBoundaries, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -734,28 +734,19 @@ def test_annealling(self): ) # Make the dictionary of the optimization local_kwargs = dict(tol=1e-12, method="L-BFGS-B") - opt_kwargs = dict( - initial_temp=5230.0, - restart_temp_ratio=2e-05, - visit=2.62, - accept=-5.0, - seed=None, - no_local_search=False, - ) # Make the optimizer optimizer = AnneallingOptimizer( - maxiter=500, + maxiter=5000, jac=False, - opt_kwargs=opt_kwargs, local_kwargs=local_kwargs, ) # Make the boundary conditions for the tests bounds_ed = EducatedBoundaries() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], + prefactor=[[0.0, 2.0]], ), log=True, ) @@ -772,12 +763,12 @@ def test_annealling(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -812,11 +803,13 @@ def test_annealling_trans(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ 
-826,28 +819,19 @@ def test_annealling_trans(self): ) # Make the dictionary of the optimization local_kwargs = dict(tol=1e-12, method="L-BFGS-B") - opt_kwargs = dict( - initial_temp=5230.0, - restart_temp_ratio=2e-05, - visit=2.62, - accept=-5.0, - seed=None, - no_local_search=False, - ) # Make the optimizer optimizer = AnneallingTransOptimizer( - maxiter=500, + maxiter=5000, jac=False, - opt_kwargs=opt_kwargs, local_kwargs=local_kwargs, ) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], + prefactor=[[0.0, 2.0]], ), log=True, ) @@ -865,12 +849,12 @@ def test_annealling_trans(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -913,11 +897,13 @@ def test_line_search_scale(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -928,13 +914,13 @@ def test_line_search_scale(self): # Make the dictionary of the optimization opt_kwargs = dict(maxiter=500, jac=False, tol=1e-5, parallel=False) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() bounds_ed = EducatedBoundaries() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], 
- noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], + prefactor=[[0.0, 2.0]], ), log=True, ) @@ -1023,12 +1009,12 @@ def test_line_search_scale(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, diff --git a/tests/test_gp_optimizer_parallel.py b/tests/test_gp_optimizer_parallel.py index 7cc94e76..68d7197e 100644 --- a/tests/test_gp_optimizer_parallel.py +++ b/tests/test_gp_optimizer_parallel.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, check_minima @@ -22,11 +21,13 @@ def test_random(self): from catlearn.regression.gp.objectivefunctions.gp import LogLikelihood from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -37,10 +38,6 @@ def test_random(self): # Make the local optimizer local_optimizer = ScipyOptimizer( maxiter=500, - jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer optimizer = RandomSamplingOptimizer( @@ -56,12 +53,12 @@ def test_random(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ 
-95,11 +92,13 @@ def test_grid(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -110,10 +109,6 @@ def test_grid(self): # Make the local optimizer local_optimizer = ScipyOptimizer( maxiter=500, - jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer optimizer = GridOptimizer( @@ -124,7 +119,7 @@ def test_grid(self): parallel=True, ) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( func=LogLikelihood(), @@ -133,12 +128,12 @@ def test_grid(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -170,8 +165,10 @@ def test_line(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.hpboundary import VariableTransformation + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -185,10 +182,6 @@ def test_line(self): # Make the local optimizer local_optimizer = ScipyOptimizer( maxiter=500, - jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer optimizer = IterativeLineOptimizer( @@ -200,7 +193,7 @@ 
def test_line(self): parallel=True, ) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( func=LogLikelihood(), @@ -209,12 +202,12 @@ def test_line(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -251,11 +244,13 @@ def test_line_search_scale(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.hpboundary import VariableTransformation + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -264,7 +259,7 @@ def test_line_search_scale(self): use_derivatives=use_derivatives, ) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() # Make the line optimizer line_optimizer = FineGridSearch( optimize=True, @@ -290,12 +285,12 @@ def test_line_search_scale(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, diff --git a/tests/test_gp_pdistributions.py b/tests/test_gp_pdistributions.py index 2f746f95..db2f9e5f 100644 --- 
a/tests/test_gp_pdistributions.py +++ b/tests/test_gp_pdistributions.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, check_minima @@ -24,11 +23,13 @@ def test_local_prior(self): ) from catlearn.regression.gp.hpboundary import StrictBoundaries + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -40,9 +41,6 @@ def test_local_prior(self): optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Define the list of prior distribution objects that are tested test_pdis = [ @@ -71,12 +69,12 @@ def test_local_prior(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, @@ -117,11 +115,13 @@ def test_global_prior(self): ) from catlearn.regression.gp.hpboundary import VariableTransformation + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -160,7 +160,7 @@ def test_global_prior(self): (True, Invgamma_prior(a=[1e-5], b=[1e-5])), ] # Test the prior distributions - for index, (use_update_pdis, pdis_d) in enumerate(test_pdis): + for use_update_pdis, pdis_d in test_pdis: with self.subTest(use_update_pdis=use_update_pdis, pdis_d=pdis_d): # Construct the 
prior distribution objects pdis = dict(length=pdis_d.copy(), noise=pdis_d.copy()) @@ -173,12 +173,12 @@ def test_global_prior(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + gp.set_seed(seed=seed) # Optimize the hyperparameters sol = gp.optimize( x_tr, diff --git a/tests/test_gp_train.py b/tests/test_gp_train.py index 3df10bb8..1f85f7d2 100644 --- a/tests/test_gp_train.py +++ b/tests/test_gp_train.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, calculate_rmse @@ -17,7 +16,7 @@ def test_gp(self): use_derivatives = False # Construct the Gaussian process GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) @@ -25,11 +24,13 @@ def test_train(self): "Test if the GP can be trained." from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -39,7 +40,7 @@ def test_train(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model @@ -49,8 +50,10 @@ def test_predict1(self): "Test if the GP can predict one test point." 
from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -63,13 +66,13 @@ def test_predict1(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energy - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=False, get_derivatives=False, @@ -77,14 +80,16 @@ def test_predict1(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.02650) < 1e-4) + self.assertTrue(abs(error - 0.00069) < 1e-4) def test_predict(self): "Test if the GP can predict multiple test points." 
from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -97,13 +102,13 @@ def test_predict(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=False, get_derivatives=False, @@ -111,14 +116,16 @@ def test_predict(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 1.75102) < 1e-4) + self.assertTrue(abs(error - 0.89152) < 1e-4) def test_predict_var(self): "Test if the GP can predict variance of multiple test point." 
from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -131,13 +138,13 @@ def test_predict_var(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=False, @@ -145,7 +152,7 @@ def test_predict_var(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 1.75102) < 1e-4) + self.assertTrue(abs(error - 0.89152) < 1e-4) def test_predict_var_n(self): """ @@ -154,8 +161,10 @@ def test_predict_var_n(self): """ from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -168,13 +177,13 @@ def test_predict_var_n(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=False, @@ -182,14 +191,16 @@ def test_predict_var_n(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 1.75102) < 
1e-4) + self.assertTrue(abs(error - 0.89152) < 1e-4) def test_predict_derivatives(self): "Test if the GP can predict derivatives of multiple test points." from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -202,23 +213,23 @@ def test_predict_derivatives(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies, derivatives, and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=True, include_noise=False, ) # Check that the derivatives are predicted - self.assertTrue(np.shape(ypred)[1] == 2) + self.assertTrue(ypred.shape[1] == 2) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 1.75102) < 1e-4) + self.assertTrue(abs(error - 0.89152) < 1e-4) class TestGPTrainPredictDerivatives(unittest.TestCase): @@ -231,11 +242,13 @@ def test_train(self): "Test if the GP can be trained." 
from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -245,7 +258,7 @@ def test_train(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model @@ -255,8 +268,10 @@ def test_predict1(self): "Test if the GP can predict one test point." from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -269,13 +284,13 @@ def test_predict1(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energy - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=False, get_derivatives=False, @@ -283,14 +298,16 @@ def test_predict1(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.00218) < 1e-4) + self.assertTrue(abs(error - 0.00233) < 1e-4) def test_predict(self): "Test if the GP can predict multiple test points." 
from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -303,13 +320,13 @@ def test_predict(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=False, get_derivatives=False, @@ -317,14 +334,16 @@ def test_predict(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.13723) < 1e-4) + self.assertTrue(abs(error - 0.40411) < 1e-4) def test_predict_var(self): "Test if the GP can predict variance of multiple test points." 
from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -337,13 +356,13 @@ def test_predict_var(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=False, @@ -351,7 +370,7 @@ def test_predict_var(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.13723) < 1e-4) + self.assertTrue(abs(error - 0.40411) < 1e-4) def test_predict_var_n(self): """ @@ -360,8 +379,10 @@ def test_predict_var_n(self): """ from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -374,13 +395,13 @@ def test_predict_var_n(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=False, @@ -388,14 +409,16 @@ def test_predict_var_n(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.13723) < 
1e-4) + self.assertTrue(abs(error - 0.40411) < 1e-4) def test_predict_derivatives(self): "Test if the GP can predict derivatives of multiple test points." from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -408,23 +431,23 @@ def test_predict_derivatives(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model gp.train(x_tr, f_tr) # Predict the energies, derivatives, and uncertainties - ypred, var, var_deriv = gp.predict( + ypred, _, _ = gp.predict( x_te, get_variance=True, get_derivatives=True, include_noise=False, ) # Check that the derivatives are predicted - self.assertTrue(np.shape(ypred)[1] == 2) + self.assertTrue(ypred.shape[1] == 2) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.13723) < 1e-4) + self.assertTrue(abs(error - 0.40411) < 1e-4) if __name__ == "__main__": diff --git a/tests/test_local.py b/tests/test_local.py new file mode 100644 index 00000000..93bf0e55 --- /dev/null +++ b/tests/test_local.py @@ -0,0 +1,75 @@ +import unittest +from .functions import get_endstructures, check_fmax + + +class TestLocal(unittest.TestCase): + """ + Test if the local active learning (AL) optimization works and + give the right output. + """ + + def test_local_init(self): + "Test if the local AL can be initialized." 
+ from catlearn.activelearning.local import LocalAL + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the atoms from initial and final states + atoms, _ = get_endstructures() + # Move the gold atom up to prepare optimization + pos = atoms.get_positions() + pos[-1, 2] += 0.5 + atoms.set_positions(pos) + atoms.get_forces() + # Initialize Local AL optimization + LocalAL( + atoms=atoms, + ase_calc=EMT(), + unc_convergence=0.02, + use_restart=True, + check_unc=True, + verbose=False, + seed=seed, + ) + + def test_local_run(self): + "Test if the local AL can run and converge with restart of path." + from catlearn.activelearning.local import LocalAL + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the atoms from initial and final states + atoms, _ = get_endstructures() + # Move the gold atom up to prepare optimization + pos = atoms.get_positions() + pos[-1, 2] += 0.5 + atoms.set_positions(pos) + atoms.get_forces() + # Initialize Local AL optimization + local_al = LocalAL( + atoms=atoms, + ase_calc=EMT(), + unc_convergence=0.02, + use_restart=True, + check_unc=True, + verbose=False, + seed=seed, + ) + # Test if the Local AL optimization can be run + local_al.run( + fmax=0.05, + steps=50, + ml_steps=250, + max_unc=0.05, + ) + # Check that Local AL optimization converged + self.assertTrue(local_al.converged() is True) + # Check that Local AL optimization gives a saddle point + atoms = local_al.get_best_structures() + self.assertTrue(check_fmax(atoms, EMT(), fmax=0.05)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_mlgo.py b/tests/test_mlgo.py index 7115a5c3..3f4f50a7 100644 --- a/tests/test_mlgo.py +++ b/tests/test_mlgo.py @@ -3,14 +3,18 @@ class TestMLGO(unittest.TestCase): - """Test if the MLGO works and give the right output.""" + """ + Test if the MLGO works and give the right output. 
+ """ def test_mlgo_init(self): "Test if the MLGO can be initialized." import numpy as np - from catlearn.optimize.mlgo import MLGO + from catlearn.activelearning.mlgo import MLGO from ase.calculators.emt import EMT + # Set random seed to give the same results every time + seed = 1 # Get the initial and final states slab, ads = get_slab_ads() # Make the boundary conditions for the global search @@ -27,65 +31,61 @@ def test_mlgo_init(self): # Initialize MLGO MLGO( slab=slab, - ads=ads, + adsorbate=ads, ase_calc=EMT(), + unc_convergence=0.025, bounds=bounds, - initial_points=2, - norelax_points=10, - min_steps=6, - full_output=False, + min_data=4, + verbose=False, + local_opt_kwargs=dict(logfile=None), + seed=seed, ) def test_mlgo_run(self): "Test if the MLGO can run and converge." import numpy as np - from catlearn.optimize.mlgo import MLGO + from catlearn.activelearning.mlgo import MLGO from ase.calculators.emt import EMT + # Set random seed to give the same results every time + seed = 1 # Get the initial and final states slab, ads = get_slab_ads() # Make the boundary conditions for the global search bounds = np.array( [ - [0.0, 1.0], - [0.0, 1.0], + [0.0, 0.5], + [0.0, 0.5], [0.5, 0.95], [0.0, 2 * np.pi], [0.0, 2 * np.pi], [0.0, 2 * np.pi], ] ) - # Set random seed - np.random.seed(1) # Initialize MLGO mlgo = MLGO( slab=slab, - ads=ads, + adsorbate=ads, ase_calc=EMT(), + unc_convergence=0.025, bounds=bounds, - initial_points=2, - norelax_points=10, - min_steps=6, - full_output=False, + min_data=4, + verbose=False, local_opt_kwargs=dict(logfile=None), - tabletxt=None, + seed=seed, ) # Test if the MLGO can be run mlgo.run( fmax=0.05, - unc_convergence=0.025, steps=50, - max_unc=0.050, - ml_steps=500, - ml_chains=2, - relax=True, - local_steps=100, - seed=0, + max_unc=0.3, + ml_steps=4000, + ml_steps_local=1000, ) # Check that MLGO converged self.assertTrue(mlgo.converged() is True) # Check that MLGO give a minimum - atoms = mlgo.get_atoms() + atoms = 
mlgo.get_best_structures() self.assertTrue(check_fmax(atoms, EMT(), fmax=0.05)) diff --git a/tests/test_mlneb.py b/tests/test_mlneb.py index 4fd35767..bbe0a17e 100644 --- a/tests/test_mlneb.py +++ b/tests/test_mlneb.py @@ -1,97 +1,61 @@ import unittest -import numpy as np from .functions import get_endstructures, check_image_fmax class TestMLNEB(unittest.TestCase): - """Test if the MLNEB works and give the right output.""" + """ + Test if the MLNEB works and give the right output. + """ def test_mlneb_init(self): "Test if the MLNEB can be initialized." - from catlearn.optimize.mlneb import MLNEB + from catlearn.activelearning.mlneb import MLNEB from ase.calculators.emt import EMT + # Set random seed to give the same results every time + seed = 1 # Get the initial and final states initial, final = get_endstructures() - # Set random seed - np.random.seed(1) # Initialize MLNEB MLNEB( start=initial, end=final, ase_calc=EMT(), - interpolation="linear", + neb_interpolation="linear", n_images=11, - use_restart_path=True, - check_path_unc=True, - full_output=False, + unc_convergence=0.05, + use_restart=True, + check_unc=True, + verbose=False, + seed=seed, ) def test_mlneb_run(self): "Test if the MLNEB can run and converge with restart of path." 
- from catlearn.optimize.mlneb import MLNEB + from catlearn.activelearning.mlneb import MLNEB from ase.calculators.emt import EMT + # Set random seed to give the same results every time + seed = 1 # Get the initial and final states initial, final = get_endstructures() - # Set random seed - np.random.seed(1) # Initialize MLNEB mlneb = MLNEB( start=initial, end=final, ase_calc=EMT(), - interpolation="linear", + neb_interpolation="linear", n_images=11, - use_restart_path=True, - check_path_unc=True, - full_output=False, - local_opt_kwargs=dict(logfile=None), - tabletxt=None, - ) - # Test if the MLNEB can be run - mlneb.run( - fmax=0.05, unc_convergence=0.05, - steps=50, - ml_steps=250, - max_unc=0.05, - ) - # Check that MLNEB converged - self.assertTrue(mlneb.converged() is True) - # Check that MLNEB gives a saddle point - images = mlneb.get_images() - self.assertTrue(check_image_fmax(images, EMT(), fmax=0.05)) - - def test_mlneb_run_idpp(self): - """ - Test if the MLNEB can run and converge with - restart of path from IDPP. 
- """ - from catlearn.optimize.mlneb import MLNEB - from ase.calculators.emt import EMT - - # Get the initial and final states - initial, final = get_endstructures() - # Set random seed - np.random.seed(1) - # Initialize MLNEB - mlneb = MLNEB( - start=initial, - end=final, - ase_calc=EMT(), - interpolation="idpp", - n_images=11, - use_restart_path=True, - check_path_unc=True, - full_output=False, + use_restart=True, + check_unc=True, + verbose=False, local_opt_kwargs=dict(logfile=None), - tabletxt=None, + seed=seed, ) # Test if the MLNEB can be run mlneb.run( fmax=0.05, - unc_convergence=0.05, steps=50, ml_steps=250, max_unc=0.05, @@ -99,7 +63,7 @@ def test_mlneb_run_idpp(self): # Check that MLNEB converged self.assertTrue(mlneb.converged() is True) # Check that MLNEB gives a saddle point - images = mlneb.get_images() + images = mlneb.get_best_structures() self.assertTrue(check_image_fmax(images, EMT(), fmax=0.05)) def test_mlneb_run_path(self): @@ -107,33 +71,33 @@ def test_mlneb_run_path(self): Test if the MLNEB can run and converge with restart of path from different initial paths. 
""" - from catlearn.optimize.mlneb import MLNEB + from catlearn.activelearning.mlneb import MLNEB from ase.calculators.emt import EMT + # Set random seed to give the same results every time + seed = 1 # Get the initial and final states initial, final = get_endstructures() - interpolations = ["idpp", "rep", "ends"] + interpolations = ["born", "ends", "idpp", "rep"] for interpolation in interpolations: with self.subTest(interpolation=interpolation): - # Set random seed - np.random.seed(1) # Initialize MLNEB mlneb = MLNEB( start=initial, end=final, ase_calc=EMT(), - interpolation=interpolation, + neb_interpolation=interpolation, n_images=11, - use_restart_path=True, - check_path_unc=True, - full_output=False, + unc_convergence=0.05, + use_restart=True, + check_unc=True, + verbose=False, local_opt_kwargs=dict(logfile=None), - tabletxt=None, + seed=seed, ) # Test if the MLNEB can be run mlneb.run( fmax=0.05, - unc_convergence=0.05, steps=50, ml_steps=250, max_unc=0.05, @@ -141,34 +105,34 @@ def test_mlneb_run_path(self): # Check that MLNEB converged self.assertTrue(mlneb.converged() is True) # Check that MLNEB gives a saddle point - images = mlneb.get_images() + images = mlneb.get_best_structures() self.assertTrue(check_image_fmax(images, EMT(), fmax=0.05)) def test_mlneb_run_norestart(self): "Test if the MLNEB can run and converge with no restart of path." 
- from catlearn.optimize.mlneb import MLNEB + from catlearn.activelearning.mlneb import MLNEB from ase.calculators.emt import EMT + # Set random seed to give the same results every time + seed = 1 # Get the initial and final states initial, final = get_endstructures() - # Set random seed - np.random.seed(1) # Initialize MLNEB mlneb = MLNEB( start=initial, end=final, ase_calc=EMT(), - interpolation="linear", + neb_interpolation="linear", n_images=11, - use_restart_path=False, - full_output=False, + unc_convergence=0.05, + use_restart=False, + verbose=False, local_opt_kwargs=dict(logfile=None), - tabletxt=None, + seed=seed, ) # Test if the MLNEB can be run mlneb.run( fmax=0.05, - unc_convergence=0.05, steps=50, ml_steps=250, max_unc=0.05, @@ -176,36 +140,36 @@ def test_mlneb_run_norestart(self): # Check that MLNEB converged self.assertTrue(mlneb.converged() is True) # Check that MLNEB gives a saddle point - images = mlneb.get_images() + images = mlneb.get_best_structures() self.assertTrue(check_image_fmax(images, EMT(), fmax=0.05)) def test_mlneb_run_savememory(self): "Test if the MLNEB can run and converge when it saves memory." 
- from catlearn.optimize.mlneb import MLNEB + from catlearn.activelearning.mlneb import MLNEB from ase.calculators.emt import EMT + # Set random seed to give the same results every time + seed = 1 # Get the initial and final states initial, final = get_endstructures() - # Set random seed - np.random.seed(1) # Initialize MLNEB mlneb = MLNEB( start=initial, end=final, ase_calc=EMT(), - interpolation="linear", + neb_interpolation="linear", n_images=11, - use_restart_path=True, - check_path_unc=True, + unc_convergence=0.05, + use_restart=True, + check_unc=True, save_memory=True, - full_output=False, + verbose=False, local_opt_kwargs=dict(logfile=None), - tabletxt=None, + seed=seed, ) # Test if the MLNEB can be run mlneb.run( fmax=0.05, - unc_convergence=0.05, steps=50, ml_steps=250, max_unc=0.05, @@ -213,45 +177,157 @@ def test_mlneb_run_savememory(self): # Check that MLNEB converged self.assertTrue(mlneb.converged() is True) # Check that MLNEB gives a saddle point - images = mlneb.get_images() + images = mlneb.get_best_structures() self.assertTrue(check_image_fmax(images, EMT(), fmax=0.05)) def test_mlneb_run_no_maxunc(self): "Test if the MLNEB can run and converge when it does not use max_unc." 
- from catlearn.optimize.mlneb import MLNEB + from catlearn.activelearning.mlneb import MLNEB from ase.calculators.emt import EMT + # Set random seed to give the same results every time + seed = 1 # Get the initial and final states initial, final = get_endstructures() - # Set random seed - np.random.seed(1) # Initialize MLNEB mlneb = MLNEB( start=initial, end=final, ase_calc=EMT(), - interpolation="linear", + neb_interpolation="linear", n_images=11, - use_restart_path=True, - check_path_unc=True, - full_output=False, + unc_convergence=0.05, + use_restart=True, + check_unc=True, + verbose=False, local_opt_kwargs=dict(logfile=None), - tabletxt=None, + seed=seed, ) # Test if the MLNEB can be run mlneb.run( fmax=0.05, + steps=50, + ml_steps=250, + max_unc=None, + ) + # Check that MLNEB converged + self.assertTrue(mlneb.converged() is True) + # Check that MLNEB gives a saddle point + images = mlneb.get_best_structures() + self.assertTrue(check_image_fmax(images, EMT(), fmax=0.05)) + + def test_mlneb_run_dtrust(self): + "Test if the MLNEB can run and converge when it use a trust distance." 
+ from catlearn.activelearning.mlneb import MLNEB + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the initial and final states + initial, final = get_endstructures() + # Initialize MLNEB + mlneb = MLNEB( + start=initial, + end=final, + ase_calc=EMT(), + neb_interpolation="linear", + n_images=11, unc_convergence=0.05, + use_restart=True, + check_unc=True, + verbose=False, + local_opt_kwargs=dict(logfile=None), + seed=seed, + ) + # Test if the MLNEB can be run + mlneb.run( + fmax=0.05, steps=50, ml_steps=250, - max_unc=False, + dtrust=0.5, ) # Check that MLNEB converged self.assertTrue(mlneb.converged() is True) # Check that MLNEB gives a saddle point - images = mlneb.get_images() + images = mlneb.get_best_structures() self.assertTrue(check_image_fmax(images, EMT(), fmax=0.05)) + def test_mlneb_run_start_with_ci(self): + """ + Test if the MLNEB can run and converge without starting + without climbing image. + """ + from catlearn.activelearning.mlneb import MLNEB + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the initial and final states + initial, final = get_endstructures() + # Initialize MLNEB + mlneb = MLNEB( + start=initial, + end=final, + ase_calc=EMT(), + neb_interpolation="linear", + start_without_ci=False, + n_images=11, + unc_convergence=0.05, + use_restart=True, + check_unc=True, + verbose=False, + local_opt_kwargs=dict(logfile=None), + seed=seed, + ) + # Test if the MLNEB can be run + mlneb.run( + fmax=0.05, + steps=50, + ml_steps=250, + max_unc=0.05, + ) + # Check that MLNEB converged + self.assertTrue(mlneb.converged() is True) + # Check that MLNEB gives a saddle point + images = mlneb.get_best_structures() + self.assertTrue(check_image_fmax(images, EMT(), fmax=0.05)) + + def test_mlneb_run_no_ci(self): + """ + Test if the MLNEB can run and converge without climbing image. 
+ """ + from catlearn.activelearning.mlneb import MLNEB + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the initial and final states + initial, final = get_endstructures() + # Initialize MLNEB + mlneb = MLNEB( + start=initial, + end=final, + ase_calc=EMT(), + neb_interpolation="linear", + n_images=11, + climb=False, + unc_convergence=0.05, + use_restart=True, + check_unc=True, + verbose=False, + local_opt_kwargs=dict(logfile=None), + seed=seed, + ) + # Test if the MLNEB can be run + mlneb.run( + fmax=0.05, + steps=50, + ml_steps=250, + max_unc=0.05, + ) + # Check that MLNEB converged + self.assertTrue(mlneb.converged() is True) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_randomadsorption.py b/tests/test_randomadsorption.py new file mode 100644 index 00000000..dbdf9428 --- /dev/null +++ b/tests/test_randomadsorption.py @@ -0,0 +1,143 @@ +import unittest +from .functions import get_slab_ads, check_fmax + + +class TestRandomAdsorption(unittest.TestCase): + """ + Test if the RandomAdsorption works and give the right output. + """ + + def test_randomadsorption_init(self): + "Test if the RandomAdsorption can be initialized." + import numpy as np + from catlearn.activelearning import RandomAdsorptionAL + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the initial and final states + slab, ads = get_slab_ads() + # Make the boundary conditions for the global search + bounds = np.array( + [ + [0.0, 1.0], + [0.0, 1.0], + [0.5, 0.95], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + ] + ) + # Initialize RandomAdsorption AL + RandomAdsorptionAL( + slab=slab, + adsorbate=ads, + ase_calc=EMT(), + unc_convergence=0.025, + bounds=bounds, + min_data=4, + verbose=False, + seed=seed, + ) + + def test_randomadsorption_run(self): + "Test if the RandomAdsorption can run and converge." 
+ import numpy as np + from catlearn.activelearning import RandomAdsorptionAL + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the initial and final states + slab, ads = get_slab_ads() + # Make the boundary conditions for the global search + bounds = np.array( + [ + [0.0, 0.5], + [0.0, 0.5], + [0.5, 0.95], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + ] + ) + # Initialize RandomAdsorption AL + ads_al = RandomAdsorptionAL( + slab=slab, + adsorbate=ads, + ase_calc=EMT(), + n_random_draws=20, + use_initial_opt=True, + initial_fmax=0.2, + unc_convergence=0.025, + bounds=bounds, + min_data=4, + verbose=False, + seed=seed, + ) + # Test if the RandomAdsorption AL can be run + ads_al.run( + fmax=0.05, + steps=50, + max_unc=0.3, + ml_steps=5000, + ) + # Check that RandomAdsorption AL converged + self.assertTrue(ads_al.converged() is True) + # Check that RandomAdsorption AL give a minimum + atoms = ads_al.get_best_structures() + self.assertTrue(check_fmax(atoms, EMT(), fmax=0.05)) + + def test_randomadsorption_run_no_initial_opt(self): + """ + Test if the RandomAdsorption without initial optimization + can run and converge. 
+ """ + import numpy as np + from catlearn.activelearning import RandomAdsorptionAL + from ase.calculators.emt import EMT + + # Set random seed to give the same results every time + seed = 1 + # Get the initial and final states + slab, ads = get_slab_ads() + # Make the boundary conditions for the global search + bounds = np.array( + [ + [0.0, 0.5], + [0.0, 0.5], + [0.5, 0.95], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + [0.0, 2 * np.pi], + ] + ) + # Initialize RandomAdsorption AL + ads_al = RandomAdsorptionAL( + slab=slab, + adsorbate=ads, + ase_calc=EMT(), + n_random_draws=50, + use_initial_opt=False, + unc_convergence=0.025, + bounds=bounds, + min_data=4, + verbose=False, + seed=seed, + ) + # Test if the RandomAdsorption AL can be run + ads_al.run( + fmax=0.05, + steps=50, + max_unc=0.3, + ml_steps=5000, + ) + # Check that RandomAdsorption AL converged + self.assertTrue(ads_al.converged() is True) + # Check that RandomAdsorption AL give a minimum + atoms = ads_al.get_best_structures() + self.assertTrue(check_fmax(atoms, EMT(), fmax=0.05)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_save_model.py b/tests/test_save_model.py index b073968b..8bb855c4 100644 --- a/tests/test_save_model.py +++ b/tests/test_save_model.py @@ -14,8 +14,10 @@ def test_save_model(self): """ from catlearn.regression.gp.models import GaussianProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -28,7 +30,7 @@ def test_save_model(self): ) # Construct the Gaussian process gp = GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) # Train the machine learning model @@ -37,12 +39,12 @@ def test_save_model(self): gp.save_model("test_model.pkl") # Load the model gp2 = 
GaussianProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0], prefactor=[0.0]), use_derivatives=use_derivatives, ) gp2 = gp2.load_model("test_model.pkl") # Predict the energy - ypred, var, var_deriv = gp2.predict( + ypred, _, _ = gp2.predict( x_te, get_variance=False, get_derivatives=False, @@ -50,7 +52,7 @@ def test_save_model(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.02650) < 1e-4) + self.assertTrue(abs(error - 0.00069) < 1e-4) if __name__ == "__main__": diff --git a/tests/test_tp_objectivefunctions.py b/tests/test_tp_objectivefunctions.py index 0bf51f69..e234b07f 100644 --- a/tests/test_tp_objectivefunctions.py +++ b/tests/test_tp_objectivefunctions.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, check_minima @@ -19,11 +18,13 @@ def test_local(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.objectivefunctions.tp import LogLikelihood + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -35,9 +36,6 @@ def test_local(self): optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( @@ -46,12 +44,12 @@ def test_local(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -92,13 +90,18 @@ def 
test_line_search_scale(self): FactorizedLogLikelihood, FactorizedLogLikelihoodSVD, ) - from catlearn.regression.gp.hpboundary import HPBoundaries + from catlearn.regression.gp.hpboundary import ( + HPBoundaries, + VariableTransformation, + ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -106,12 +109,13 @@ def test_line_search_scale(self): te=1, use_derivatives=use_derivatives, ) + # Make the default boundaries for the hyperparameters + default_bounds = VariableTransformation() # Make fixed boundary conditions for one of the tests fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], ), log=True, ) @@ -132,7 +136,7 @@ def test_line_search_scale(self): # Define the list of objective function objects that are tested obj_list = [ ( - None, + default_bounds, FactorizedLogLikelihood( modification=False, ngrid=250, @@ -140,7 +144,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedLogLikelihood( modification=True, ngrid=250, @@ -148,7 +152,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedLogLikelihood( modification=False, ngrid=80, @@ -156,7 +160,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedLogLikelihood( modification=False, ngrid=80, @@ -172,7 +176,7 @@ def test_line_search_scale(self): ), ), ( - None, + default_bounds, FactorizedLogLikelihoodSVD( modification=False, ngrid=250, @@ -191,12 +195,12 @@ def test_line_search_scale(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, 
use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, diff --git a/tests/test_tp_optimizer.py b/tests/test_tp_optimizer.py index 9d0f75c3..1a8b163a 100644 --- a/tests/test_tp_optimizer.py +++ b/tests/test_tp_optimizer.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, check_minima @@ -16,11 +15,13 @@ def test_function(self): from catlearn.regression.gp.objectivefunctions.tp import LogLikelihood from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -35,12 +36,12 @@ def test_function(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -51,7 +52,7 @@ def test_function(self): verbose=False, ) # Test the solution is correct - self.assertTrue(abs(sol["fun"] - 502.256) < 1e-2) + self.assertTrue(abs(sol["fun"] - 489.88476) < 1e-2) def test_local_jac(self): """ @@ -63,11 +64,13 @@ def test_local_jac(self): from catlearn.regression.gp.objectivefunctions.tp import LogLikelihood from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = 
make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -79,9 +82,6 @@ def test_local_jac(self): optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( @@ -90,12 +90,12 @@ def test_local_jac(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -126,11 +126,13 @@ def test_local_nojac(self): from catlearn.regression.gp.objectivefunctions.tp import LogLikelihood from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -142,9 +144,6 @@ def test_local_nojac(self): optimizer = ScipyOptimizer( maxiter=500, jac=False, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( @@ -153,12 +152,12 @@ def test_local_nojac(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -189,11 +188,13 @@ def test_local_prior(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.pdistributions import Normal_prior + # Set random seed to give the same results every time + 
seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -216,7 +217,7 @@ def test_local_prior(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) @@ -226,7 +227,7 @@ def test_local_prior(self): noise=Normal_prior(mu=-4.0, std=2.0), ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -255,11 +256,13 @@ def test_local_ed_guess(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.hpboundary import StrictBoundaries + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -283,12 +286,12 @@ def test_local_ed_guess(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -324,11 +327,13 @@ def test_random(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -340,9 +345,6 @@ def 
test_random(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer optimizer = RandomSamplingOptimizer( @@ -359,7 +361,7 @@ def test_random(self): ) # Define test list of arguments for the random sampling optimizer bounds_list = [ - VariableTransformation(bounds=None), + VariableTransformation(), EducatedBoundaries(), HPBoundaries(bounds_dict=bounds_dict), ] @@ -374,12 +376,12 @@ def test_random(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -415,11 +417,13 @@ def test_grid(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -431,20 +435,16 @@ def test_grid(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer kwargs opt_kwargs = dict(maxiter=500, n_each_dim=5, parallel=False) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() bounds_ed = EducatedBoundaries() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], ), log=True, ) @@ -494,12 +494,12 @@ def test_grid(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, 
use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -535,11 +535,13 @@ def test_line(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -551,20 +553,16 @@ def test_line(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer kwargs opt_kwargs = dict(maxiter=500, n_each_dim=10, loops=3, parallel=False) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() bounds_ed = EducatedBoundaries() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], ), log=True, ) @@ -614,12 +612,12 @@ def test_line(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -647,11 +645,13 @@ def test_basin(self): from catlearn.regression.gp.objectivefunctions.tp import LogLikelihood from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + 
x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -682,12 +682,12 @@ def test_basin(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -719,11 +719,13 @@ def test_annealling(self): EducatedBoundaries, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -738,7 +740,6 @@ def test_annealling(self): restart_temp_ratio=2e-05, visit=2.62, accept=-5.0, - seed=None, no_local_search=False, ) # Make the optimizer @@ -752,9 +753,8 @@ def test_annealling(self): bounds_ed = EducatedBoundaries() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], ), log=True, ) @@ -771,12 +771,12 @@ def test_annealling(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -811,11 +811,13 @@ def test_annealling_trans(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -830,7 
+832,6 @@ def test_annealling_trans(self): restart_temp_ratio=2e-05, visit=2.62, accept=-5.0, - seed=None, no_local_search=False, ) # Make the optimizer @@ -841,12 +842,11 @@ def test_annealling_trans(self): local_kwargs=local_kwargs, ) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], ), log=True, ) @@ -864,12 +864,12 @@ def test_annealling_trans(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -912,11 +912,13 @@ def test_line_search_scale(self): VariableTransformation, ) + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -927,13 +929,12 @@ def test_line_search_scale(self): # Make the dictionary of the optimization opt_kwargs = dict(maxiter=500, jac=False, tol=1e-5, parallel=False) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() bounds_ed = EducatedBoundaries() fixed_bounds = HPBoundaries( bounds_dict=dict( - length=[[-3.0, 3.0]], - noise=[[-8.0, 0.0]], - prefactor=[[-2.0, 4.0]], + length=[[-1.0, 3.0]], + noise=[[-4.0, -1.0]], ), log=True, ) @@ -1022,12 +1023,12 @@ def test_line_search_scale(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + 
hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, diff --git a/tests/test_tp_optimizer_parallel.py b/tests/test_tp_optimizer_parallel.py index 36732649..b0df50bd 100644 --- a/tests/test_tp_optimizer_parallel.py +++ b/tests/test_tp_optimizer_parallel.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, check_minima @@ -22,11 +21,13 @@ def test_random(self): from catlearn.regression.gp.objectivefunctions.tp import LogLikelihood from catlearn.regression.gp.hpfitter import HyperparameterFitter + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -38,9 +39,6 @@ def test_random(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer optimizer = RandomSamplingOptimizer( @@ -56,12 +54,12 @@ def test_random(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -95,11 +93,13 @@ def test_grid(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.hpboundary import VariableTransformation + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the 
derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -111,9 +111,6 @@ def test_grid(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer optimizer = GridOptimizer( @@ -124,7 +121,7 @@ def test_grid(self): parallel=True, ) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( func=LogLikelihood(), @@ -133,12 +130,12 @@ def test_grid(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -170,11 +167,13 @@ def test_line(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.hpboundary import VariableTransformation + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -186,9 +185,6 @@ def test_line(self): local_optimizer = ScipyOptimizer( maxiter=500, jac=True, - method="l-bfgs-b", - use_bounds=False, - tol=1e-12, ) # Make the global optimizer optimizer = IterativeLineOptimizer( @@ -200,7 +196,7 @@ def test_line(self): parallel=True, ) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() # Construct the hyperparameter fitter hpfitter = HyperparameterFitter( func=LogLikelihood(), 
@@ -209,12 +205,12 @@ def test_line(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, @@ -251,11 +247,13 @@ def test_line_search_scale(self): from catlearn.regression.gp.hpfitter import HyperparameterFitter from catlearn.regression.gp.hpboundary import VariableTransformation + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -264,7 +262,7 @@ def test_line_search_scale(self): use_derivatives=use_derivatives, ) # Make the boundary conditions for the tests - bounds_trans = VariableTransformation(bounds=None) + bounds_trans = VariableTransformation() # Make the line optimizer line_optimizer = FineGridSearch( optimize=True, @@ -290,12 +288,12 @@ def test_line_search_scale(self): ) # Construct the Student t process tp = TProcess( - hp=dict(length=2.0), + hp=dict(length=[2.0], noise=[-5.0]), hpfitter=hpfitter, use_derivatives=use_derivatives, ) # Set random seed to give the same results every time - np.random.seed(1) + tp.set_seed(seed=seed) # Optimize the hyperparameters sol = tp.optimize( x_tr, diff --git a/tests/test_tp_train.py b/tests/test_tp_train.py index ceaf83c0..93916bb0 100644 --- a/tests/test_tp_train.py +++ b/tests/test_tp_train.py @@ -1,5 +1,4 @@ import unittest -import numpy as np from .functions import create_func, make_train_test_set, calculate_rmse @@ -16,17 +15,22 @@ def test_tp(self): # Whether to learn from the derivatives use_derivatives = False # Construct the Studen t process - TProcess(hp=dict(length=2.0), 
use_derivatives=use_derivatives) + TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) def test_train(self): "Test if the TP can be trained." from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False - x_tr, f_tr, x_te, f_te = make_train_test_set( + x_tr, f_tr, _, _ = make_train_test_set( x, f, g, @@ -35,7 +39,10 @@ def test_train(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) @@ -43,8 +50,10 @@ def test_predict1(self): "Test if the TP can predict one test point." from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -56,11 +65,14 @@ def test_predict1(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energy - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=False, get_derivatives=False, @@ -68,14 +80,16 @@ def test_predict1(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.02650) < 1e-4) + self.assertTrue(abs(error - 0.00069) < 1e-4) def test_predict(self): "Test if 
the TP can predict multiple test points." from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -87,11 +101,14 @@ def test_predict(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energies - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=False, get_derivatives=False, @@ -99,14 +116,16 @@ def test_predict(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 1.75102) < 1e-4) + self.assertTrue(abs(error - 0.89152) < 1e-4) def test_predict_var(self): "Test if the TP can predict variance of multiple test point." 
from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -118,11 +137,14 @@ def test_predict_var(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=True, get_derivatives=False, @@ -130,7 +152,7 @@ def test_predict_var(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 1.75102) < 1e-4) + self.assertTrue(abs(error - 0.89152) < 1e-4) def test_predict_var_n(self): """ @@ -139,8 +161,10 @@ def test_predict_var_n(self): """ from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -152,11 +176,14 @@ def test_predict_var_n(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=True, get_derivatives=False, @@ -164,14 +191,16 @@ def test_predict_var_n(self): ) # Test the 
prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 1.75102) < 1e-4) + self.assertTrue(abs(error - 0.89152) < 1e-4) def test_predict_derivatives(self): "Test if the TP can predict derivatives of multiple test points." from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = False x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -183,21 +212,24 @@ def test_predict_derivatives(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energies, derivatives, and uncertainties - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=True, get_derivatives=True, include_noise=False, ) # Check that the derivatives are predicted - self.assertTrue(np.shape(ypred)[1] == 2) + self.assertTrue(ypred.shape[1] == 2) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 1.75102) < 1e-4) + self.assertTrue(abs(error - 0.89152) < 1e-4) class TestTPTrainPredictDerivatives(unittest.TestCase): @@ -210,8 +242,10 @@ def test_train(self): "Test if the TP can be trained." 
from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -223,7 +257,10 @@ def test_train(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) @@ -231,8 +268,10 @@ def test_predict1(self): "Test if the TP can predict one test point." from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -244,11 +283,14 @@ def test_predict1(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energy - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=False, get_derivatives=False, @@ -256,14 +298,16 @@ def test_predict1(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.00218) < 1e-4) + self.assertTrue(abs(error - 0.00233) < 1e-4) def test_predict(self): "Test if the TP can predict multiple test points." 
from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -275,11 +319,14 @@ def test_predict(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energies - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=False, get_derivatives=False, @@ -287,14 +334,16 @@ def test_predict(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.13723) < 1e-4) + self.assertTrue(abs(error - 0.40411) < 1e-4) def test_predict_var(self): "Test if the TP can predict variance of multiple test points." 
from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -306,11 +355,14 @@ def test_predict_var(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=True, get_derivatives=False, @@ -318,7 +370,7 @@ def test_predict_var(self): ) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.13723) < 1e-4) + self.assertTrue(abs(error - 0.40411) < 1e-4) def test_predict_var_n(self): """ @@ -327,8 +379,10 @@ def test_predict_var_n(self): """ from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -340,11 +394,14 @@ def test_predict_var_n(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energies and uncertainties - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=True, get_derivatives=False, @@ -352,14 +409,16 @@ def test_predict_var_n(self): ) # Test the 
prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.13723) < 1e-4) + self.assertTrue(abs(error - 0.40411) < 1e-4) def test_predict_derivatives(self): "Test if the TP can predict derivatives of multiple test points." from catlearn.regression.gp.models import TProcess + # Set random seed to give the same results every time + seed = 1 # Create the data set - x, f, g = create_func() + x, f, g = create_func(seed=seed) # Whether to learn from the derivatives use_derivatives = True x_tr, f_tr, x_te, f_te = make_train_test_set( @@ -371,21 +430,24 @@ def test_predict_derivatives(self): use_derivatives=use_derivatives, ) # Construct the Studen t process - tp = TProcess(hp=dict(length=2.0), use_derivatives=use_derivatives) + tp = TProcess( + hp=dict(length=[2.0], noise=[-5.0]), + use_derivatives=use_derivatives, + ) # Train the machine learning model tp.train(x_tr, f_tr) # Predict the energies, derivatives, and uncertainties - ypred, var, var_deriv = tp.predict( + ypred, _, _ = tp.predict( x_te, get_variance=True, get_derivatives=True, include_noise=False, ) # Check that the derivatives are predicted - self.assertTrue(np.shape(ypred)[1] == 2) + self.assertTrue(ypred.shape[1] == 2) # Test the prediction energy errors error = calculate_rmse(f_te[:, 0], ypred[:, 0]) - self.assertTrue(abs(error - 0.13723) < 1e-4) + self.assertTrue(abs(error - 0.40411) < 1e-4) if __name__ == "__main__":