diff --git a/nice/rascal_coefficients.pyx b/nice/rascal_coefficients.pyx
index 0d644d9..924142e 100644
--- a/nice/rascal_coefficients.pyx
+++ b/nice/rascal_coefficients.pyx
@@ -88,8 +88,7 @@ cpdef normalize_by_ps(double[:, :, :, :] coefficients):
 
 
 def get_rascal_coefficients(structures, HYPERS, n_types):
-    
-    
+    sph = SPH(**HYPERS)
     try:
         n_max = HYPERS['max_radial']
@@ -108,9 +107,16 @@ def get_rascal_coefficients(structures, HYPERS, n_types):
 def get_rascal_coefficients_stared(task):
     return get_rascal_coefficients(*task)
 
-def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size = 100, num_threads = None):
+def get_rascal_coefficients_parallelized(structures, rascal_hypers, mask = None, mask_id = None, task_size = 100, num_threads = None):
     hypers = copy.deepcopy(rascal_hypers)
-    
+
+    if mask is not None:
+        for f in structures:
+            mask_center_atoms_by_species(f,[mask])
+    if mask_id is not None:
+        for f in structures:
+            mask_center_atoms_by_id(f,[mask_id])
+
     if ('expansion_by_species_method' in hypers.keys()):
         if (hypers['expansion_by_species_method'] != 'user defined'):
             raise ValueError("for proper packing spherical expansion coefficients into [env index, radial/specie index, l, m] shape output should be uniform, thus 'expansion_by_species_method' must be 'user defined'")
@@ -134,10 +140,7 @@ def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size =
             hypers['global_species'].append(int(specie))
 
     species = np.array(hypers['global_species']).astype(np.int32)
-
-
-
-
+
     if (num_threads is None):
         num_threads = cpu_count()
 
@@ -150,5 +153,11 @@ def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size =
     p.close()
     p.join()
     result = np.concatenate(result, axis = 0)
-    return split_by_central_specie(all_species, species, result)
-    
\ No newline at end of file
+    if (mask is None and mask_id is None):
+        return split_by_central_specie(all_species, species, result)
+    else:
+        # just return the masked specie/atom
+        if mask is None:
+            return {mask_id: result}
+        else:
+            return {mask: result}
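Note: a minimal sketch of how the new mask / mask_id arguments change what get_rascal_coefficients_parallelized returns. Here frames (a list of ase.Atoms) and hypers (a rascal hyperparameter dict) are assumed to already exist, and 'O' stands in for any species selector accepted by mask_center_atoms_by_species:

    from nice.rascal_coefficients import get_rascal_coefficients_parallelized

    # no mask: coefficients are split per central species, as before
    per_species = get_rascal_coefficients_parallelized(frames, hypers)

    # species mask: only the selected centers are expanded, and the result is a
    # single-entry dict keyed by the mask itself
    masked = get_rascal_coefficients_parallelized(frames, hypers, mask='O')
    coefficients = masked['O']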
diff --git a/tools/compute_nice.py b/tools/compute_nice.py
new file mode 100755
index 0000000..4948c63
--- /dev/null
+++ b/tools/compute_nice.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python3
+
+import pickle
+import sys, os, argparse
+import ase.io as ase_io
+import numpy as np
+import tqdm
+from nice.transformers import *
+from nice.rascal_coefficients import get_rascal_coefficients_parallelized
+
+
+def main():
+    """
+    Command-line utility to compute NICE features given a precomputed pickle.
+    """
+
+    # Tweak the autogenerated help output to look nicer
+    formatter = lambda prog: argparse.HelpFormatter(prog, max_help_position=22)
+    parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=formatter)
+
+    parser.add_argument('input', type=str, default="", nargs="?",
+                        help='XYZ file to load')
+    parser.add_argument('-o', '--output', type=str, default="",
+                        help='Output files prefix. Defaults to input filename with stripped extension')
+    parser.add_argument('--select', type=str, default=":",
+                        help='Selection of input frames. ASE format.')
+    parser.add_argument('--nice', type=str, default="nice.pickle",
+                        help='Definition of the NICE contraction. Output from optimize_nice.py')
+    parser.add_argument('--blocks', type=int, default=1,
+                        help='Number of blocks to break the calculation into.')
+
+
+    args = parser.parse_args()
+
+    filename = args.input
+    output = args.output
+    select = args.select
+    nice = args.nice
+    nblocks = args.blocks
+
+    if output == "":
+        output = os.path.splitext(filename)[0]
+
+    print("Loading structures ", filename, " frames: ", select)
+    frames = ase_io.read(filename, index=select)
+    for f in frames:
+        if f.cell.sum() == 0.0:
+            f.cell = [100,100,100]
+            f.pbc = True
+        f.wrap(eps=1e-12)
+
+    aa = pickle.load(open(nice, "rb"))
+    hypers = aa["HYPERS"]
+    nice_sequence = aa["NICE"]
+
+    mask = hypers["reference-mask"]
+    try:
+        mask_id = hypers["reference-mask-id"]
+    except KeyError:
+        mask_id = -1
+    lmax = hypers["max_angular"]
+    scale = hypers["scale"]
+
+    # removes keys that are not needed for rascal
+    for k in ["nsph", "keep", "keep0", "screen", "screen0", "pcond", "scale",
+              "reference-file", "reference-sel", "reference-mask", "reference-mask-id"]:
+        hypers.pop(k)
+    print("HYPERPARAMETERS ", hypers)
+
+    print("Precomputing CG coefficients")
+    cglist = ClebschGordan(lmax)
+
+    lblock = len(frames)//nblocks
+    featinv = {}
+    for iblock in tqdm.tqdm(range(nblocks)):
+        print("Computing spherical expansion")
+        bstart = iblock*lblock
+        bend = (iblock+1)*lblock
+        if iblock == nblocks-1:
+            bend = len(frames)
+        coefficients = get_rascal_coefficients_parallelized(frames[bstart:bend], hypers, mask=(None if mask=="" else mask), mask_id=(None if mask_id<0 else mask_id))
+        # merge all the coefficients, as we apply the same NICE to all species. if a mask has been specified, this does nothing
+        l = []
+        for f in coefficients.values():
+            l.append(f)
+        coefficients = np.vstack(l)
+        coefficients *= scale
+
+        invariants_even = nice_sequence.transform(coefficients, return_only_invariants = True)
+
+        for k in invariants_even:
+            if k in featinv:
+                featinv[k].append(invariants_even[k])
+            else:
+                featinv[k] = [invariants_even[k]]
+
+    print("Saving to ", output)
+    for k in featinv:
+        featinv[k] = np.vstack(featinv[k])
+    pickle.dump(featinv, open(output, "wb"))
+
+if __name__ == '__main__':
+    main()
+
+
+
+
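Note: compute_nice.py above consumes the pickle written by optimize_nice.py below; a minimal sketch of that contract ("model.pickle" is a placeholder path):

    import pickle

    model = pickle.load(open("model.pickle", "rb"))
    hypers = model["HYPERS"]        # rascal hypers plus bookkeeping keys ("scale", "reference-mask", ...)
    nice_sequence = model["NICE"]   # fitted StandardSequence from nice.transformers
    scale = hypers["scale"]         # normalization applied to new spherical expansion coefficients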
diff --git a/tools/optimize_nice.py b/tools/optimize_nice.py
new file mode 100755
index 0000000..845dd98
--- /dev/null
+++ b/tools/optimize_nice.py
@@ -0,0 +1,162 @@
+#!/usr/bin/python3
+
+import pickle
+import json
+import sys, os, argparse
+import ase.io as ase_io
+import numpy as np
+from nice.transformers import *
+from nice.rascal_coefficients import get_rascal_coefficients_parallelized
+
+
+def main():
+    """
+    Command-line utility to optimize the parameters of NICE features.
+    Will output a pickled model which can then be used to predict features.
+    """
+
+    # Tweak the autogenerated help output to look nicer
+    formatter = lambda prog: argparse.HelpFormatter(prog, max_help_position=22)
+    parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=formatter)
+
+    parser.add_argument('input', type=str, default="", nargs="?",
+                        help='XYZ file to load')
+    parser.add_argument('-o', '--output', type=str, default="",
+                        help='Output files prefix. Defaults to input filename with stripped extension')
+    parser.add_argument('--select', type=str, default=":",
+                        help='Selection of input structures. ASE format.')
+    parser.add_argument('--nmax', type=int, default=6,
+                        help='Number of radial channels')
+    parser.add_argument('--lmax', type=int, default=4,
+                        help='Number of angular momentum channels')
+    parser.add_argument('--rcut', type=float, default=4,
+                        help='Radial cutoff')
+    parser.add_argument('--sigma', type=float, default=0.5,
+                        help='Gaussian smearing')
+    parser.add_argument('--json', type=str, default='{}', help='Additional hypers, as JSON string')
+    parser.add_argument('--mask-id', type=int, default=-1,
+                        help='Central atom id to be selected')
+    parser.add_argument('--mask', type=str, default="",
+                        help='Central atom type to be selected')
+    parser.add_argument('--screen', type=int, default=1000,
+                        help='Target number of equivariants to be computed in the iteration step')
+    parser.add_argument('--screen0', type=int, default=500,
+                        help='Target number of invariants to be computed in the iteration step')
+    parser.add_argument('--nsph', type=int, default=0,
+                        help='Target number of spherical expansion coefficients to keep. 0 defaults to all')
+    parser.add_argument('--keep', type=int, default=200,
+                        help='Target number of equivariants to be contracted to')
+    parser.add_argument('--keep0', type=int, default=400,
+                        help='Target number of invariants to be stored')
+
+
+    args = parser.parse_args()
+
+    filename = args.input
+    output = args.output
+    select = args.select
+    nmax = args.nmax
+    lmax = args.lmax
+    rcut = args.rcut
+    gs = args.sigma
+    mask = args.mask
+    mask_id = args.mask_id
+    nsph = args.nsph
+    fkeep = args.keep
+    fkeep0 = args.keep0
+    fscreen = args.screen
+    fscreen0 = args.screen0
+    json_hypers = json.loads(args.json)
+    pcond = 1e-4
+
+    if output == "":
+        output = os.path.splitext(filename)[0]
+
+
+    numax = 4
+    purify_take = 50
+    blocklist = [ ]
+    for nu in range(1, numax-1):  # this starts from nu=2 actually
+        blocklist.append(
+            StandardBlock(ThresholdExpansioner(num_expand=fscreen),
+                          CovariantsPurifierBoth(max_take=purify_take),
+                          IndividualLambdaPCAsBoth(n_components=fkeep),
+                          ThresholdExpansioner(num_expand=fscreen0, mode='invariants'),
+                          InvariantsPurifier(),
+                          InvariantsPCA(n_components=fkeep0))
+        )
+
+    # at the last order, we only need invariants
+    blocklist.append(
+        StandardBlock(None, None, None,
+                      ThresholdExpansioner(num_expand=fscreen0, mode='invariants'),
+                      InvariantsPurifier(),
+                      InvariantsPCA(n_components=fkeep0))
+    )
+
+    # first-order equivariants are just combinations of the spherical coefficients, done by initial_pca
+    nice_sequence = StandardSequence(blocklist, initial_pca=IndividualLambdaPCAsBoth(n_components=nsph))
+
+    print("Loading structures ", filename, " structures: ", select)
+    structures = ase_io.read(filename, index=select)
+    for f in structures:
+        if f.cell.sum() == 0.0:  # fake PBC
+            f.cell = [100,100,100]
+            f.positions += np.asarray([50,50,50])
+            f.pbc = True
+        f.wrap(eps=1e-12)
+
+    print("Computing spherical expansion")
+
+    hypers = { **{
+        'interaction_cutoff': rcut,
+        'max_radial': nmax,
+        'max_angular': lmax,
+        'gaussian_sigma_constant': gs,
+        'gaussian_sigma_type': 'Constant',
+        'cutoff_smooth_width': 0.5,
+        'radial_basis': 'GTO'
+        }, **json_hypers }
+
+    coefficients = get_rascal_coefficients_parallelized(structures, hypers, mask=(None if mask=="" else mask), mask_id=(None if mask_id<0 else mask_id))
+    # merge all the coefficients, as we apply the same NICE to all species. if a mask has been specified, this does nothing
+    l = []
+    for f in coefficients.values():
+        l.append(f)
+    coefficients = np.vstack(l)
+    if nsph == 0:
+        nsph = coefficients.shape[1]
+    scale = 1.0/np.sqrt(np.sum(coefficients**2)/len(coefficients))
+    coefficients *= scale  # normalization of features
+
+    print("coefficients ", coefficients.shape)
+
+    hypers["keep"] = fkeep
+    hypers["keep0"] = fkeep0
+    hypers["screen"] = fscreen
+    hypers["screen0"] = fscreen0
+    hypers["pcond"] = pcond
+    hypers["reference-file"] = filename
+    hypers["reference-sel"] = select
+    hypers["reference-mask"] = mask
+    hypers["reference-mask-id"] = mask_id
+    hypers["scale"] = scale
+    hypers["nsph"] = nsph
+
+    print("Precomputing CG coefficients")
+    cglist = ClebschGordan(args.lmax)
+
+    print("Optimizing NICE")
+    nice_sequence.fit(coefficients, clebsch_gordan = cglist)
+
+    print("Dumping NICE model")
+    pickle.dump( {
+        "HYPERS" : hypers,
+        "NICE": nice_sequence,
+    }, open(output+".pickle", "wb"))
+
+    #np.save(output+"-inv", np.hstack([rhoinv1, rhoinv2, rhoinv3, rhoinv4, rhoinv5]) )
+
+if __name__ == '__main__':
+    main()
+
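Note: the feature file written by compute_nice.py is a pickled dict whose keys are whatever nice_sequence.transform(..., return_only_invariants=True) returns (typically one entry per body order), each holding an array with one row per environment. A minimal sketch of reading it back ("features.pickle" is a placeholder name):

    import pickle
    import numpy as np

    featinv = pickle.load(open("features.pickle", "rb"))
    # concatenate the invariants of all body orders into a single design matrix
    X = np.hstack([featinv[k] for k in sorted(featinv)])
    print(X.shape)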