diff --git a/README.md b/README.md index 85521e9..eeccc80 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,26 @@ # Fafoom - Flexible algorithm for optimization of molecules +:warning: **Fafoom is no longer maintained and will not be updated to Python 3** :warning: + Fafoom is a tool for sampling the conformational space of organic molecules. Fafoom is intended to work with FHI-aims (Fritz Haber Institute ab initio molecular simulations package). +## News + +* the paper "First-principles molecular structure search with a genetic algorithm" is now published in Journal of Chemical Information and Modeling; DOI: 10.1021/acs.jcim.5b00243 + +* a new branch targeting the implementation of a further degree of freedom ('orientation') has been created + ## Requirements * functionality of the tool: - * Python 2.7 - * Numpy - * RDKit (used version: Release_2014_09_2) + * Python (used for testing: 2.7.6) - Python3 is not supported! + * Numpy (used for testing: 1.8.2) + * RDKit (used for testing: Release_2015_03_1) * first-principles methods: * (recommended) FHI-aims (Fritz Haber Institute ab initio molecular simulations package) * (alternative) NWChem (NWChem: Open Source High-Performance Computational Chemistry) + * (alternative) ORCA (- An ab initio, DFT and semiempirical SCF-MO package -) ## How to use @@ -24,16 +33,30 @@ Fafoom is a tool for sampling the conformational space of organic molecules. Faf import fafoom -## Examples of usage +## Example of usage -Three examples are provided with the source code in the examples folder: +An implementation of a genetic algorithm is provided in the examples folder. +Depending on the used parameter file following genetic algorithm based searches can be run: -* genetic algorithm based search utilizing first-principles (FHI-aims required) -* genetic algorithm based search utilizing force fields (force fields accessed from RDKit) -* genetic algorithm based search utilizing first-principles via NWChem (NWChem required) +* parameters_aims.txt for first-principles (FHI-aims required) +* parameters_ff.txt for force fields (force fields accessed from RDKit) +* parameters_nwchem.txt for first-principles via NWChem (NWChem required) +* parameters_orca.txt for first-principles via ORCA (ORCA required) Get familiar with the provided manual to learn more about the tool and the parameters. +## Outlook + +Development goals: + +* flexible formulation of the scoring function, e.g. allowing for optimizing a user-defined property +* adding more kinds of degrees of freedom, e.g. orientation of a molecule +* adding wrappers for different molecular simulations packages + +Comments, feedback or development contributions are always welcome! + + + ## License Fafoom is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. diff --git a/examples/GA_first_principles_FHIaims/README.md b/examples/GA_first_principles_FHIaims/README.md deleted file mode 100644 index e3944ce..0000000 --- a/examples/GA_first_principles_FHIaims/README.md +++ /dev/null @@ -1,9 +0,0 @@ -Usage: - -python ga_simple.py parameters_aims.txt - -In case you want to restart your calculation (at least one genetic algorithm iteration must be completed), use: - -python ga_restart.py parameters_aims.txt - -Provided example allows for optimization of alanine dipeptide. diff --git a/examples/GA_first_principles_FHIaims/ga_restart.py b/examples/GA_first_principles_FHIaims/ga_restart.py deleted file mode 100644 index e27afc7..0000000 --- a/examples/GA_first_principles_FHIaims/ga_restart.py +++ /dev/null @@ -1,183 +0,0 @@ -import numpy as np -import sys -import glob - -from fafoom import MoleculeDescription, Structure, selection, print_output,\ - backup, remover_dir - -p_file = sys.argv[1] - -with open(p_file) as f: - p_dict = dict(line.strip().partition(' ')[::2] for line in f) -f.close() -popsize = int(p_dict['popsize']) -cistrans1, cistrans2 = float(p_dict['cistrans1']), float(p_dict['cistrans2']) -black_dir = str(p_dict['black_dir']) -max_iter = int(p_dict['max_iter']) -energy_var = float(p_dict['energy_var']) -selection_type = str(p_dict['selection']) -fitness_sum_limit = str(p_dict['fitness_sum_limit']) -prob_for_crossing = float(p_dict['prob_for_crossing']) -prob_for_mut_cistrans = float(p_dict['prob_for_mut_cistrans']) -prob_for_mut_torsions = float(p_dict['prob_for_mut_torsions']) -max_mutations_cistrans = int(p_dict['max_mutations_cistrans']) -max_mutations_torsions = int(p_dict['max_mutations_torsions']) -aims_call = str(p_dict['aims_call']) -iter_limit_conv = int(p_dict['iter_limit_conv']) -energy_diff_conv = float(p_dict['energy_diff_conv']) -energy_wanted = float(p_dict['energy_wanted']) - -cnt_max = 100 -array = np.array -population, blacklist = [], [] -min_energy = [] -print_output(" \n ___Restart will be performed___") -with open("backup_mol.dat", 'r') as inf: - mol = eval(inf.readline()) -inf.close() -with open("backup_population.dat", 'r') as inf: - for line in inf: - population.append(eval(line)) -inf.close() -with open("backup_blacklist.dat", 'r') as inf: - for line in inf: - blacklist.append(eval(line)) -inf.close() -with open("backup_min_energy.dat", 'r') as inf: - for line in inf: - min_energy.append(eval(line)) -inf.close() -with open("backup_iteration.dat", 'r') as inf: - iteration_tmp = eval(inf.readline()) -inf.close() -population.sort() -for i in range(len(population)): - print_output(str(population[i])+" "+str(float(population[i]))) -print_output("Blacklist: " + ', '.join([str(v) for v in blacklist])) -iteration = iteration_tmp+1 -print_output(" \n ___Reinitialization completed___") -remover_dir('generation_'+str(iteration)+'_child1') -remover_dir('generation_'+str(iteration)+'_child2') - - -def mutate_and_relax(candidate, name, prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max): - print_output("__%s__" % name) - found = False - cnt = 0 - while found is False and cnt < cnt_max: - mut1, mut2 = np.random.rand(), np.random.rand() - candidate_backup = Structure(candidate) - if mut1 < prob_for_mut_torsions: - candidate.mutation_tor(max_mutations_torsions) - print_output("%s after mutation in torsions: " % name + str( - candidate)) - if mut2 < prob_for_mut_cistrans: - candidate.mutation_cistrans(max_mutations_cistrans) - print_output("%s after mutation in cistrans: " % name+str( - candidate)) - if not candidate.is_geometry_valid(): - print_output(" The geometry of %s is invalid." % name) - cnt += 1 - candidate = candidate_backup - continue - if candidate not in blacklist: - name = "generation_%d_%s" % (iteration, name) - candidate.perform_aims(p_file, aims_call, name) - candidate.send_to_blacklist(black_dir, blacklist) - print_output(str(candidate)+":, energy: "+str(float( - candidate))+", is temporary added to the population") - found = True - population.append(candidate) - else: - print_output("Geomerty fine, but the structure already known") - cnt += 1 - candidate = candidate_backup - if cnt == cnt_max: - raise Exception("The allowed number of trials for generating a" - " unique child has been exceeded.") - - -def perform_backup(): - backup("backup_mol.dat", mol) - backup("backup_population.dat", population) - backup("backup_blacklist.dat", blacklist) - backup("backup_iteration.dat", iteration) - backup("backup_min_energy.dat", min_energy) - -while iteration < max_iter: - print_output(" \n ___Start of iteration "+str(iteration)+"___") - (parent1, parent2, fitness) = selection(population, selection_type, - energy_var, fitness_sum_limit) - param = np.random.rand() - cnt = 0 - while param < prob_for_crossing and cnt < cnt_max: - child1, child2 = Structure.crossover(parent1, parent2) - if child1.is_geometry_valid() and child2.is_geometry_valid(): - print_output("Crossover outcome: "+str(child1)+(", ")+str(child2)) - break - else: - print_output("The geometries created via crossover are invalid.") - cnt += 1 - continue - else: - child1, child2 = Structure(parent1), Structure(parent2) - print_output("No crossover was performed. Children are copies of " - "parents: " + str(child1) + (": ") + str(child1) + - (", ") + str(child2) + (": ") + str(child2)) - for child in child1, child2: - attr_list = ["initial_sdf_string", "energy", - "initial_values_cistrans", "initial_values_tor"] - for attr in attr_list: - delattr(child, attr) - try: - mutate_and_relax(child1, "child1", prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max) - except Exception as exc: - print_output(exc) - sys.exit(0) - try: - mutate_and_relax(child2, "child2", prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max) - except Exception as exc: - print_output(exc) - sys.exit(0) - population.sort() - print_output("Sorted population: " + ', '.join([ - str(v) for v in population])) - del population[-1] - del population[-1] - print_output("Sorted population after removing two structures with highest" - " energy: " + ', '.join([str(v) for v in population])) - print_output("Lowest energy of the population: %.3f" % population[0].energy) - min_energy.append(population[0].energy) - print_output("Lowest energies in run: "+str(min_energy)) - if iteration >= iter_limit_conv-1: - print_output("Checking for convergence") - d = abs(min_energy[iteration+1]-min_energy[iteration+1-iter_limit_conv]) - if population[0].energy < energy_wanted or d < energy_diff_conv: - perform_backup() - print_output("Converged") - killfile = open("kill.dat", "w") - killfile.close() - sys.exit(0) - else: - perform_backup() - print_output("Not converged yet") - if iteration == max_iter-1: - perform_backup() - print_output("Max. number of iterations reached. The code terminates") - killfile = open("kill.dat", "w") - killfile.close() - sys.exit(0) - else: - perform_backup() - print_output("Next iteration will be perfomed") - if len(glob.glob("*/kill.dat")) == 0 and len(glob.glob("kill.dat")) == 0: - iteration += 1 - else: - print_output("Kill.dat file discovered. The code terminates") - sys.exit(0) diff --git a/examples/GA_first_principles_FHIaims/ga_simple.py b/examples/GA_first_principles_FHIaims/ga_simple.py deleted file mode 100644 index 0fb4b5e..0000000 --- a/examples/GA_first_principles_FHIaims/ga_simple.py +++ /dev/null @@ -1,192 +0,0 @@ -import numpy as np -import sys -import glob - -from fafoom import MoleculeDescription, Structure, selection, print_output,\ - backup - -p_file = sys.argv[1] - -with open(p_file) as f: - p_dict = dict(line.strip().partition(' ')[::2] for line in f) -f.close() -popsize = int(p_dict['popsize']) -cistrans1, cistrans2 = float(p_dict['cistrans1']), float(p_dict['cistrans2']) -black_dir = str(p_dict['black_dir']) -max_iter = int(p_dict['max_iter']) -energy_var = float(p_dict['energy_var']) -selection_type = str(p_dict['selection']) -fitness_sum_limit = str(p_dict['fitness_sum_limit']) -prob_for_crossing = float(p_dict['prob_for_crossing']) -prob_for_mut_cistrans = float(p_dict['prob_for_mut_cistrans']) -prob_for_mut_torsions = float(p_dict['prob_for_mut_torsions']) -max_mutations_cistrans = int(p_dict['max_mutations_cistrans']) -max_mutations_torsions = int(p_dict['max_mutations_torsions']) -aims_call = str(p_dict['aims_call']) -iter_limit_conv = int(p_dict['iter_limit_conv']) -energy_diff_conv = float(p_dict['energy_diff_conv']) -energy_wanted = float(p_dict['energy_wanted']) - -cnt_max = 100 - -mol = MoleculeDescription(p_file) -mol.get_mol_parameters() -mol.create_template_sdf() - -population, blacklist = [], [] -min_energy = [] -print_output("___Initialization___") -cnt = 0 -while len(population) < popsize and cnt < cnt_max: - print_output("New trial") - str3d = Structure(mol) - str3d.generate_random_structure(cistrans1, cistrans2) - if not str3d.is_geometry_valid(): - print_output("The geometry of "+str(str3d)+" is invalid.") - cnt += 1 - continue - if str3d not in blacklist: - name = "initial_%d" % (len(population)) - str3d.perform_aims(p_file, aims_call, name) - str3d.send_to_blacklist(black_dir, blacklist) - population.append(str3d) - print_output(str(str3d)+", energy: "+str(float(str3d)) + - ", was added to the population") - cnt += 1 - else: - print_output("Geomerty of "+str(str3d)+" is fine, but already known.") - cnt += 1 -if cnt == cnt_max: - print_output("The allowed number of trials for building the population has \ - been exceeded. The code terminates.") - sys.exit(0) -print_output("___Initialization completed___") -population.sort() -print_output("Initial population after sorting: ") -for i in range(len(population)): - print_output(str(population[i])+" "+str(float(population[i]))) -min_energy.append(population[0].energy) -print_output("Blacklist: " + ', '.join([str(v) for v in blacklist])) -iteration = 0 - - -def mutate_and_relax(candidate, name, prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max): - print_output("__%s__" % name) - found = False - cnt = 0 - while found is False and cnt < cnt_max: - mut1, mut2 = np.random.rand(), np.random.rand() - candidate_backup = Structure(candidate) - if mut1 < prob_for_mut_torsions: - candidate.mutation_tor(max_mutations_torsions) - print_output("%s after mutation in torsions: " % name + str( - candidate)) - if mut2 < prob_for_mut_cistrans: - candidate.mutation_cistrans(max_mutations_cistrans) - print_output("%s after mutation in cistrans: " % name + str( - candidate)) - if not candidate.is_geometry_valid(): - print_output(" The geometry of %s is invalid." % name) - cnt += 1 - candidate = candidate_backup - continue - if candidate not in blacklist: - name = "generation_%d_%s" % (iteration, name) - candidate.perform_aims(p_file, aims_call, name) - candidate.send_to_blacklist(black_dir, blacklist) - print_output(str(candidate)+":, energy: "+str(float( - candidate))+", is temporary added to the population") - found = True - population.append(candidate) - else: - print_output("Geomerty fine, but the structure already known") - cnt += 1 - candidate = candidate_backup - if cnt == cnt_max: - raise Exception("The allowed number of trials for generating a" - " unique child has been exceeded.") - - -def perform_backup(): - backup("backup_mol.dat", mol) - backup("backup_population.dat", population) - backup("backup_blacklist.dat", blacklist) - backup("backup_iteration.dat", iteration) - backup("backup_min_energy.dat", min_energy) -while iteration < max_iter: - print_output(" \n ___Start of iteration " + str(iteration) + "___") - (parent1, parent2, fitness) = selection(population, selection_type, - energy_var, fitness_sum_limit) - param = np.random.rand() - cnt = 0 - while param < prob_for_crossing and cnt < cnt_max: - child1, child2 = Structure.crossover(parent1, parent2) - if child1.is_geometry_valid() and child2.is_geometry_valid(): - print_output("Crossover outcome: "+str(child1)+(", ")+str(child2)) - break - else: - print_output("The geometries created via crossover are invalid.") - cnt += 1 - continue - else: - child1, child2 = Structure(parent1), Structure(parent2) - print_output("No crossover was performed. Children are copies of " - "parents: " + str(child1) + (": ") + str(child1) + - (", ") + str(child2) + (": ") + str(child2)) - for child in child1, child2: - attr_list = ["initial_sdf_string", "energy", - "initial_values_cistrans", "initial_values_tor"] - for attr in attr_list: - delattr(child, attr) - try: - mutate_and_relax(child1, "child1", prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max) - except Exception as exc: - print_output(exc) - sys.exit(0) - try: - mutate_and_relax(child2, "child2", prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max) - except Exception as exc: - print_output(exc) - sys.exit(0) - population.sort() - print_output("Sorted population: " + ', '.join([ - str(v) for v in population])) - del population[-1] - del population[-1] - print_output("Sorted population after removing two structures with highest" - " energy: " + ', '.join([str(v) for v in population])) - print_output("Lowest energy of the population: %.3f" % population[0].energy) - min_energy.append(population[0].energy) - print_output("Lowest energies in run: "+str(min_energy)) - if iteration >= iter_limit_conv-1: - print_output("Checking for convergence") - d = abs(min_energy[iteration+1]-min_energy[iteration+1-iter_limit_conv]) - if population[0].energy < energy_wanted or d < energy_diff_conv: - perform_backup() - print_output("Converged") - killfile = open("kill.dat", "w") - killfile.close() - sys.exit(0) - else: - perform_backup() - print_output("Not converged yet") - if iteration == max_iter-1: - perform_backup() - print_output("Max. number of iterations reached. The code terminates") - killfile = open("kill.dat", "w") - killfile.close() - sys.exit(0) - else: - perform_backup() - print_output("Next iteration will be perfomed") - if len(glob.glob("*/kill.dat")) == 0 and len(glob.glob("kill.dat")) == 0: - iteration += 1 - else: - print_output("Kill.dat file discovered. The code terminates") - sys.exit(0) diff --git a/examples/GA_first_principles_FHIaims/parameters_aims.txt b/examples/GA_first_principles_FHIaims/parameters_aims.txt deleted file mode 100644 index 1d34ed1..0000000 --- a/examples/GA_first_principles_FHIaims/parameters_aims.txt +++ /dev/null @@ -1,36 +0,0 @@ -###### Molecule ###### - -smile CC(=O)N[C@H](C(=O)NC)C -custom True -smart_torsion [C,N,O]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[C,N,O] -smart_cistrans C~[$(C=O)]-[$(NC)]~[C] -smart_custom C~[$(C=O)]-[$(NC)]~[C] -rmsd_type cartesian -distance_cutoff_1 1.55 -distance_cutoff_2 2.15 -rmsd_cutoff_uniq 0.1 -chiral True - -##### GA settings #### - -energy_var 0.001 -selection roulette_wheel -fitness_sum_limit 1.2 -popsize 5 -prob_for_crossing 0.95 -prob_for_mut_cistrans 0.4 -prob_for_mut_torsions 0.8 -max_mutations_cistrans 1 -max_mutations_torsions 2 -cistrans1 0 -cistrans2 180 - -#####Run settings##### - -max_iter 20 -iter_limit_conv 10 -energy_diff_conv 0.001 -energy_wanted -10000000.00 -sourcedir adds -black_dir blacklist -aims_call mpirun -n 4 aims.072713.scalapack.mpi.x diff --git a/examples/GA_first_principles_NWChem/README.md b/examples/GA_first_principles_NWChem/README.md deleted file mode 100644 index 852e6d9..0000000 --- a/examples/GA_first_principles_NWChem/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Usage: - -python ga_nwchem_simple.py parameters_nwchem.txt - -Provided example allows for optimization of alanine dipeptide. diff --git a/examples/GA_first_principles_NWChem/ga_nwchem_simple.py b/examples/GA_first_principles_NWChem/ga_nwchem_simple.py deleted file mode 100644 index fe700c1..0000000 --- a/examples/GA_first_principles_NWChem/ga_nwchem_simple.py +++ /dev/null @@ -1,192 +0,0 @@ -import numpy as np -import sys -import glob - -from fafoom import MoleculeDescription, Structure, selection, print_output,\ - backup - -p_file = sys.argv[1] - -with open(p_file) as f: - p_dict = dict(line.strip().partition(' ')[::2] for line in f) -f.close() -popsize = int(p_dict['popsize']) -cistrans1, cistrans2 = float(p_dict['cistrans1']), float(p_dict['cistrans2']) -black_dir = str(p_dict['black_dir']) -max_iter = int(p_dict['max_iter']) -energy_var = float(p_dict['energy_var']) -selection_type = str(p_dict['selection']) -fitness_sum_limit = str(p_dict['fitness_sum_limit']) -prob_for_crossing = float(p_dict['prob_for_crossing']) -prob_for_mut_cistrans = float(p_dict['prob_for_mut_cistrans']) -prob_for_mut_torsions = float(p_dict['prob_for_mut_torsions']) -max_mutations_cistrans = int(p_dict['max_mutations_cistrans']) -max_mutations_torsions = int(p_dict['max_mutations_torsions']) -nwchem_call = str(p_dict['nwchem_call']) -iter_limit_conv = int(p_dict['iter_limit_conv']) -energy_diff_conv = float(p_dict['energy_diff_conv']) -energy_wanted = float(p_dict['energy_wanted']) - -cnt_max = 100 - -mol = MoleculeDescription(p_file) -mol.get_mol_parameters() -mol.create_template_sdf() - -population, blacklist = [], [] -min_energy = [] -print_output("___Initialization___") -cnt = 0 -while len(population) < popsize and cnt < cnt_max: - print_output("New trial") - str3d = Structure(mol) - str3d.generate_random_structure(cistrans1, cistrans2) - if not str3d.is_geometry_valid(): - print_output("The geometry of "+str(str3d)+" is invalid.") - cnt += 1 - continue - if str3d not in blacklist: - name = "initial_%d" % (len(population)) - str3d.perform_nwchem(p_file, nwchem_call) - str3d.send_to_blacklist(black_dir, blacklist) - population.append(str3d) - print_output(str(str3d)+", energy: "+str(float(str3d)) + - ", was added to the population") - cnt += 1 - else: - print_output("Geomerty of "+str(str3d)+" is fine, but already known.") - cnt += 1 -if cnt == cnt_max: - print_output("The allowed number of trials for building the population has \ - been exceeded. The code terminates.") - sys.exit(0) -print_output("___Initialization completed___") -population.sort() -print_output("Initial population after sorting: ") -for i in range(len(population)): - print_output(str(population[i])+" "+str(float(population[i]))) -min_energy.append(population[0].energy) -print_output("Blacklist: " + ', '.join([str(v) for v in blacklist])) -iteration = 0 - - -def mutate_and_relax(candidate, name, prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max): - print_output("__%s__" % name) - found = False - cnt = 0 - while found is False and cnt < cnt_max: - mut1, mut2 = np.random.rand(), np.random.rand() - candidate_backup = Structure(candidate) - if mut1 < prob_for_mut_torsions: - candidate.mutation_tor(max_mutations_torsions) - print_output("%s after mutation in torsions: " % name + str( - candidate)) - if mut2 < prob_for_mut_cistrans: - candidate.mutation_cistrans(max_mutations_cistrans) - print_output("%s after mutation in cistrans: " % name + str( - candidate)) - if not candidate.is_geometry_valid(): - print_output(" The geometry of %s is invalid." % name) - cnt += 1 - candidate = candidate_backup - continue - if candidate not in blacklist: - name = "generation_%d_%s" % (iteration, name) - candidate.perform_nwchem(p_file, nwchem_call) - candidate.send_to_blacklist(black_dir, blacklist) - print_output(str(candidate)+":, energy: "+str(float( - candidate))+", is temporary added to the population") - found = True - population.append(candidate) - else: - print_output("Geomerty fine, but the structure already known") - cnt += 1 - candidate = candidate_backup - if cnt == cnt_max: - raise Exception("The allowed number of trials for generating a" - " unique child has been exceeded.") - - -def perform_backup(): - backup("backup_mol.dat", mol) - backup("backup_population.dat", population) - backup("backup_blacklist.dat", blacklist) - backup("backup_iteration.dat", iteration) - backup("backup_min_energy.dat", min_energy) -while iteration < max_iter: - print_output(" \n ___Start of iteration " + str(iteration) + "___") - (parent1, parent2, fitness) = selection(population, selection_type, - energy_var, fitness_sum_limit) - param = np.random.rand() - cnt = 0 - while param < prob_for_crossing and cnt < cnt_max: - child1, child2 = Structure.crossover(parent1, parent2) - if child1.is_geometry_valid() and child2.is_geometry_valid(): - print_output("Crossover outcome: "+str(child1)+(", ")+str(child2)) - break - else: - print_output("The geometries created via crossover are invalid.") - cnt += 1 - continue - else: - child1, child2 = Structure(parent1), Structure(parent2) - print_output("No crossover was performed. Children are copies of " - "parents: " + str(child1) + (": ") + str(child1) + - (", ") + str(child2) + (": ") + str(child2)) - for child in child1, child2: - attr_list = ["initial_sdf_string", "energy", - "initial_values_cistrans", "initial_values_tor"] - for attr in attr_list: - delattr(child, attr) - try: - mutate_and_relax(child1, "child1", prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max) - except Exception as exc: - print_output(exc) - sys.exit(0) - try: - mutate_and_relax(child2, "child2", prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max) - except Exception as exc: - print_output(exc) - sys.exit(0) - population.sort() - print_output("Sorted population: " + ', '.join([ - str(v) for v in population])) - del population[-1] - del population[-1] - print_output("Sorted population after removing two structures with highest" - " energy: " + ', '.join([str(v) for v in population])) - print_output("Lowest energy of the population: %.3f" % population[0].energy) - min_energy.append(population[0].energy) - print_output("Lowest energies in run: "+str(min_energy)) - if iteration >= iter_limit_conv-1: - print_output("Checking for convergence") - d = abs(min_energy[iteration+1]-min_energy[iteration+1-iter_limit_conv]) - if population[0].energy < energy_wanted or d < energy_diff_conv: - perform_backup() - print_output("Converged") - killfile = open("kill.dat", "w") - killfile.close() - sys.exit(0) - else: - perform_backup() - print_output("Not converged yet") - if iteration == max_iter-1: - perform_backup() - print_output("Max. number of iterations reached. The code terminates") - killfile = open("kill.dat", "w") - killfile.close() - sys.exit(0) - else: - perform_backup() - print_output("Next iteration will be perfomed") - if len(glob.glob("*/kill.dat")) == 0 and len(glob.glob("kill.dat")) == 0: - iteration += 1 - else: - print_output("Kill.dat file discovered. The code terminates") - sys.exit(0) diff --git a/examples/GA_first_principles_NWChem/parameters_nwchem.txt b/examples/GA_first_principles_NWChem/parameters_nwchem.txt deleted file mode 100644 index 45e4855..0000000 --- a/examples/GA_first_principles_NWChem/parameters_nwchem.txt +++ /dev/null @@ -1,37 +0,0 @@ -###### Molecule ###### - -smile CC(=O)N[C@H](C(=O)NC)C -custom True -smart_torsion [C,N,O]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[C,N,O] -smart_cistrans C~[$(C=O)]-[$(NC)]~[C] -smart_custom C~[$(C=O)]-[$(NC)]~[C] -rmsd_type cartesian -distance_cutoff_1 1.55 -distance_cutoff_2 2.15 -rmsd_cutoff_uniq 0.1 -chiral True - -##### GA settings #### - -energy_var 0.001 -selection roulette_wheel -fitness_sum_limit 1.2 -popsize 5 -prob_for_crossing 0.95 -prob_for_mut_cistrans 0.4 -prob_for_mut_torsions 0.8 -max_mutations_cistrans 1 -max_mutations_torsions 2 -cistrans1 0 -cistrans2 180 - -#####Run settings##### - -functional xpbe96 -basis_set STO-6G -max_iter 20 -iter_limit_conv 10 -energy_diff_conv 0.001 -energy_wanted -10000000.00 -black_dir blacklist -nwchem_call mpirun -n 4 nwchem diff --git a/examples/GA_force_field_RDKit/README.md b/examples/GA_force_field_RDKit/README.md deleted file mode 100644 index 0e7e224..0000000 --- a/examples/GA_force_field_RDKit/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Usage: - -python ga_ff_simple.py parameters_ff.txt - -Provided example allows for optimization of alanine dipeptide. diff --git a/examples/GA_force_field_RDKit/ga_ff_simple.py b/examples/GA_force_field_RDKit/ga_ff_simple.py deleted file mode 100644 index 71a87e1..0000000 --- a/examples/GA_force_field_RDKit/ga_ff_simple.py +++ /dev/null @@ -1,191 +0,0 @@ -import numpy as np -import sys -import glob - -from fafoom import MoleculeDescription, Structure, selection, print_output,\ - backup - -p_file = sys.argv[1] - -with open(p_file) as f: - p_dict = dict(line.strip().partition(' ')[::2] for line in f) -f.close() -popsize = int(p_dict['popsize']) -cistrans1, cistrans2 = float(p_dict['cistrans1']), float(p_dict['cistrans2']) -black_dir = str(p_dict['black_dir']) -max_iter = int(p_dict['max_iter']) -energy_var = float(p_dict['energy_var']) -selection_type = str(p_dict['selection']) -fitness_sum_limit = str(p_dict['fitness_sum_limit']) -prob_for_crossing = float(p_dict['prob_for_crossing']) -prob_for_mut_cistrans = float(p_dict['prob_for_mut_cistrans']) -prob_for_mut_torsions = float(p_dict['prob_for_mut_torsions']) -max_mutations_cistrans = int(p_dict['max_mutations_cistrans']) -max_mutations_torsions = int(p_dict['max_mutations_torsions']) -iter_limit_conv = int(p_dict['iter_limit_conv']) -energy_diff_conv = float(p_dict['energy_diff_conv']) -energy_wanted = float(p_dict['energy_wanted']) - -cnt_max = 100 - -mol = MoleculeDescription(p_file) -mol.get_mol_parameters() -mol.create_template_sdf() - -population, blacklist = [], [] -min_energy = [] -print_output("___Initialization___") -cnt = 0 -while len(population) < popsize and cnt < cnt_max: - print_output("New trial") - str3d = Structure(mol) - str3d.generate_random_structure(cistrans1, cistrans2) - if not str3d.is_geometry_valid(): - print_output("The geometry of "+str(str3d)+" is invalid.") - cnt += 1 - continue - if str3d not in blacklist: - name = "initial_%d" % (len(population)) - str3d.perform_ff(p_file) - str3d.send_to_blacklist(black_dir, blacklist) - population.append(str3d) - print_output(str(str3d)+", energy: "+str(float(str3d)) + - ", was added to the population") - cnt += 1 - else: - print_output("Geomerty of "+str(str3d)+" is fine, but already known.") - cnt += 1 -if cnt == cnt_max: - print_output("The allowed number of trials for building the population has \ - been exceeded. The code terminates.") - sys.exit(0) -print_output("___Initialization completed___") -population.sort() -print_output("Initial population after sorting: ") -for i in range(len(population)): - print_output(str(population[i])+" "+str(float(population[i]))) -min_energy.append(population[0].energy) -print_output("Blacklist: " + ', '.join([str(v) for v in blacklist])) -iteration = 0 - - -def mutate_and_relax(candidate, name, prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max): - print_output("__%s__" % name) - found = False - cnt = 0 - while found is False and cnt < cnt_max: - mut1, mut2 = np.random.rand(), np.random.rand() - candidate_backup = Structure(candidate) - if mut1 < prob_for_mut_torsions: - candidate.mutation_tor(max_mutations_torsions) - print_output("%s after mutation in torsions: " % name + str( - candidate)) - if mut2 < prob_for_mut_cistrans: - candidate.mutation_cistrans(max_mutations_cistrans) - print_output("%s after mutation in cistrans: " % name + str( - candidate)) - if not candidate.is_geometry_valid(): - print_output(" The geometry of %s is invalid." % name) - cnt += 1 - candidate = candidate_backup - continue - if candidate not in blacklist: - name = "generation_%d_%s" % (iteration, name) - candidate.perform_ff(p_file) - candidate.send_to_blacklist(black_dir, blacklist) - print_output(str(candidate)+":, energy: "+str(float( - candidate))+", is temporary added to the population") - found = True - population.append(candidate) - else: - print_output("Geomerty fine, but the structure already known") - cnt += 1 - candidate = candidate_backup - if cnt == cnt_max: - raise Exception("The allowed number of trials for generating a" - " unique child has been exceeded.") - - -def perform_backup(): - backup("backup_mol.dat", mol) - backup("backup_population.dat", population) - backup("backup_blacklist.dat", blacklist) - backup("backup_iteration.dat", iteration) - backup("backup_min_energy.dat", min_energy) -while iteration < max_iter: - print_output(" \n ___Start of iteration " + str(iteration) + "___") - (parent1, parent2, fitness) = selection(population, selection_type, - energy_var, fitness_sum_limit) - param = np.random.rand() - cnt = 0 - while param < prob_for_crossing and cnt < cnt_max: - child1, child2 = Structure.crossover(parent1, parent2) - if child1.is_geometry_valid() and child2.is_geometry_valid(): - print_output("Crossover outcome: "+str(child1)+(", ")+str(child2)) - break - else: - print_output("The geometries created via crossover are invalid.") - cnt += 1 - continue - else: - child1, child2 = Structure(parent1), Structure(parent2) - print_output("No crossover was performed. Children are copies of " - "parents: " + str(child1) + (": ") + str(child1) + - (", ") + str(child2) + (": ") + str(child2)) - for child in child1, child2: - attr_list = ["initial_sdf_string", "energy", - "initial_values_cistrans", "initial_values_tor"] - for attr in attr_list: - delattr(child, attr) - try: - mutate_and_relax(child1, "child1", prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max) - except Exception as exc: - print_output(exc) - sys.exit(0) - try: - mutate_and_relax(child2, "child2", prob_for_mut_torsions, - prob_for_mut_cistrans, max_mutations_torsions, - max_mutations_cistrans, iteration, cnt_max) - except Exception as exc: - print_output(exc) - sys.exit(0) - population.sort() - print_output("Sorted population: " + ', '.join([ - str(v) for v in population])) - del population[-1] - del population[-1] - print_output("Sorted population after removing two structures with highest" - " energy: " + ', '.join([str(v) for v in population])) - print_output("Lowest energy of the population: %.3f" % population[0].energy) - min_energy.append(population[0].energy) - print_output("Lowest energies in run: "+str(min_energy)) - if iteration >= iter_limit_conv-1: - print_output("Checking for convergence") - d = abs(min_energy[iteration+1]-min_energy[iteration+1-iter_limit_conv]) - if population[0].energy < energy_wanted or d < energy_diff_conv: - perform_backup() - print_output("Converged") - killfile = open("kill.dat", "w") - killfile.close() - sys.exit(0) - else: - perform_backup() - print_output("Not converged yet") - if iteration == max_iter-1: - perform_backup() - print_output("Max. number of iterations reached. The code terminates") - killfile = open("kill.dat", "w") - killfile.close() - sys.exit(0) - else: - perform_backup() - print_output("Next iteration will be perfomed") - if len(glob.glob("*/kill.dat")) == 0 and len(glob.glob("kill.dat")) == 0: - iteration += 1 - else: - print_output("Kill.dat file discovered. The code terminates") - sys.exit(0) diff --git a/examples/GA_force_field_RDKit/parameters_ff.txt b/examples/GA_force_field_RDKit/parameters_ff.txt deleted file mode 100644 index 53c4916..0000000 --- a/examples/GA_force_field_RDKit/parameters_ff.txt +++ /dev/null @@ -1,38 +0,0 @@ -###### Molecule ###### - -smile CC(=O)N[C@H](C(=O)NC)C -custom True -smart_torsion [C,N,O]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[C,N,O] -smart_cistrans C~[$(C=O)]-[$(NC)]~[C] -smart_custom C~[$(C=O)]-[$(NC)]~[C] -rmsd_type cartesian -distance_cutoff_1 1.55 -distance_cutoff_2 2.15 -rmsd_cutoff_uniq 0.1 -chiral True - -##### GA settings #### - -energy_var 0.001 -selection roulette_wheel -fitness_sum_limit 1.2 -popsize 5 -prob_for_crossing 0.95 -prob_for_mut_cistrans 0.4 -prob_for_mut_torsions 0.8 -max_mutations_cistrans 1 -max_mutations_torsions 2 -cistrans1 0 -cistrans2 180 - -#####Run settings##### - -force_field mmff94 -steps 1000 -force_tol 1e-04 -energy_tol 1e-06 -max_iter 200 -iter_limit_conv 50 -energy_diff_conv 0.001 -energy_wanted -10000000.00 -black_dir blacklist diff --git a/examples/README.txt b/examples/README.txt new file mode 100644 index 0000000..e623647 --- /dev/null +++ b/examples/README.txt @@ -0,0 +1,9 @@ +Usage: + +python ga.py parameter_file_name + +If you want to restart your calculation (at least one genetic algorithm iteration must be completed), use: + +python ga.py parameter_file_name + +Provided example allows for optimization of alanine dipeptide. diff --git a/examples/ga.py b/examples/ga.py new file mode 100644 index 0000000..564b3fe --- /dev/null +++ b/examples/ga.py @@ -0,0 +1,202 @@ +import numpy as np +import sys + +from fafoom import MoleculeDescription, Structure, selection, print_output,\ + remover_dir, set_default, file2dict +import fafoom.run_utilities as run_util + +# Decide for restart or a simple run. +opt = run_util.simple_or_restart() +p_file = sys.argv[1] +# Build a dictionary from two section of the parameter file. +params = file2dict(p_file, ['GA settings', 'Run settings']) + +dict_default = {'energy_var': 0.001, 'selection': "roulette_wheel", + 'fitness_sum_limit': 1.2, 'popsize': 10, + 'prob_for_crossing': 1.0, 'max_iter': 30, + 'iter_limit_conv': 20, 'energy_diff_conv': 0.001} +# Set defaults for parameters not defined in the parameter file. +params = set_default(params, dict_default) +energy_function = run_util.detect_energy_function(params) + +cnt_max = 200 +population, blacklist = [], [] +min_energy = [] + +if opt == "simple": + mol = MoleculeDescription(p_file) + # Assign the permanent attributes to the molecule. + mol.get_parameters() + mol.create_template_sdf() + # Check for potential degree of freedom related parameters. + linked_params = run_util.find_linked_params(mol, params) + print_output("Number of atoms: "+str(mol.atoms)) + print_output("Number of bonds: "+str(mol.bonds)) + for dof in mol.dof_names: + print_output("Number of identified "+str(dof)+": " + + str(len(getattr(mol, dof)))) + print_output("Identified "+str(dof)+": "+str(getattr(mol, dof))) + + print_output("___Initialization___") + cnt = 0 + # Generate sensible and unique 3d structures. + while len(population) < params['popsize'] and cnt < cnt_max: + print_output("New trial") + str3d = Structure(mol) + str3d.generate_structure() + if not str3d.is_geometry_valid(): + print_output("The geometry of "+str(str3d)+" is invalid.") + cnt += 1 + continue + if str3d not in blacklist: + name = "initial_%d" % (len(population)) + # Perform the local optimization + run_util.optimize(str3d, energy_function, params, name) + run_util.check_for_kill() + str3d.send_to_blacklist(blacklist) + population.append(str3d) + print_output(str(str3d)+", energy: "+str(float(str3d)) + + ", was added to the population") + run_util.relax_info(str3d) + cnt += 1 + else: + print_output("Geomerty of "+str(str3d)+" is fine, but already " + "known.") + cnt += 1 + if cnt == cnt_max: + print_output("The allowed number of trials for building the " + "population has been exceeded. The code terminates.") + sys.exit(0) + print_output("___Initialization completed___") + population.sort() + print_output("Initial population after sorting: ") + for i in range(len(population)): + print_output(str(population[i])+" "+str(float(population[i]))) + min_energy.append(population[0].energy) + print_output("Blacklist: " + ', '.join([str(v) for v in blacklist])) + iteration = 0 + + +if opt == "restart": + # Reconstruct the molecule, population, blacklist and the state of the run. + print_output(" \n ___Restart will be performed___") + with open("backup_mol.dat", 'r') as inf: + mol = eval(inf.readline()) + inf.close() + with open("backup_population.dat", 'r') as inf: + for line in inf: + population.append(eval(line)) + inf.close() + with open("backup_blacklist.dat", 'r') as inf: + for line in inf: + blacklist.append(eval(line)) + inf.close() + with open("backup_min_energy.dat", 'r') as inf: + for line in inf: + min_energy.append(eval(line)) + inf.close() + with open("backup_iteration.dat", 'r') as inf: + iteration_tmp = eval(inf.readline()) + inf.close() + linked_params = run_util.find_linked_params(mol, params) + population.sort() + for i in range(len(population)): + print_output(str(population[i])+" "+str(float(population[i]))) + print_output("Blacklist: " + ', '.join([str(v) for v in blacklist])) + iteration = iteration_tmp+1 + print_output(" \n ___Reinitialization completed___") + remover_dir('generation_'+str(iteration)+'_child1') + remover_dir('generation_'+str(iteration)+'_child2') + + +def mutate_and_relax(candidate, name, iteration, cnt_max, **kwargs): + print_output("__%s__" % name) + found = False + cnt = 0 + while found is False and cnt < cnt_max: + candidate_backup = Structure(candidate) + candidate.mutate(**kwargs) + print_output("%s after mutation: " % name + str(candidate)) + run_util.str_info(candidate) + if not candidate.is_geometry_valid(): + print_output(" The geometry of %s is invalid." % name) + cnt += 1 + # Rebuild the structure + candidate = candidate_backup + continue + + if candidate not in blacklist: + name = "generation_%d_%s" % (iteration, name) + run_util.optimize(candidate, energy_function, params, name) + run_util.check_for_kill() + candidate.send_to_blacklist(blacklist) + print_output(str(candidate)+":, energy: "+str(float( + candidate))+", is temporary added to the population") + run_util.relax_info(candidate) + found = True + population.append(candidate) + else: + print_output("Geomerty of "+str(candidate)+" is fine, but already " + "known.") + cnt += 1 + candidate = candidate_backup + if cnt == cnt_max: + raise Exception("The allowed number of trials for generating" + " a unique child has been exceeded.") + +while iteration < params['max_iter']: + print_output(" \n ___Start of iteration " + str(iteration) + "___") + (parent1, parent2, fitness) = selection(population, params['selection'], + params['energy_var'], + params['fitness_sum_limit']) + param = np.random.rand() + cnt = 0 + while param < params['prob_for_crossing'] and cnt < cnt_max: + child1, child2 = Structure.crossover(parent1, parent2) + if child1.is_geometry_valid() and child2.is_geometry_valid(): + print_output("Crossover outcome: "+str(child1)+(", ")+str(child2)) + break + else: + print_output("The geometries created via crossover are invalid.") + cnt += 1 + continue + else: + child1, child2 = Structure(parent1), Structure(parent2) + print_output("No crossover was performed. Children are copies of " + "parents: " + str(child1) + (": ") + str(child1) + + (", ") + str(child2) + (": ") + str(child2)) + # Delete inherited attributes. + for child in child1, child2: + attr_list = ["initial_sdf_string", "energy"] + for attr in attr_list: + delattr(child, attr) + for dof in child.dof: + delattr(dof, "initial_values") + + run_util.str_info(child1) + run_util.str_info(child2) + + try: + mutate_and_relax(child1, "child1", iteration, cnt_max, **linked_params) + except Exception as exc: + print_output(exc) + sys.exit(0) + try: + mutate_and_relax(child2, "child2", iteration, cnt_max, **linked_params) + except Exception as exc: + print_output(exc) + sys.exit(0) + population.sort() + print_output("Sorted population: " + ', '.join([ + str(v) for v in population])) + del population[-1] + del population[-1] + print_output("Sorted population after removing two structures with highest" + " energy: " + ', '.join([str(v) for v in population])) + min_energy.append(population[0].energy) + print_output("Lowest energy of the population: %.3f" % min_energy[-1]) + print_output("Lowest energies in run: "+str(min_energy)) + run_util.perform_backup(mol, population, blacklist, iteration, min_energy) + run_util.check_for_convergence(iteration, params, min_energy) + run_util.check_for_kill() + iteration += 1 diff --git a/examples/parameters_aims.txt b/examples/parameters_aims.txt new file mode 100644 index 0000000..3c8545a --- /dev/null +++ b/examples/parameters_aims.txt @@ -0,0 +1,32 @@ +[Molecule] + +smiles="CC(=O)N[C@H](C(=O)NC)C" +optimize_torsion=True +optimize_cistrans=True +smarts_torsion= "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" +smarts_cistrans= "C~[$(C=O)]-[$(NC)]~[C]" +filter_smarts_torsion= "C~[$(C=O)]-[$(NC)]~[C]" +rmsd_type="cartesian" +distance_cutoff_1=1.2 +distance_cutoff_2=2.15 +rmsd_cutoff_uniq=0.25 +chiral=True + +[GA settings] + +energy_var=0.001 +selection="roulette_wheel" +fitness_sum_limit= 1.2 +popsize= 10 +prob_for_crossing= 0.95 +prob_for_mut_torsion= 0.8 +max_mutations_torsion= 3 + +[Run settings] + +energy_function = "FHI-aims" +sourcedir= "adds" +aims_call= "mpirun -n 4 aims.071914_7.scalapack.mpi.x" +max_iter= 30 +iter_limit_conv= 20 +energy_diff_conv= 0.001 diff --git a/examples/parameters_ff.txt b/examples/parameters_ff.txt new file mode 100644 index 0000000..147cb1e --- /dev/null +++ b/examples/parameters_ff.txt @@ -0,0 +1,31 @@ +[Molecule] + +smiles="CC(=O)N[C@H](C(=O)NC)C" +optimize_torsion=True +optimize_cistrans=True +smarts_torsion= "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" +smarts_cistrans= "C~[$(C=O)]-[$(NC)]~[C]" +filter_smarts_torsion= "C~[$(C=O)]-[$(NC)]~[C]" +rmsd_type="cartesian" +distance_cutoff_1=1.2 +distance_cutoff_2=2.15 +rmsd_cutoff_uniq=0.25 +chiral=True + +[GA settings] + +energy_var=0.001 +selection="roulette_wheel" +fitness_sum_limit= 1.2 +popsize= 10 +prob_for_crossing= 0.95 +prob_for_mut_torsion= 0.8 +max_mutations_torsion= 3 + +[Run settings] + +energy_function = "force_field" +force_field = "mmff94" +max_iter= 30 +iter_limit_conv= 20 +energy_diff_conv= 0.001 diff --git a/examples/parameters_nwchem.txt b/examples/parameters_nwchem.txt new file mode 100644 index 0000000..8e6df4c --- /dev/null +++ b/examples/parameters_nwchem.txt @@ -0,0 +1,34 @@ +[Molecule] + +smiles="CC(=O)N[C@H](C(=O)NC)C" +optimize_torsion=True +optimize_cistrans=True +smarts_torsion= "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" +smarts_cistrans= "C~[$(C=O)]-[$(NC)]~[C]" +filter_smarts_torsion= "C~[$(C=O)]-[$(NC)]~[C]" +rmsd_type="cartesian" +distance_cutoff_1=1.2 +distance_cutoff_2=2.15 +rmsd_cutoff_uniq=0.25 +chiral=True + +[GA settings] + +energy_var=0.001 +selection="roulette_wheel" +fitness_sum_limit= 1.2 +popsize= 10 +prob_for_crossing= 0.95 +prob_for_mut_torsion= 0.8 +max_mutations_torsion= 3 + + +[Run settings] + +energy_function = "nwchem" +functional="pw91lda" +basis_set= "STO-6G" +nwchem_call= "mpirun -n 4 nwchem" +max_iter= 30 +iter_limit_conv= 20 +energy_diff_conv= 0.001 diff --git a/examples/parameters_orca.txt b/examples/parameters_orca.txt new file mode 100644 index 0000000..2ffba9b --- /dev/null +++ b/examples/parameters_orca.txt @@ -0,0 +1,37 @@ +[Molecule] + +smiles="CC(=O)N[C@H](C(=O)NC)C" +optimize_torsion=True +optimize_cistrans=True +smarts_torsion= "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" +smarts_cistrans= "C~[$(C=O)]-[$(NC)]~[C]" +filter_smarts_torsion= "C~[$(C=O)]-[$(NC)]~[C]" +rmsd_type="cartesian" +distance_cutoff_1=1.2 +distance_cutoff_2=2.15 +rmsd_cutoff_uniq=0.25 +chiral=True + +[GA settings] + +energy_var= 0.001 +selection= "roulette_wheel" +fitness_sum_limit= 1.2 +popsize= 10 +prob_for_crossing= 0.95 +prob_for_mut_torsion= 0.8 +max_mutations_torsion= 3 + + +[Run settings] + +energy_function= "orca" +commandline= "opt pbe nopop miniprint" +chargemult= "0 1" +optsteps = 200 +nprocs= 4 +memory= 4000 +orca_call= "/full/path/to/orca" +max_iter= 100 +iter_limit_conv= 20 +energy_diff_conv= 0.001 diff --git a/fafoom/__init__.py b/fafoom/__init__.py index 6382432..80bff83 100644 --- a/fafoom/__init__.py +++ b/fafoom/__init__.py @@ -1,9 +1,8 @@ from structure import MoleculeDescription, Structure -from angle import angle_measure, angle_set -from genetic_operations import selection, crossover, mutation_tor,\ - mutation_cistrans -from new_molecule_parametrize import parametrize, template_sdf +from genetic_operations import selection, crossover from pyaims import AimsObject from pyff import FFObject from pynwchem import NWChemObject +from pyorca import OrcaObject from utilities import * +from run_utilities import * diff --git a/fafoom/angle.py b/fafoom/angle.py deleted file mode 100644 index a60d95c..0000000 --- a/fafoom/angle.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2015 Adriana Supady -# -# This file is part of fafoom. -# -# Fafoom is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Fafoom is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with fafoom. If not, see . -"""Measure and set torsions values.""" - -from operator import itemgetter -from rdkit import Chem -from rdkit.Chem import rdMolTransforms - - -def ig(x): - return itemgetter(x) - - -def angle_measure(sdf_string, cistrans, torsion): - """Return the dihedral angles (in degrees). - - Args: - sdf_string - cistrans (list of tuples): positions of cistrans bonds - torsion (list of tuples): positions of rotatable bonds - Returns: - list of values of cistrans bonds and list of values of rotatable - bonds in deg - Raises: - ValueError: if both lists are empty - - """ - if len(cistrans) == 0 and len(torsion) == 0: - raise ValueError("There are no rotatable bonds and no cis/trans bonds") - mol = Chem.MolFromMolBlock(sdf_string, removeHs=False) - measured_torsion = [] - measured_cistrans = [] - if len(cistrans) != 0: - for x in range(len(cistrans)): - tmp = float(rdMolTransforms.GetDihedralDeg( - mol.GetConformer(), - ig(0)(cistrans[x]), ig(1)(cistrans[x]), - ig(2)(cistrans[x]), ig(3)(cistrans[x]))) - measured_cistrans.append(float('{0:.2f}'.format(tmp))) - if len(torsion) != 0: - for y in range(len(torsion)): - tmp = float(rdMolTransforms.GetDihedralDeg( - mol.GetConformer(), - ig(0)(torsion[y]), ig(1)(torsion[y]), - ig(2)(torsion[y]), ig(3)(torsion[y]))) - measured_torsion.append(float('{0:.2f}'.format(tmp))) - - return measured_cistrans, measured_torsion - - -def angle_set(sdf_string, cistrans, torsion, values_cistrans_n, values_tor_n): - """Return a modifies sdf_string after setting torsions. - - Args: - sdf_string - cistrans (list of tuples): positions of cistrans bonds - torsion (list of tuples): positions of rotatable bonds - values_cistrans_n: cistrans bonds values to set (in degrees) - values_tor_n: rotatable bonds values to set (in degrees) - Returns: - modified sdf_string - Raises: - ValueError: if both lists of positions are empty - ValueError: if lengths of the corresponding lists do not match - - """ - if len(cistrans) == 0 and len(torsion) == 0: - raise ValueError("There are no rotatable bonds and no cis/trans bonds") - if len(cistrans) != len( - values_cistrans_n) or len(torsion) != len(values_tor_n): - raise ValueError("No length match between the values and positions") - mol = Chem.MolFromMolBlock(sdf_string, removeHs=False) - if len(torsion) != 0: - for x in range(len(torsion)): - rdMolTransforms.SetDihedralDeg( - mol.GetConformer(), ig(0)(torsion[x]), - ig(1)(torsion[x]), ig(2)(torsion[x]), - ig(3)(torsion[x]), ig(x)(values_tor_n)) - if len(cistrans) != 0: - for x in range(len(cistrans)): - rdMolTransforms.SetDihedralDeg( - mol.GetConformer(), ig(0)(cistrans[x]), - ig(1)(cistrans[x]), ig(2)(cistrans[x]), - ig(3)(cistrans[x]), ig(x)(values_cistrans_n)) - - return Chem.MolToMolBlock(mol) diff --git a/fafoom/deg_of_freedom.py b/fafoom/deg_of_freedom.py new file mode 100644 index 0000000..98edbe1 --- /dev/null +++ b/fafoom/deg_of_freedom.py @@ -0,0 +1,423 @@ +# Copyright 2015 Adriana Supady +# +# This file is part of fafoom. +# +# Fafoom is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Fafoom is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with fafoom. If not, see . +''' Handle the degrees of freedom.''' +from __future__ import division +import math +from copy import copy +from random import choice +from rdkit import Chem + +from utilities import ig, cleaner, get_vec, tor_rmsd, find_one_in_list +from measure import ( + dihedral_measure, + dihedral_set, + pyranosering_measure, + pyranosering_set +) + +from genetic_operations import mutation + + +class DOF: + + def __init__(self, name): + self.name = name + + def common_function(): + pass + + +class Torsion(DOF): + """ Find, create and handle rotatable bonds""" + # Rotatable Bonds can freely rotate around themselves + values_options = range(-179, 181, 1) + + @staticmethod + def find(smiles, smarts_torsion="[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]", + filter_smarts_torsion=None, positions=None): + """Find the positions of rotatable bonds in the molecule. + + Args(required): + smiles (str) + Arge(optional) + smarts_torion (str) : pattern defintion for the torsions, if not + defined, a default pattern "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" + will be used + filter_smarts_torsion (str): pattern defition for the torsion to be + ignored + positions (list of tuples) : if the positions (in terms of atom + indicies) of the torsions is known, they can be passed directly + """ + if positions is None: + mol = Chem.MolFromSmiles(smiles) + if mol is None: + raise ValueError("The smiles is invalid") + pattern_tor = Chem.MolFromSmarts(smarts_torsion) + torsion = list(mol.GetSubstructMatches(pattern_tor)) + + if filter_smarts_torsion: + pattern_custom = Chem.MolFromSmarts(filter_smarts_torsion) + custom = list(mol.GetSubstructMatches(pattern_custom)) + to_del_bef_custom = [] + + for x in reversed(range(len(torsion))): + for y in reversed(range(len(custom))): + ix1, ix2 = ig(1)(torsion[x]), ig(2)(torsion[x]) + iy1, iy2 = ig(1)(custom[y]), ig(2)(custom[y]) + if (ix1 == iy1 and ix2 == iy2) or (ix1 == iy2 and + ix2 == iy1): + to_del_bef_custom.append(x) + + custom_torsion = copy(torsion) + custom_torsion = [v for i, v in enumerate(custom_torsion) + if i not in set(to_del_bef_custom)] + torsion = custom_torsion + + positions = cleaner(torsion) + + return positions + + def __init__(self, positions): + """Initialaize the Torsion object from the positions.""" + self.type = "torsion" + self.positions = positions + + def get_random_values(self): + """Generate a random value for each of the positions of the Torsion + object""" + self.values = [choice(Torsion.values_options) + for i in range(len(self.positions))] + + def get_weighted_values(self, weights): + """Generate a random, but weighted value for each of the positions of + the Torsion object. + + Args: + weights (list): weights for all allowed values options, if it is + not of the length of the values options, random values will be + generated + """ + if len(weights) == len(Torsion.values_options): + self.values = [Torsion.values_options[find_one_in_list(sum( + weights), weights)] + for i in range(len(self.positions))] + else: + self.values = [choice(Torsion.values_options) + for i in range(len(self.positions))] + + def apply_on_string(self, string, values_to_set=None): + """Adjust the sdf string to match the values of the Torsion object. + + Args(required): + sdf_string + Args(optional): + values_to_set (list) : a list of values to be set can be passed + directly + """ + if values_to_set is not None: + self.values = values_to_set + for i in range(len(self.positions)): + string = dihedral_set(string, self.positions[i], + self.values[i]) + return string + + def mutate_values(self, max_mutations=None, weights=None): + """Call for a mutation of the list of Torsion object values + + Args (optional): + max_mutations (int): maximal number of mutations to be performed on + the values list + weights (list) : weights for the values_options + """ + if max_mutations is None: + max_mutations = max(1, int(math.ceil(len(self.values)/2.0))) + self.values = mutation(self.values, max_mutations, + Torsion.values_options, weights, periodic=True) + + def update_values(self, string): + """Measure and update the Torsion object values. + + Args: + sdf_string + """ + updated_values = [] + for i in range(len(self.positions)): + updated_values.append(dihedral_measure(string, + self.positions[i])) + self.values = updated_values + + def is_equal(self, other, threshold, chiral=True): + """Decide if the values of two Torsion objects are equal or not + (based on Torsional RMSD). If the objects have the "initial_values" + attributes, they will be taken into account too. + + Args(optional): + chiral (bool): default-True, if set to False, mirror image of the + structure will be considered too + """ + + values = [] + values.append(tor_rmsd(2, get_vec(self.values, other.values))) + + if hasattr(other, "initial_values"): + values.append(tor_rmsd(2, get_vec(self.values, + other.initial_values))) + if not chiral: + values.append(tor_rmsd(2, get_vec(self.values, + [-1*i for i in other.values]))) + if hasattr(other, "initial_values"): + values.append(tor_rmsd(2, get_vec(self.values, [-1*i for i in + other.initial_values]))) + if min(values) > threshold: + return False + else: + return True + + +class PyranoseRing(DOF): + + # 0,1: chairs; 2-7:boats; 8-13:skewboats; 14-25:halfchairs; 26-37:envelopes + dict_for_ring_dih = {'0': [60.0, -60.0, 60.0, -60.0, 60.0, -60.0], + '1': [-60.0, 60.0, -60.0, 60.0, -60.0, 60.0], + '2': [0.0, 60.0, -60.0, 0.0, 60.0, -60.0], + '3': [60.0, 0.0, -60.0, 60.0, 0.0, -60.0], + '4': [60.0, -60.0, 0.0, 60.0, -60.0, 0.0], + '5': [0.0, -60.0, 60.0, 0.0, -60.0, 60.0], + '6': [-60.0, 0.0, 60.0, -60.0, 0.0, 60.0], + '7': [-60.0, 60.0, 0.0, -60.0, 60.0, 0.0], + '8': [30.0, 30.0, -60.0, 30.0, 30.0, -60.0], + '9': [60.0, -30.0, -30.0, 60.0, -30.0, -30.0], + '10': [30.0, -60.0, 30.0, 30.0, -60.0, 30.0], + '11': [-30.0, -30.0, 60.0, -30.0, -30.0, 60.0], + '12': [-60.0, 30.0, 30.0, -60.0, 30.0, 30.0], + '13': [-30.0, 60.0, -30.0, -30.0, 60.0, -30.0], + '14': [45.0, -15.0, 0.0, -15.0, 45.0, -60.0], + '15': [60.0, -45.0, 15.0, 0.0, 15.0, -45.0], + '16': [45.0, -60.0, 45.0, -15.0, 0.0, -15.0], + '17': [15.0, -45.0, 60.0, -45.0, 15.0, 0.0], + '18': [0.0, -15.0, 45.0, -60.0, 45.0, -15.0], + '19': [15.0, 0.0, 15.0, -45.0, 60.0, -45.0], + '20': [-15.0, 45.0, -60.0, 45.0, -15.0, 0.0], + '21': [0.0, 15.0, -45.0, 60.0, -45.0, 15.0], + '22': [-15.0, 0.0, -15.0, 45.0, -60.0, 45.0], + '23': [-45.0, 15.0, 0.0, 15.0, -45.0, 60.0], + '24': [-60.0, 45.0, -15.0, 0.0, -15.0, 45.0], + '25': [-45.0, 60.0, -45.0, 15.0, 0.0, 15.0], + '26': [30.0, 0.0, 0.0, -30.0, 60.0, -60.0], + '27': [60.0, -30.0, 0.0, 0.0, 30.0, -60.0], + '28': [60.0, -60.0, 30.0, 0.0, 0.0, -30.0], + '29': [30.0, -60.0, 60.0, -30.0, 0.0, 0.0], + '30': [0.0, -30.0, 60.0, -60.0, 30.0, 0.0], + '31': [0.0, 0.0, 30.0, -60.0, 60.0, -30.0], + '32': [-30.0, 60.0, -60.0, 30.0, 0.0, 0.0], + '33': [0.0, 30.0, -60.0, 60.0, -30.0, 0.0], + '34': [0.0, 0.0, -30.0, 60.0, -60.0, 30.0], + '35': [-30.0, 0.0, 0.0, 30.0, -60.0, 60.0], + '36': [-60.0, 30.0, 0.0, 0.0, -30.0, 60.0], + '37': [-60.0, 60.0, -30.0, 0.0, 0.0, 30.0]} + + dict_for_ring_ang = {'0': [109.5, 109.5, 109.5, 109.5, 109.5], + '1': [109.5, 109.5, 109.5, 109.5, 109.5], + '2': [109.5, 109.5, 109.5, 109.5, 109.5], + '3': [109.5, 109.5, 109.5, 109.5, 109.5], + '4': [109.5, 109.5, 109.5, 109.5, 109.5], + '5': [109.5, 109.5, 109.5, 109.5, 109.5], + '6': [109.5, 109.5, 109.5, 109.5, 109.5], + '7': [109.5, 109.5, 109.5, 109.5, 109.5], + '8': [114.0, 112.9, 112.9, 112.9, 112.9], + '9': [114.0, 114.0, 112.9, 112.9, 112.9], + '10': [112.9, 112.9, 112.9, 112.9, 114.0], + '11': [114.0, 112.9, 112.9, 112.9, 112.9], + '12': [114.0, 114.0, 112.9, 112.9, 112.9], + '13': [112.9, 112.9, 112.9, 112.9, 114.0], + '14': [111.4, 118.2, 118.2, 118.2, 118.2], + '15': [111.4, 111.4, 118.2, 118.2, 118.2], + '16': [118.2, 111.4, 111.4, 118.2, 118.2], + '17': [118.2, 118.2, 111.4, 111.4, 118.2], + '18': [118.2, 118.2, 118.2, 111.4, 111.4], + '19': [118.2, 118.2, 118.2, 118.2, 111.4], + '20': [118.2, 118.2, 111.4, 111.4, 118.2], + '21': [118.2, 118.2, 118.2, 111.4, 111.4], + '22': [118.2, 118.2, 118.2, 118.2, 111.4], + '23': [111.4, 118.2, 118.2, 118.2, 118.2], + '24': [111.4, 111.4, 118.2, 118.2, 118.2], + '25': [118.2, 111.4, 111.4, 118.2, 118.2], + '26': [117.7, 117.7, 117.7, 117.7, 117.7], + '27': [105.1, 117.7, 117.7, 117.7, 117.7], + '28': [117.7, 105.1, 117.7, 117.7, 117.7], + '29': [117.7, 117.7, 105.1, 117.7, 117.7], + '30': [117.7, 117.7, 117.7, 105.1, 117.7], + '31': [117.7, 117.7, 117.7, 117.7, 105.1], + '32': [117.7, 117.7, 105.1, 117.7, 117.7], + '33': [117.7, 117.7, 117.7, 105.1, 117.7], + '34': [117.7, 117.7, 117.7, 117.7, 105.1], + '35': [117.7, 117.7, 117.7, 117.7, 117.7], + '36': [105.1, 117.7, 117.7, 117.7, 117.7], + '37': [117.7, 105.1, 117.7, 117.7, 117.7]} + + values_options = range(0, len(dict_for_ring_dih), 1) + + @staticmethod + def find(smiles, pyranosering_pattern="C1(CCCCO1)O", positions=None): + if positions is None: + mol = Chem.MolFromSmiles(smiles) + if mol is None: + raise ValueError("The smiles is invalid") + pattern_pyranosering = Chem.MolFromSmarts(pyranosering_pattern) + pyranosering = list(mol.GetSubstructMatches(pattern_pyranosering)) + positions = pyranosering + return positions + + def __init__(self, positions): + self.type = "pyranosering" + self.positions = positions + + def get_random_values(self): + self.values = [choice(PyranoseRing.values_options) + for i in range(len(self.positions))] + + def get_weighted_values(self, weights): + if len(weights) == len(PyranoseRing.values_options): + self.values = [PyranoseRing.values_options[find_one_in_list(sum( + weights), weights)] + for i in range(len(self.positions))] + else: + self.values = [choice(PyranoseRing.values_options) + for i in range(len(self.positions))] + + def apply_on_string(self, string, values_to_set=None): + if values_to_set is not None: + self.values = values_to_set + for i in range(len(self.positions)): + val_dih = PyranoseRing.dict_for_ring_dih[str( + int(self.values[i]))][:5] + val_ang = PyranoseRing.dict_for_ring_ang[str( + int(self.values[i]))][:5] + string = pyranosering_set(string, self.positions[i], val_dih, + val_ang) + return string + + def update_values(self, string): + updated_values = [] + for i in range(len(self.positions)): + updated_values.append(pyranosering_measure(string, + self.positions[i], + PyranoseRing.dict_for_ring_dih)) + self.values = updated_values + + def mutate_values(self, max_mutations=None, weights=None): + if max_mutations is None: + max_mutations = max(1, int(math.ceil(len(self.values)/2.0))) + self.values = mutation(self.values, max_mutations, + PyranoseRing.values_options, weights, + periodic=False) + + def is_equal(self, other, threshold, chiral=True): + values = [] + tmp = [] + for i in get_vec(self.values, other.values): + if i == 0: + tmp.append(0) + else: + tmp.append(1) + values.append(sum(tmp)/len(tmp)) + if hasattr(other, "initial_values"): + tmp = [] + for i in get_vec(self.values, other.initial_values): + if i == 0: + tmp.append(0) + else: + tmp.append(1) + values.append(sum(tmp)/len(tmp)) + if min(values) > threshold: + return False + else: + return True + + +class CisTrans(DOF): + values_options = [0.0, 180.0] + + @staticmethod + def find(smiles, smarts_cistrans=None, positions=None): + if positions is None: + mol = Chem.MolFromSmiles(smiles) + if mol is None: + raise ValueError("The smiles is invalid") + pattern_cistrans = Chem.MolFromSmarts(smarts_cistrans) + cistrans = list(mol.GetSubstructMatches(pattern_cistrans)) + positions = cleaner(cistrans) + return positions + + def __init__(self, positions): + self.type = "cistrans" + self.positions = positions + + def apply_on_string(self, string, values_to_set=None): + + if values_to_set is not None: + self.values = values_to_set + + for i in range(len(self.positions)): + string = dihedral_set(string, self.positions[i], self.values[i]) + return string + + def update_values(self, string): + updated_values = [] + for i in range(len(self.positions)): + updated_values.append(dihedral_measure(string, self.positions[i])) + self.values = updated_values + + def get_random_values(self): + self.values = [choice(CisTrans.values_options) + for i in range(len(self.positions))] + + def get_weighted_values(self, weights): + if len(weights) == len(CisTrans.values_options): + self.values = [CisTrans.values_options[find_one_in_list(sum( + weights), weights)] + for i in range(len(self.positions))] + else: + self.values = [choice(CisTrans.values_options) + for i in range(len(self.positions))] + + def mutate_values(self, max_mutations=None, weights=None): + + if max_mutations is None: + max_mutations = max(1, int(math.ceil(len(self.values)/2.0))) + + self.values = mutation(self.values, max_mutations, + CisTrans.values_options, weights, periodic=True) + + def is_equal(self, other, threshold, chiral=True): + values = [] + values.append(tor_rmsd(2, get_vec(self.values, other.values))) + + if hasattr(other, "initial_values"): + values.append(tor_rmsd(2, get_vec(self.values, + other.initial_values))) + + if not chiral: + values.append(tor_rmsd(2, get_vec(self.values, + [-1*i for i in other.values]))) + if hasattr(other, "initial_values"): + values.append(tor_rmsd(2, get_vec(self.values, + [-1*i for i in other.initial_values]))) + if min(values) > threshold: + return False + else: + return True diff --git a/fafoom/genetic_operations.py b/fafoom/genetic_operations.py index 6f6b650..4e46b5c 100644 --- a/fafoom/genetic_operations.py +++ b/fafoom/genetic_operations.py @@ -16,11 +16,16 @@ # along with fafoom. If not, see . """Collection of genetic operations.""" - +from __future__ import division import numpy as np import random -from utilities import find_two_in_list, print_output +from utilities import ( + find_closest, + find_one_in_list, + find_two_in_list, + print_output +) def selection(pop_list, selection_type, energy_range, fitness_sum_limit): @@ -59,22 +64,22 @@ def selection(pop_list, selection_type, energy_range, fitness_sum_limit): if selection_type == "roulette_wheel": if fitness_sum > fitness_sum_limit: x, y = find_two_in_list(fitness_sum, fitness) - parent1 = pop_list[x-1] - parent2 = pop_list[y-1] + parent1 = pop_list[x] + parent2 = pop_list[y] else: # if the sum is below the limit, best and a random are selected parent1 = pop_list[0] - parent2 = pop_list[int(np.ceil(np.random.rand()*(len(pop_list)-1)))] + parent2 = pop_list[int(random.choice(range(1, len(pop_list))))] if selection_type == "roulette_wheel_reverse": if fitness_sum > fitness_sum_limit: # in order to prevent num problems fitness_rev = np.zeros(len(fitness)) for i in range(len(fitness)): fitness_rev[-(i+1)] = fitness[i] # swapping of fitness values x, y = find_two_in_list(fitness_sum, fitness_rev) - parent1 = pop_list[x-1] - parent2 = pop_list[y-1] + parent1 = pop_list[x] + parent2 = pop_list[y] else: parent1 = pop_list[-1] - parent2 = pop_list[int(np.ceil(np.random.rand()*(len(pop_list)-1))-1)] + parent2 = pop_list[int(random.choice(range(1, len(pop_list)))-1)] if selection_type == "random": parents_ind = random.sample(xrange(len(pop_list)), 2) parent1 = pop_list[parents_ind[0]] @@ -89,7 +94,7 @@ def crossover(list1, list2): list1 (list): list of values list2 (list): list of values Returns: - two numpy arrays + two lists (converted numpy arrays) Raises: ValueError: if the length of the lists differ """ @@ -110,60 +115,54 @@ def crossover(list1, list2): for n in range(len(end2)): end2[ind] = list1[cross_point+n] ind += 1 - new_list1 = np.append(start1, end1) - new_list2 = np.append(start2, end2) + new_list1 = list(np.append(start1, end1)) + new_list2 = list(np.append(start2, end2)) return new_list1, new_list2 else: return list1, list2 -def mutation_tor(list_for_mut, max_mutations_torsions): - """Mutate a list of torsion values. +def mutation(list_for_mut, max_mutations, options, weights=None, + periodic=False): + """Mutate a list of values. - Args: - list_for_mut (list): list of values - max_mutations_torsions (int): maximal allowed number of mutations + Args(required): + list_for_mut (list): list of values to be mutated + max_mut: maximal number of changes to be made + options: list of options for new values + Args(optional): + weights (list): weights for options + periodic (bool) Returns: - mutated list; integers from (-180,179) are allowed as new values - Raises: - TypeError: if the max_mutations_torsions is not an integer - ValueError: if the max_mutations_torsions is negative + mutated list """ - if not isinstance(max_mutations_torsions, int): - raise TypeError("The maximal number of mutations needs to be an integer") - if max_mutations_torsions < 0: - raise ValueError("The maximal number of mutations cannot be negative") - mut_numb = int(np.ceil(np.random.rand()*max_mutations_torsions)) - newtor1 = [] - for p in range(mut_numb): - newtor1.append(np.ceil(np.random.rand()*359.0)-180.0) - mutp2 = int(np.ceil(np.random.rand()*len(list_for_mut))) - list_for_mut[mutp2-1] = newtor1[p] - return list_for_mut + if not isinstance(max_mutations, int): + raise TypeError("The max. number of mutations needs to be an integer") + if max_mutations < 0: + raise ValueError("The max. number of mutations cannot be negative") + mut_numb = random.randint(1, min(max_mutations, len(list_for_mut))) + pos = random.sample(range(len(list_for_mut)), mut_numb) + for p in pos: + current_value = list_for_mut[p] + banned = find_closest(current_value, options, periodic) + cnt = 0 + while cnt < 100: + if weights is None: + new_value = random.sample(options, 1)[0] + else: + new_value = options[find_one_in_list(sum(weights), weights)] -def mutation_cistrans(list_for_mut, max_mutations_cistrans): - """Mutate a list of cistrans values. + if len(banned) != len(options): + if new_value not in banned: + list_for_mut[p] = new_value + break + else: + cnt += 1 + else: + # for very rare cases, i.e. if there are only two options + # and the value is the mean of them! + list_for_mut[p] = new_value + break - Args: - list_for_mut (list): list of values - max_mutations_cistrans (int): maximal allowed number of mutations - Returns: - mutated list; values from (-90,90> are changed to 180; otherwise to 0 - Raises: - TypeError: if the max_mutations_cistrans is not an integer - ValueError: if the max_mutations_cistrans is negative - """ - if not isinstance(max_mutations_cistrans, int): - raise TypeError("The maximal number of mutations needs to be an integer") - if max_mutations_cistrans < 0: - raise ValueError("The maximal number of mutations cannot be negative") - mut_numb = int(np.ceil(np.random.rand()*max_mutations_cistrans)) - for p in range(mut_numb): - mutp1 = int(np.ceil(np.random.rand()*len(list_for_mut))) - wert = list_for_mut[mutp1-1] - if wert > 90.0 or wert < -90.0: - list_for_mut[mutp1-1] = 0.0 - else: - list_for_mut[mutp1-1] = 180.0 return list_for_mut diff --git a/fafoom/get_parameters.py b/fafoom/get_parameters.py new file mode 100644 index 0000000..befe023 --- /dev/null +++ b/fafoom/get_parameters.py @@ -0,0 +1,131 @@ +# Copyright 2015 Adriana Supady +# +# This file is part of fafoom. +# +# Fafoom is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Fafoom is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with fafoom. If not, see . +''' Communicate between the structure and the degrees of freedom.''' +from __future__ import division +from rdkit import Chem +from rdkit.Chem import AllChem +from deg_of_freedom import Torsion, CisTrans, PyranoseRing +from utilities import check_geo_sdf + + +def get_atoms_and_bonds(smiles): + """Build the molecule from SMILES and return the number of atoms and bonds. + + Args(required): + smiles (str): one-line representation of the molecule + Returns: + Number of atoms, number of bonds + """ + mol = Chem.MolFromSmiles(smiles) + if mol is None: + raise ValueError("The smiles is invalid") + mol = Chem.AddHs(mol) + return mol.GetNumAtoms(), mol.GetNumBonds() + + +def get_positions(type_of_deg, smiles, **kwargs): + """Find the positions (tuples of atom indicies) of the degrees of freedom. + + Args(required): + type_of_deg (str) + smiles (str) + if cistrans should be optimized: + smarts_cistrans + Args(optimal): + list_of_torsion (list) + smarts_torsion (str) + filter_smarts_torsion (str) + list_of_cistrans (list) + list_of_pyranosering (list) + Returns: + list of touples defining the positions of the degree of freedom + """ + + if type_of_deg == "torsion": + if 'list_of_torsion' in kwargs: + return Torsion.find(smiles, positions=kwargs['list_of_torsion']) + else: + if 'smarts_torsion' in kwargs: + if 'filter_smarts_torsion' in kwargs: + return Torsion.find(smiles, + smarts_torsion=kwargs['smarts_torsion'], + filter_smarts_torsion= + kwargs['filter_smarts_torsion']) + else: + return Torsion.find(smiles, + smarts_torsion=kwargs['smarts_torsion']) + else: + return Torsion.find(smiles) + if type_of_deg == "cistrans": + if 'list_of_cistrans' in kwargs: + return CisTrans.find(smiles, positions=kwargs['list_of_cistrans']) + else: + return CisTrans.find(smiles, + smarts_cistrans=kwargs['smarts_cistrans']) + + if type_of_deg == "pyranosering": + if 'list_of_pyranosering' in kwargs: + return PyranoseRing.find(smiles, + positions=kwargs['list_of_pyranosering']) + else: + return PyranoseRing.find(smiles) + + +def create_dof_object(type_of_deg, positions): + """Initialize the degree of freedom from the positions + + Args: + type_of_deg (str) + positsion (list) + Returns: + degree of freedom object + """ + if type_of_deg == "torsion": + return Torsion(positions) + if type_of_deg == "cistrans": + return CisTrans(positions) + if type_of_deg == "pyranosering": + return PyranoseRing(positions) + + +def template_sdf(smiles, distance_cutoff_1, distance_cutoff_2): + """Create a template sdf string and writes it to file. + + Args(required): + smiles (str): one-line representation of the molecule + Args(optional): + distance_cutoff_1 (float): min distance between non-bonded atoms [A] + distance_cutoff_2 (float): max distance between bonded atoms [A] + Returns: + sdf string + """ + cnt = 0 + sdf_check = True + while sdf_check: + mol = Chem.MolFromSmiles(smiles) + mol = Chem.AddHs(mol) + AllChem.EmbedMolecule(mol) + AllChem.UFFOptimizeMolecule(mol) + Chem.SDWriter('mol.sdf').write(mol) + sdf_string = Chem.MolToMolBlock(mol) + check = check_geo_sdf(sdf_string, distance_cutoff_1, distance_cutoff_2) + if check: + sdf_check = False + Chem.SDWriter('mol.sdf').write(mol) + else: + cnt += 1 + return sdf_string diff --git a/fafoom/measure.py b/fafoom/measure.py new file mode 100644 index 0000000..f5d9458 --- /dev/null +++ b/fafoom/measure.py @@ -0,0 +1,406 @@ +# Copyright 2015 Adriana Supady & Mateusz Marianski +# +# This file is part of fafoom. +# +# Fafoom is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Fafoom is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with fafoom. If not, see . +"""Measure and set dihedral angles and rings.""" +from __future__ import division +from operator import itemgetter +import numpy as np + +from rdkit import Chem +from rdkit.Chem import rdMolTransforms + +from utilities import get_vec, tor_rmsd, xyz2sdf + + +def ig(x): + return itemgetter(x) + + +def dihedral_measure(sdf_string, position): + """ Measure the dihedral angle. + + Args: + sdf_string (string) + position (list): 4 atoms defining the dihedral + Returns: + float value + Raises: + ValueError: If the lenght of the list is not equal 4. + """ + if len(position) != 4: + raise ValueError("The position needs to be defined by 4 integers") + mol = Chem.MolFromMolBlock(sdf_string, removeHs=False) + val = float(rdMolTransforms.GetDihedralDeg( + mol.GetConformer(), + ig(0)(position), ig(1)(position), + ig(2)(position), ig(3)(position))) + return float('{0:.2f}'.format(val)) + + +def dihedral_set(sdf_string, position, value): + """ Set the dihedral angle. + + Args: + sdf_string (string): + position (list): 4 atoms defining the dihedral + value : value to set + Returns: + modified sdf_string + Raises: + ValueError: If the lenght of the list is not equal 4. + """ + if len(position) != 4: + raise ValueError("The position needs to be defined by 4 integers") + mol = Chem.MolFromMolBlock(sdf_string, removeHs=False) + rdMolTransforms.SetDihedralDeg(mol.GetConformer(), ig(0)(position), + ig(1)(position), ig(2)(position), + ig(3)(position), value) + + return Chem.MolToMolBlock(mol) + + +def pyranosering_set(sdf_string, position, new_dih, new_ang): + """ Set the pyranosering. + + Args: + sdf_string (string) + position (list): 7 atoms defining the ring, i.e. positions of + ['C0','C1','C2','C3','C4','O', 'O0'] + new_dih (list) : 5 values for the dihedral angles + new_ang (list): 5 values for the bond angles + Returns: + modified sdf_string + Raises: + ValueError: If the lenght of the position is not equal 7 ot if the + length of new_dih/new_ang is not equal to 5. + """ + if len(position) != 7: + raise ValueError("The position needs to be defined by 7 integers") + if len(new_dih) != 5: + raise ValueError("Five dihedral angles are needed for the new ring " + "conformation.") + if len(new_ang) != 5: + raise ValueError("Five bond angles are needed for the new ring " + "conformation.") + + from scipy.linalg import expm3 + + atoms_ring = {} + for n, name in zip(range(len(position)), + ['C0', 'C1', 'C2', 'C3', 'C4', 'O', 'O0']): + atoms_ring[name] = position[n] + + def initialize(sdf_string): + molecule = Chem.MolFromMolBlock(sdf_string, removeHs=False) + return molecule + + def calculate_normal_vector(list_of_atoms, xyz): + """Calculate the normal vector of a plane by + cross product of two vectors belonging to it. + + Args: + list_of_atoms: list of 3 atoms + xyz: numpy array with atoms xyz position + """ + + r0 = xyz[list_of_atoms[1], :] - xyz[list_of_atoms[0], :] + r1 = xyz[list_of_atoms[2], :] - xyz[list_of_atoms[1], :] + cross_product = np.cross(r1, r0) + + return cross_product + + def measure_angle(list_of_atoms, xyz): + """Calculate an angle between three atoms: + angle = acos(dot(X,Y)/(norm(X)*norm(Y))) + + Args: + list_of_atoms: list of 3 atoms + xyz: numpy array with atoms xyz positions + """ + + r0 = xyz[list_of_atoms[0], :] - xyz[list_of_atoms[1], :] + r1 = xyz[list_of_atoms[2], :] - xyz[list_of_atoms[1], :] + + norm_r0 = np.sqrt(np.sum(r0**2)) + norm_r1 = np.sqrt(np.sum(r1**2)) + norm = norm_r0*norm_r1 + + dot_product = np.dot(r0, r1)/norm + angle = np.arccos(dot_product) + + #Calculate the axis of rotation (axor): + axor = np.cross(r0, r1) + + return angle*180.0/np.pi, axor + + def measure_dihedral(list_of_atoms, xyz): + """Calculate a dihedral angle between two planes defined by + a list of four atoms. It returns the angle and the rotation axis + required to set a new dihedral. + + Args: + list_of_atoms: list of 4 atoms + xyz: numpy array with atom xyz positions + """ + + plane1 = calculate_normal_vector(list_of_atoms[:3], xyz) + plane2 = calculate_normal_vector(list_of_atoms[1:], xyz) + #Calculate the axis of rotation (axor) + axor = np.cross(plane1, plane2) + + #Calculate a norm of normal vectors: + norm_plane1 = np.sqrt(np.sum(plane1**2)) + norm_plane2 = np.sqrt(np.sum(plane2**2)) + norm = norm_plane1 * norm_plane2 + #Measure the angle between two planes: + dot_product = np.dot(plane1, plane2)/norm + alpha = np.arccos(dot_product) + + #The cosine function is symetric thus, to distinguish between + #negative and positive angles, one has to calculate if the fourth + #point is above or below the plane defined by first 3 points: + ppoint = - np.dot(plane1, xyz[list_of_atoms[0], :]) + dpoint = (np.dot(plane1, xyz[list_of_atoms[3], :])+ppoint)/norm_plane1 + if dpoint >= 0: + return -(alpha*180.0)/np.pi, axor + else: + return (alpha*180.0)/np.pi, axor + + def determine_carried_atoms(at1, at2, conn_mat): + """Find all atoms necessary to be carried over during rotation + of an atom 2: + + Args: + at1, at2: two atoms number + """ + + #1. Zero the connections in connectivity matrix + tmp_conn = np.copy(conn_mat) + tmp_conn[at1, at2] = 0 + tmp_conn[at2, at1] = 0 + #2. Determine the connected atoms: + carried_atoms = [at2] + s = True + while s: + s = False + #Always iterate over entire list because I might have branching + for at in carried_atoms: + #List of indexes of connected atoms: + conn_atoms = np.where(tmp_conn[at] != 0)[0] + conn_atoms.tolist + for x in conn_atoms: + if x not in carried_atoms: + carried_atoms.append(x) + s = True + return carried_atoms + + def set_angle(list_of_atoms, new_ang, atoms_ring, xyz, conn_mat): + """Set a new angle between three atoms + + Args: + list_of_atoms: list of three atoms + new_ang: value of dihedral angle (in degrees) to be set + atoms_ring: dictionary of atoms in the ring. It recognizes + if the last atom is 'C0O' (obsolete) + xyz: numpy array with atoms xyz positions + conn_mat: connectivity matrix + Returns: + xyz: modified numpy array with new atoms positions + """ + #Determine the axis of rotation: + old_ang, axor = measure_angle(list_of_atoms, xyz) + norm_axor = np.sqrt(np.sum(axor**2)) + normalized_axor = axor/norm_axor + + #Determine which atoms should be dragged along with the bond: + carried_atoms = determine_carried_atoms(list_of_atoms[1], + list_of_atoms[2], conn_mat) + + #Each carried_atom is rotated by euler-rodrigues formula: + #Also, I move the midpoint of the bond to the mid atom + #the rotation step and then move the atom back. + + rot_angle = np.pi*(new_ang - old_ang)/180. + #Shake it, baby! Rotation matrix: + #print old_ang, new_ang, rot_angle*180./np.pi + rot1 = expm3(np.cross(np.eye(3), normalized_axor*rot_angle)) + translation = xyz[list_of_atoms[1], :] + for at in carried_atoms: + xyz[at, :] = np.dot(rot1, xyz[at, :]-translation) + xyz[at, :] = xyz[at, :]+translation + return xyz + + def set_dihedral(list_of_atoms, new_dih, atoms_ring, xyz, conn_mat): + """Set a new dihedral angle between two planes defined by + atoms first and last three atoms of the supplied list. + + Args: + list_of_atoms: list of four atoms + new_dih: value of dihedral angle (in degrees) to be set + atoms_ring: dictionary of atoms in the ring. It recognizes + if the last atom is 'C0O' + xyz: numpy array with atoms xyz positions + conn_mat: connectivity matrix + Returns: + xyz: modified numpy array with new atoms positions + """ + + #Determine the axis of rotation: + old_dih, axor = measure_dihedral(list_of_atoms, xyz) + norm_axor = np.sqrt(np.sum(axor**2)) + normalized_axor = axor/norm_axor + + #Check if the bond is the last bond, next to broken one. + #If yes, refer to the oxygen: + if 'O0a' in atoms_ring.keys(): + if list_of_atoms[-1] == atoms_ring['O0a']: + new_dih += 120.0 + else: + if list_of_atoms[-1] == atoms_ring['O0b']: + new_dih -= 120.0 + #Determine which atoms should be dragged along with the bond: + carried_atoms = determine_carried_atoms(list_of_atoms[1], + list_of_atoms[2], conn_mat) + #Each carried_atom is rotated by Euler-Rodrigues formula: + #Reverse if the angle is less than zero, so it rotates in + #right direction. + #Also, I move the midpoint of the bond to the center for + #the rotation step and then move the atom back. + + if old_dih >= 0.0: + rot_angle = np.pi*(new_dih - old_dih)/180. + else: + rot_angle = -np.pi*(new_dih - old_dih)/180. + #Shake it, baby! Rotation matrix: + rot1 = expm3(np.cross(np.eye(3), normalized_axor*rot_angle)) + translation = (xyz[list_of_atoms[1], :]+xyz[list_of_atoms[2], :])/2 + for at in carried_atoms: + xyz[at, :] = np.dot(rot1, xyz[at, :]-translation) + xyz[at, :] = xyz[at, :]+translation + + return xyz + + def mutate_ring(molecule, new_dih, new_ang): + """Mutate a ring to given conformation defined as a list of torsional + angles accoring to the 10.1016/S0040-4020(00)01019-X (IUPAC) paper + """ + n_at = molecule.GetNumAtoms() + n_bonds = molecule.GetNumBonds() + m_string = Chem.MolToMolBlock(molecule) + + #Split the string to xyz, connectivity matrix and atom list + m_coords = m_string.split('\n')[4:4+n_at] + xyz = np.zeros((n_at, 3)) + atom_list = [] + n = 0 + for line in m_coords: + xyz[n, :] += np.array(map(float, line.split()[:3])) + atom_list.append(line.split()[3]) + n += 1 + #Molecule Connectivity Matrix + m_conn = m_string.split('\n')[4+n_at:4+n_at+n_bonds] + conn_mat = np.zeros((n_at, n_at)) + for line in m_conn: + at1 = int(line.split()[0]) + at2 = int(line.split()[1]) + conn_mat[at1-1, at2-1] = 1 + conn_mat[at2-1, at1-1] = 1 + + #Introduce a cut between ring C0 and C1: + #I chose these atoms according to the torsion + #definitions in the IUPAC paper + #doi: 10.1016/S0040-4020(00)01019-X + conn_mat[atoms_ring['C0'], atoms_ring['C1']] = 0 + conn_mat[atoms_ring['C1'], atoms_ring['C0']] = 0 + + #Construct a list of atoms in order: + #C0, C1, C2, C3, C4, O, C0, O0a/b (oxygen at anomeric carbon) + #I use this list to rotate bonds. + atoms_list = [] + for x in range(0, 5): + atoms_list.append(atoms_ring['C'+str(x)]) + atoms_list.append(atoms_ring['O']) + atoms_list.append(atoms_ring['C0']) + atoms_list.append(atoms_ring['O0']) + + #Determine the anomer - alpha/beta, based on improper + #dihedral angle C1-C0-O-O0 + imdih = [] + for at in ['C1', 'C0', 'O', 'O0']: + imdih.append(atoms_ring[at]) + test_anomer = measure_dihedral(imdih, xyz)[0] + if test_anomer > 0.0: + atoms_ring['O0b'] = atoms_ring.pop('O0') + else: + atoms_ring['O0a'] = atoms_ring.pop('O0') + + #Adjust the 'internal' angles in the ring: + for n in range(len(new_ang)): + xyz = set_angle(atoms_list[n:n+3], new_ang[n], atoms_ring, xyz, + conn_mat) + #Rotate the dihedral angles in the ring: + for n in range(len(new_dih)): + xyz = set_dihedral(atoms_list[n:n+4], new_dih[n], atoms_ring, xyz, + conn_mat) + a = [] + a.append("%10s\n" % n_at) + for n in new_dih: + a.append("%10.4f" % n) + a.append("\n") + for n in range(n_at): + a.append("%10s%10.4f%10.4f%10.4f\n" % (atom_list[n], xyz[n, 0], + xyz[n, 1], xyz[n, 2])) + xyz_string = ''.join(a) + return xyz_string + + molecule = initialize(sdf_string) + sdf_string = xyz2sdf(mutate_ring(molecule, new_dih, new_ang), sdf_string) + + return sdf_string + + +def pyranosering_measure(sdf_string, position, dict_of_options): + """Assign the ring to a conformation from the dictionary of options. + + Args: + sdf_string (string) + position (list): 7 atoms defining the ring + dict_of_options (dict) : options for the ring + Returns: + An integer that corresponds to the best matching dict key + Raises: + ValueError: If the lenght of the position is not equal 7. + """ + if len(position) != 7: + raise ValueError("The position needs to be defined by 7 integers") + ang1 = dihedral_measure(sdf_string, position[0:4]) + ang2 = dihedral_measure(sdf_string, position[1:5]) + ang3 = dihedral_measure(sdf_string, position[2:6]) + ang4 = dihedral_measure(sdf_string, (ig(3)(position), ig(4)(position), + ig(5)(position), ig(0)(position))) + ang5 = dihedral_measure(sdf_string, (ig(4)(position), ig(5)(position), + ig(0)(position), ig(1)(position))) + ang6 = dihedral_measure(sdf_string, (ig(5)(position), ig(0)(position), + ig(1)(position), ig(2)(position))) + + all_ang = [ang1, ang2, ang3, ang4, ang5, ang6] + + rmsd_dict = {} + + for key in dict_of_options: + rmsd_dict[key] = (tor_rmsd(2, get_vec(all_ang, dict_of_options[key]))) + + return int(min(rmsd_dict.iteritems(), key=ig(1))[0]) diff --git a/fafoom/new_molecule_parametrize.py b/fafoom/new_molecule_parametrize.py deleted file mode 100644 index cb5bad2..0000000 --- a/fafoom/new_molecule_parametrize.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2015 Adriana Supady -# -# This file is part of fafoom. -# -# Fafoom is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Fafoom is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with fafoom. If not, see . - -"""Create the molecule from the smile code.""" - -from rdkit import Chem -from rdkit.Chem import AllChem -from copy import copy -from operator import itemgetter - -from utilities import check_geo_sdf, boolean - - -def parametrize(smile, custom="false", - smart_torsion="[C,N,O]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[C,N,O]", - smart_cistrans="C~[$(C=O)]-[$(NC)]~[C]", smart_custom=""): - """Build the molecule and return parameters. - - Args(required): - smile (str): one-line representation of the molecule - Args(optional): - custom (str): if true, the selection of torsions will be filtered - smart_torsion (str): search pattern for rotatable bonds [4 atoms!] - smart_cistrans (str): search pattern for cistrans bonds [4 atoms!] - smart_custom (str): search pattern torsions to be ignored [4 atoms!] - Returns: - number of atoms, bonds, lists of tuples: defining torsions, defining - cistrans bonds and, optionally, defining filtered torsions - Raises: - ValueError: if the molecule cannot be build form the smile - ValueError: if custom true and smart_custom not defined - """ - if boolean(custom) and smart_custom == "": - raise ValueError("The custom_pattern is not defined") - mol = Chem.MolFromSmiles(smile) - if mol is None: - raise ValueError("The smile is invalid") - pattern_tor = Chem.MolFromSmarts(smart_torsion) - pattern_cistrans = Chem.MolFromSmarts(smart_cistrans) - torsion = list(mol.GetSubstructMatches(pattern_tor)) - cistrans = list(mol.GetSubstructMatches(pattern_cistrans)) - custom_torsion = [] - - def ig(x): - return itemgetter(x) - - def cleaner(list_to_clean): - for_remove = [] - for x in reversed(range(len(list_to_clean))): - for y in reversed(range(x)): - ix1, ix2 = ig(1)(list_to_clean[x]), ig(2)(list_to_clean[x]) - iy1, iy2 = ig(1)(list_to_clean[y]), ig(2)(list_to_clean[y]) - if (ix1 == iy1 and ix2 == iy2) or (ix1 == iy2 and ix2 == iy1): - for_remove.append(y) - clean_list = [v for i, v in enumerate(list_to_clean) if i not in set(for_remove)] - return clean_list - if boolean(custom): - pattern_custom = Chem.MolFromSmarts(smart_custom) - custom = list(mol.GetSubstructMatches(pattern_custom)) - to_del_bef_custom = [] - - for x in reversed(range(len(torsion))): - for y in reversed(range(len(custom))): - ix1, ix2 = ig(1)(torsion[x]), ig(2)(torsion[x]) - iy1, iy2 = ig(1)(custom[y]), ig(2)(custom[y]) - if (ix1 == iy1 and ix2 == iy2) or (ix1 == iy2 and ix2 == iy1): - to_del_bef_custom.append(x) - - custom_torsion = copy(torsion) - custom_torsion = [v for i, v in enumerate(custom_torsion) if i not in set(to_del_bef_custom)] - custom_torsion = cleaner(custom_torsion) - cistrans = cleaner(cistrans) - torsion = cleaner(torsion) - mol = Chem.AddHs(mol) - atoms = mol.GetNumAtoms() - bonds = mol.GetNumBonds() - - return (atoms, bonds, torsion, cistrans, custom_torsion) - - -def template_sdf(smile, distance_cutoff_1, distance_cutoff_2): - """Create a template sdf string and writes it to file. - - Args: - smile (str): one-line representation of the molecule - distance_cutoff_1 (float): min distance between non-bonded atoms [A] - distance_cutoff_2 (float): max distance between bonded atoms [A] - Returns: - sdf string - """ - cnt = 0 - sdf_check = True - while sdf_check: - mol = Chem.MolFromSmiles(smile) - mol = Chem.AddHs(mol) - AllChem.EmbedMolecule(mol) - AllChem.UFFOptimizeMolecule(mol) - Chem.SDWriter('mol.sdf').write(mol) - sdf_string = Chem.MolToMolBlock(mol) - check = check_geo_sdf(sdf_string, distance_cutoff_1, distance_cutoff_2) - if check: - sdf_check = False - Chem.SDWriter('mol.sdf').write(mol) - else: - cnt += 1 - return sdf_string diff --git a/fafoom/pyaims.py b/fafoom/pyaims.py index 241df7e..9be6e41 100644 --- a/fafoom/pyaims.py +++ b/fafoom/pyaims.py @@ -16,6 +16,7 @@ # along with fafoom. If not, see . """Wrapper for FHI-aims.""" +from __future__ import division import shutil import os import subprocess @@ -26,19 +27,11 @@ class AimsObject(): """Create and handle FHI-aims objects.""" - def __init__(self, parameter_file): - """Get the directory containing the control.in file. - Args: - parameter_file (str): name of the parameter file - Raises: - KeyError: if the source directory is not defined + def __init__(self, sourcedir): + """Initialize the object. The sourcedir is the directory with the + 'control.in' file """ - with open(parameter_file) as fin: - parameter_dict = dict(line.strip().partition(' ')[::2] for line in fin) - fin.close() - if 'sourcedir' not in parameter_dict: - raise KeyError("The source directory is not defined.") - self.sourcedir = parameter_dict['sourcedir'] + self.sourcedir = sourcedir def generate_input(self, sdf_string): """Create input files for FHI-aims. @@ -78,8 +71,9 @@ def run_aims(self, execution_string): OSError: if geometry.in or control.in not present in the working directory """ + success = False for filename in ['control.in', 'geometry.in']: - if os.path.exists(filename) == False: + if os.path.exists(filename) is False: raise OSError("Required input file not present.") aims = subprocess.Popen( execution_string, stdout=subprocess.PIPE, shell=True) @@ -87,39 +81,42 @@ def run_aims(self, execution_string): ['cat'], stdin=aims.stdout, stdout=open('result.out', 'w'), shell=True) out.wait() + s0 = "Present geometry is converged" s = "Total energy of the DFT / Hartree-Fock s.c.f. calculation :" - s2 = "*** scf_solver: SCF cycle not converged." - s3 = "Final atomic structure:" + s2 = "Final atomic structure:" + not_conv = True searchfile = open("result.out", "r") for line in searchfile: - if s in line: - a = line.split(" ") - self.energy = float('{0:.4f}'.format(float(a[-2]))) - if s2 in line: - killfile = open("kill.dat", "w") - killfile.close() - searchfile.close() - searchfile = open("result.out", "r") - for i, line in enumerate(searchfile, 1): - if s3 in line: - l_num = int(i) + if s0 in line: + not_conv = False searchfile.close() - atoms = len(self.aims_string.splitlines()) - with open('geometry.out', 'w') as file_geo: - try: + if not_conv: + killfile = open("kill.dat", "w") + killfile.close() + + else: + searchfile = open("result.out", "r") + for i, line in enumerate(searchfile, 1): + if s in line: + a = line.split(" ") + self.energy = float('{0:.4f}'.format(float(a[-2]))) + if s2 in line: + l_num = int(i) + searchfile.close() + atoms = len(self.aims_string.splitlines()) + with open('geometry.out', 'w') as file_geo: with open('result.out') as f: for line in itertools.islice(f, l_num+1, l_num+1+atoms): file_geo.write(line) - except IOError or NameError or UnboundLocalError: - with open('geometry.in') as f: - for line in f: - file_geo.write(line) - file_geo.close() - f.close() - with open('geometry.out', 'r') as f: - self.aims_string_opt = f.read() - f.close() + file_geo.close() + f.close() + with open('geometry.out', 'r') as f: + self.aims_string_opt = f.read() + f.close() + success = True + + return success def get_energy(self): """Get the energy of the molecule. @@ -151,10 +148,13 @@ def clean_and_store(self): """Store the output and clean the working direction after the FHI-aims calculation has been completed. """ - os.remove('geometry.in') - os.remove('control.in') - shutil.copy('result.out', self.dirname) - os.remove('result.out') - remover_file('geometry.in.next_step') - shutil.copy('geometry.out', self.dirname) - os.remove('geometry.out') + try: + os.remove('geometry.in') + os.remove('control.in') + shutil.copy('result.out', self.dirname) + os.remove('result.out') + remover_file('geometry.in.next_step') + shutil.copy('geometry.out', self.dirname) + os.remove('geometry.out') + except IOError: + pass diff --git a/fafoom/pyff.py b/fafoom/pyff.py index 628d888..2672759 100644 --- a/fafoom/pyff.py +++ b/fafoom/pyff.py @@ -16,42 +16,41 @@ # along with fafoom. If not, see . '''Wrapper for RDKit force-field routines''' - +from __future__ import division +import os from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem import ChemicalForceFields +kcalmol2eV = 0.0433641 + class FFObject(): """Create and handle force-field objects.""" - def __init__(self, parameter_file): - """Get the parameters from file. + def __init__(self, force_field, steps=1000, force_tol=1.0e-4, + energy_tol=1.0e-6): + """Initialize the FFObject. + Args(required): - parameter_file (str): name of the parameter file + force_field (str): name of the force_field to use + + Args(optional): + (for the minimization method used for optimization) + + steps (default=1000) + force_tol (default=1e-4) + energy_tol (default=1e-6) + Raises: - KeyError: if the force field in not defined ValueError: if the force field is not 'uff' or 'mmff94' - Optional parameters for the minimization method used for optimization - that can be defined in the file: - steps (default=1000) - force_tol (default=1e-4) - energy_tol (default=1e-6) + """ - with open(parameter_file) as fin: - parameter_dict = dict(line.strip().partition(' ')[::2] for line in fin) - fin.close() - if 'force_field' not in parameter_dict: - raise KeyError("The force field is not defined.") - self.force_field = parameter_dict['force_field'] + self.force_field = force_field if self.force_field not in ['uff', 'mmff94']: raise ValueError("Unknown force field.") - s = [('steps', 1000), ('force_tol', 1.0e-4), ('energy_tol', 1.0e-6)] - for k, v in s: - if k not in parameter_dict: - parameter_dict[k] = v - self.steps = parameter_dict['steps'] - self.force_tol = parameter_dict['force_tol'] - self.energy_tol = parameter_dict['energy_tol'] + self.steps = steps + self.force_tol = force_tol + self.energy_tol = energy_tol def run_ff(self, sdf_string): """Perform the force field minimization. @@ -65,8 +64,8 @@ def run_ff(self, sdf_string): ff = ChemicalForceFields.MMFFGetMoleculeForceField(mol, molprop) elif self.force_field == 'uff': ff = AllChem.UFFGetMoleculeForceField(mol) - ff.Minimize(int(self.steps), - float(self.force_tol), float(self.energy_tol)) + ff.Minimize(int(self.steps), float(self.force_tol), + float(self.energy_tol)) self.sdf_string_opt = Chem.MolToMolBlock(mol) self.energy = float('{0:.4f}'.format(ff.CalcEnergy())) @@ -81,7 +80,7 @@ def get_energy(self): if not hasattr(self, 'energy'): raise AttributeError("The calculation wasn't performed yet.") else: - return self.energy + return kcalmol2eV*self.energy def get_sdf_string_opt(self): """Get the optimized sdf string. @@ -95,3 +94,14 @@ def get_sdf_string_opt(self): raise AttributeError("The calculation wasn't performed yet.") else: return self.sdf_string_opt + + def save_to_file(self): + if os.path.isfile("optimized_structures.sdf"): + f = open("optimized_structures.sdf", "a") + else: + f = open("optimized_structures.sdf", "w") + f.write(str(self.sdf_string_opt)+'\n') + f.write("> "+'\n') + f.write(str(self.energy)+'\n\n') + f.write("$$$$"+'\n') + f.close() diff --git a/fafoom/pynwchem.py b/fafoom/pynwchem.py index 2083ec4..3f49ae3 100644 --- a/fafoom/pynwchem.py +++ b/fafoom/pynwchem.py @@ -16,7 +16,7 @@ # along with fafoom. If not, see . '''Wrapper for NWCHEM''' - +from __future__ import division import glob import os import subprocess @@ -24,25 +24,22 @@ from utilities import sdf2xyz prefix_name = 'geo' +hartree2eV = 27.21138602 class NWChemObject(): '''Create and handle NWCHEM objects ''' - def __init__(self, parameter_file): - """Get the parameters from file. + def __init__(self, functional, basis_set): + """Initialize the NWChemObject. + Args(required): - parameter_file (str): name of the parameter file + functional + basis_set Raises: KeyError: if the functional or basis set is not defined """ - with open(parameter_file) as fin: - parameter_dict = dict(line.strip().partition(' ')[::2] for line in fin) - fin.close() - for key in ['functional', 'basis_set']: - if key not in parameter_dict: - raise KeyError("The functional or basis_set is not defined.") - self.functional = parameter_dict['functional'] - self.basis_set = parameter_dict['basis_set'] + self.functional = functional + self.basis_set = basis_set def generate_input(self, sdf_string): """Create input files for NWChem. @@ -76,7 +73,7 @@ def run_nwchem(self, execution_string): Raises: OSError: if nwchem_molecule.nw not present in the working directory """ - if os.path.exists('nwchem_molecule.nw') == False: + if os.path.exists('nwchem_molecule.nw') is False: raise OSError("Required input file not present.") nwchem = subprocess.Popen( execution_string+str(" nwchem_molecule.nw"), @@ -110,7 +107,7 @@ def get_energy(self): if not hasattr(self, 'energy'): raise AttributeError("The calculation wasn't performed yet.") else: - return self.energy + return hartree2eV*self.energy def get_xyz_string_opt(self): """Get the optimized xyz string. diff --git a/fafoom/pyorca.py b/fafoom/pyorca.py new file mode 100644 index 0000000..18c7d18 --- /dev/null +++ b/fafoom/pyorca.py @@ -0,0 +1,155 @@ +# Copyright 2015 Adriana Supady +# +# This file is part of fafoom. +# +# Fafoom is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Fafoom is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with fafoom. If not, see . + +'''Wrapper for ORCA''' +from __future__ import division +import glob +import os +import subprocess + +from utilities import sdf2xyz + +hartree2eV = 27.21138602 + + +class OrcaObject(): + '''Create and handle ORCA objects.''' + def __init__(self, commandline, memory, chargemult="0 1", nprocs=1, + optsteps=500): + """Initialize the OrcaObject. + + Args(required): + commandline + memory + Args(optional): + chargemult (default="0 1") + nprocs (default=1) + optsteps (default=500) + Raises: + KeyError: if the commandline or memory is not defined + """ + self.commandline = commandline + self.memory = memory + self.chargemult = chargemult + self.nprocs = nprocs + self.optsteps = optsteps + + def generate_input(self, sdf_string): + """Create input files for ORCA. + Args: + sdf_string (str) + """ + xyz_string = sdf2xyz(sdf_string) + coord = xyz_string.split('\n') + string1 = '! '+str(self.commandline)+'\n' + string2 = '! xyzfile\n' + string3 = '%geom MaxIter '+str(self.optsteps)+' end\n' + string4 = '%pal nprocs '+str(self.nprocs)+" end\n" + string5 = '%maxcore '+str(self.memory)+"\n" + string6 = '* xyz '+str(self.chargemult)+'\n' + with open('orca_molecule.inp', 'w') as f: + f.write(string1) + f.write(string2) + f.write(string3) + f.write(string4) + f.write(string5) + f.write(string6) + f.write('\n'.join(coord[2:])) + f.write('*\n') + f.close() + + def run_orca(self, execution_string): + """Run orca and write output to 'result.out'. The optimized + geometry is written to 'orca_molecule.xyz'. + + Warning: this function uses subprocessing to invoke the run. + The subprocess's shell is set to TRUE. + Args: + execution_string (str): e.g. ORCA or for parallel version + /the/complete/path/to/orca + Raises: + OSError: if orca_molecule.inp not present in the working directory + """ + success = False + if os.path.exists('orca_molecule.inp') is False: + raise OSError("Required input file not present.") + orca = subprocess.Popen( + execution_string+str(" orca_molecule.inp"), + stdout=subprocess.PIPE, shell=True) + out = subprocess.Popen( + ['cat'], stdin=orca.stdout, + stdout=open('result.out', 'w'), shell=True) + out.wait() + + searchfile = open("result.out", "r") + + s0 = "THE OPTIMIZATION HAS CONVERGED" + s = "FINAL SINGLE POINT ENERGY" + not_conv = True + for line in searchfile: + if s0 in line: + not_conv = False + searchfile.close() + if not_conv: + killfile = open("kill.dat", "w") + killfile.close() + else: + searchfile = open("result.out", "r") + for line in searchfile: + if s in line: + energy_tmp = float(line.split(" ")[-1].split('\n')[0]) + searchfile.close() + self.energy = energy_tmp + + with open('orca_molecule.xyz', 'r') as f: + self.xyz_string_opt = f.read() + f.close() + success = True + return success + + def get_energy(self): + """Get the energy of the molecule. + + Returns: + energy (float) in eV + Raises: + AttributeError: if energy hasn't been calculated yet + """ + if not hasattr(self, 'energy'): + raise AttributeError("The calculation wasn't performed yet.") + else: + return hartree2eV*self.energy + + def get_xyz_string_opt(self): + """Get the optimized xyz string. + + Returns: + optimized xyz string (str) + Raises: + AttributeError: if the optimization hasn't been performed yet + """ + if not hasattr(self, 'xyz_string_opt'): + raise AttributeError("The calculation wasn't performed yet.") + else: + return self.xyz_string_opt + + def clean(self): + """Clean the working direction after the orca calculation has been + completed. + """ + for f in glob.glob("orca_molecule.*"): + os.remove(f) diff --git a/fafoom/run_utilities.py b/fafoom/run_utilities.py new file mode 100644 index 0000000..636bde5 --- /dev/null +++ b/fafoom/run_utilities.py @@ -0,0 +1,184 @@ +# Copyright 2015 Adriana Supady +# +# This file is part of fafoom. +# +# Fafoom is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Fafoom is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with fafoom. If not, see . +''' Collection of diverse run controlling utilites ''' +from __future__ import division +import glob +import sys + +from utilities import print_output, remover_file, remover_dir, backup + + +def simple_or_restart(): + """ Select the type of run. If the all backup files are present the run + will be restarted. Otherwise, the directory will be cleaned and a new run + will be started.""" + + for_restart = ["backup_population.dat", "backup_mol.dat", + "backup_min_energy.dat", "backup_iteration.dat", + "backup_blacklist.dat"] + opt = "restart" + for filename in for_restart: + if glob.glob(filename): + pass + else: + opt = "simple" + if opt == "simple": + print_output("Cleaning up the directory") + for d in glob.glob("initial_*"): + remover_dir(d) + for d in glob.glob("generation_*_child*"): + remover_dir(d) + remover_dir("blacklist") + for f in ["mol.sdf", "control.in", "geometry.in", "output.txt", + "result.out", "kill.dat"]: + remover_file(f) + for f in for_restart: + remover_file(f) + if opt == "restart": + remover_file("kill.dat") + return opt + + +def str_info(struct): + """ Prints the information about the structure to the output file""" + print_output(struct) + for dof in struct.dof: + print_output("Values of "+str(dof.type)+": " + str(dof.values)) + + +def relax_info(struct): + """ Prints the information about the structure to the output file after + the local optimization.""" + print_output(struct) + for dof in struct.dof: + print_output("Values of "+str(dof.type)+" before: " + + str(dof.initial_values)+" and after local opt: " + + str(dof.values)) + + +def check_for_kill(): + """ Check if the kill.dat file is present in the directory or in the + subdirectories. If discoveed the run will be aborted.""" + if len(glob.glob("*/kill.dat")) == 0 and len(glob.glob("kill.dat")) == 0: + pass + else: + print_output("Kill.dat file discovered. The code terminates") + sys.exit(0) + + +def detect_energy_function(params): + """ Detect the energy function that will be used for local optimization.""" + if 'energy_function' not in params: + print_output("The energy function hasn't been defined." + " The code terminates") + sys.exit(0) + else: + if params['energy_function'] in ['aims', 'FHI-aims', 'FHIaims']: + print_output("Local optimization will be performed with FHI-aims.") + energy_function = "aims" + elif params['energy_function'] in ['nwchem', 'NWChem']: + print_output("Local optimization will be performed with NWChem.") + energy_function = "nwchem" + elif params['energy_function'] in ['ORCA', 'Orca', 'orca']: + print_output("Local optimization will be performed with ORCA.") + energy_function = "orca" + elif params['energy_function'] in ['ff', 'force_field', 'RDKit', + 'rdkit']: + print_output("Local optimization will be performed with RDKit.") + energy_function = "ff" + else: + print_output("Unknown type of energy function." + " The code terminates.") + sys.exit(0) + return energy_function + + +def optimize(structure, energy_function, params, name=None): + """Perform local optimization.""" + if energy_function == "aims": + structure.perform_aims(params['sourcedir'], params['aims_call'], name) + elif energy_function == "nwchem": + structure.perform_nwchem(params['functional'], params['basis_set'], + params['nwchem_call']) + elif energy_function == "orca": + linked_params = {} + for key in ["chargemult", "nprocs", "optsteps"]: + if key in params: + linked_params[str(key)] = params[str(key)] + structure.perform_orca(params['commandline'], + params['memory'], + params['orca_call'], **linked_params) + elif energy_function == "ff": + linked_params = {} + for key in ["steps", "force_tol", "energy_tol"]: + if key in params: + linked_params[str(key)] = params[str(key)] + structure.perform_ff(params['force_field'], **linked_params) + + +def perform_backup(mol, population, blacklist, iteration, min_energy): + """Write object representation to files for a future restart.""" + backup("backup_mol.dat", mol) + backup("backup_population.dat", population) + backup("backup_blacklist.dat", blacklist) + backup("backup_iteration.dat", iteration) + backup("backup_min_energy.dat", min_energy) + + +def find_linked_params(mol, params): + """ Check for parameters assosciated with the degrees of freedom.""" + linked_params = {} + for dof_name in mol.dof_names: + if 'prob_for_mut_'+str(dof_name) in params: + linked_params['prob_for_mut_'+str(dof_name)] = \ + params['prob_for_mut_'+str(dof_name)] + if 'max_mutations_'+str(dof_name) in params: + linked_params['max_mutations_'+str(dof_name)] = \ + params['max_mutations_'+str(dof_name)] + return linked_params + + +def check_for_convergence(iteration, params, min_energy): + """Check the run for convergence""" + if iteration >= params['iter_limit_conv']-1: + print_output("Checking for convergence") + d = abs(min_energy[iteration+1]-min_energy[iteration + 1 - + params['iter_limit_conv']]) + if 'energy_wanted' in params: + if min_energy[-1] < params['energy_wanted'] or \ + d < params['energy_diff_conv']: + print_output("Converged") + killfile = open("kill.dat", "w") + killfile.close() + sys.exit(0) + else: + print_output("Not converged yet") + else: + if d < params['energy_diff_conv']: + print_output("Converged") + killfile = open("kill.dat", "w") + killfile.close() + sys.exit(0) + else: + print_output("Not converged yet") + if iteration == params['max_iter']-1: + print_output("Max. number of iterations reached. The code terminates") + killfile = open("kill.dat", "w") + killfile.close() + sys.exit(0) + else: + print_output("Next iteration will be perfomed") diff --git a/fafoom/structure.py b/fafoom/structure.py index 48c6ff1..41e5fa4 100644 --- a/fafoom/structure.py +++ b/fafoom/structure.py @@ -15,24 +15,34 @@ # You should have received a copy of the GNU Lesser General Public License # along with fafoom. If not, see . ''' Handle the molecule and its 3D structures.''' - -from rdkit import Chem -from rdkit.Chem import AllChem -import numpy as np -import os -import glob +from __future__ import division from copy import deepcopy - -from new_molecule_parametrize import parametrize, template_sdf -from angle import angle_measure, angle_set -from genetic_operations import crossover, mutation_tor, mutation_cistrans +from get_parameters import ( + create_dof_object, + get_atoms_and_bonds, + get_positions, + template_sdf +) +from genetic_operations import crossover from pyaims import AimsObject from pyff import FFObject from pynwchem import NWChemObject -from utilities import boolean, print_output, check_geo_sdf, get_vec, tor_rmsd,\ - string2file, aims2sdf, xyz2sdf, mirror_sdf -from random import randint, choice +from pyorca import OrcaObject + +from utilities import ( + aims2sdf, + check_geo_sdf, + file2dict, + lowest_cartesian, + mirror_sdf, + print_output, + set_default, + xyz2sdf + +) +import random +#from random import randint, choice class MoleculeDescription: @@ -40,32 +50,39 @@ class MoleculeDescription: newline = "NEWLINE" def __init__(self, parameter_file=None, **kwargs): - """Initialize the molecule. Get the parameters from file (if present) or - read keyword args. The keyword args overwrite the file values.""" + """Initialize the molecule. Get the parameters from file (if present) + or read keyword args. The keyword args overwrite the file values.""" + params = {} if parameter_file is not None: - with open(parameter_file) as fin: - parameter_dict = dict(line.strip().partition(' ')[::2] for line in fin) - self.smile = parameter_dict['smile'] - self.custom = parameter_dict['custom'] - self.rmsd_cutoff_uniq = float(parameter_dict['rmsd_cutoff_uniq']) - self.distance_cutoff_1 = float(parameter_dict['distance_cutoff_1']) - self.distance_cutoff_2 = float(parameter_dict['distance_cutoff_2']) - self.chiral = parameter_dict['chiral'] - self.rmsd_type = parameter_dict['rmsd_type'] - self.smart_torsion = parameter_dict['smart_torsion'] - self.smart_cistrans = parameter_dict['smart_cistrans'] - self.smart_custom = parameter_dict['smart_custom'] - fin.close() - for key in kwargs.keys(): - if hasattr(self, str(key)): - print_output("Overwriting the values for keyword "+str(key)) - print_output("Old value: "+str(getattr(self, str( - key)))+", new value: "+str(kwargs[key])) - if key not in ["template_sdf_string"]: - setattr(self, str(key), kwargs[key]) - else: - setattr(self, str(key), kwargs[key].replace( - MoleculeDescription.newline, "\n")) + params = file2dict(parameter_file, ['Molecule']) + new_names_dict = {'smile': 'smiles', + 'smart_torsion': 'smarts_torsion', + 'filter_smart_torsion': 'filter_smarts_torsion', + 'smart_cistrans': 'smarts_cistrans'} + for key in new_names_dict: + try: + params[new_names_dict[key]] = params.pop(key) + except KeyError: + pass + else: + for key in kwargs.keys(): + if key not in ["template_sdf_string"]: + params[str(key)] = kwargs[key] + else: + params[str(key)] = kwargs[key].replace( + MoleculeDescription.newline, "\n") + + dict_default = {'rmsd_type': "cartesian", 'distance_cutoff_1': 1.3, + 'distance_cutoff_2': 2.15, 'rmsd_cutoff_uniq': 0.2, + 'chiral': True, 'optimize_torsion': True, + 'smarts_torsion': + "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]"} + + params = set_default(params, dict_default) + + for key in params: + if not hasattr(self, str(key)): + setattr(self, str(key), params[key]) def __repr__(self): """Create an unambiguous object representation. The resulting string @@ -73,13 +90,14 @@ def __repr__(self): '\n' sign in the template sdf_string attribute.""" repr_list = [] for att_name in self.__dict__.keys(): - if att_name in ["smile", "rmsd_type", "smart_torsion", "custom", - "chiral", "smart_cistrans", "smart_custom"]: + + if type(self.__dict__[att_name]) in [str] and \ + att_name != "template_sdf_string": repr_list.append('%s="%s"' % (att_name, getattr(self, att_name))) - elif att_name in ["rmsd_cutoff_uniq", "distance_cutoff_1", - "distance_cutoff_2", "atoms", "bonds", - "torsion", "cistrans", "custom_torsion"]: + + elif type(self.__dict__[att_name]) in [int, float, bool, list]: + repr_list.append('%s=%s' % (att_name, repr(getattr(self, att_name)))) elif att_name in ["template_sdf_string"]: @@ -88,6 +106,7 @@ def __repr__(self): self, att_name).replace("\n", MoleculeDescription.newline,))) else: + print_output("Unknown type of attribute "+str(att_name)) return "%s(%s)" % (self.__class__.__name__, ', '.join(repr_list)) @@ -99,22 +118,38 @@ def __eq__(self, other): continue else: return False + for att_name in other.__dict__.keys(): + if getattr(other, att_name) == getattr(self, att_name): + continue + else: + return False return True - def get_mol_parameters(self): - """Assign new attributes (atoms, bonds, torsion, cistrans, - custom_torsion) to the object.""" - (self.atoms, self.bonds, - self.torsion, self.cistrans, - self.custom_torsion) = parametrize( - self.smile, self.custom, self.smart_torsion, - self.smart_cistrans, self.smart_custom) - if self.custom: - self.torsion = self.custom_torsion + def get_parameters(self): + """Assign permanent attributes (number of atoms, number of bonds and + degrees of freedom related attributes) to the object.""" + self.atoms, self.bonds = get_atoms_and_bonds(self.smiles) + self_copy = deepcopy(self) + dof_names = [] + for attr, value in self_copy.__dict__.iteritems(): + if str(attr).split('_')[0] == "optimize" and value: + type_of_dof = str(attr).split('_')[1] + linked_attr = {} + for attr, value in self_copy.__dict__.iteritems(): + if type_of_dof in str(attr).split('_'): + linked_attr[attr] = value + pos = get_positions(type_of_dof, self.smiles, **linked_attr) + if len(pos) != 0: + setattr(self, type_of_dof, pos) + dof_names.append(type_of_dof) + else: + print_output("The degree to optimize: "+str(type_of_dof) + + " hasn't been found.") + setattr(self, "dof_names", dof_names) def create_template_sdf(self): """Assign new attribute (template_sdf_string) to the object.""" - self.template_sdf_string = template_sdf(self.smile, + self.template_sdf_string = template_sdf(self.smiles, self.distance_cutoff_1, self.distance_cutoff_2) @@ -125,55 +160,90 @@ class Structure: newline = "NEWLINE" def __init__(self, arg=None, **kwargs): - """Initialize the 3D structure: (1) from MoleculeDescription class object - or from (2) from previously created object of the Structure class. Any - present and valid keyword args overwrite the old values. - Warning: the may be more attributes in the (2) case.""" + """Initialize the 3D structure: (1) from MoleculeDescription class + object or from (2) from previously created object of the Structure + class. Any present and valid keyword args overwrite the old values. + Warning: there may be more attributes in the (2) case.""" if isinstance(arg, MoleculeDescription): + self.mol_info = arg Structure.index += 1 self.index = Structure.index + dof = [] + for i in self.mol_info.dof_names: + new_obj = create_dof_object(str(i), getattr(self.mol_info, i)) + dof.append(new_obj) + setattr(self, "dof", dof) + elif isinstance(arg, Structure): + self.mol_info = arg.mol_info Structure.index += 1 self.index = Structure.index for att_name in arg.__dict__.keys(): - if att_name != "mol_info" and att_name != "index": + + if att_name not in ["mol_info", "index"]: setattr(self, str(att_name), deepcopy(getattr(arg, str(att_name)))) + else: print_output("Initialization can't be performed. Check the input") + for key in kwargs.keys(): if key != "index": if hasattr(self, str(key)): - print_output("Overwriting the values for keyword "+str(key)) - print_output("Old value: "+str(getattr(self, str( - key)))+", new value: "+str(kwargs[key])) - if key not in ["sdf_string", "initial_sdf_string"]: - setattr(self, str(key), kwargs[key]) - else: + print_output("Overwriting the value for keyword "+str(key)) + print_output("Old value: "+str(getattr(self, str(key))) + + ", new value: "+str(kwargs[key])) + if key in ["sdf_string", "initial_sdf_string"]: setattr(self, str(key), kwargs[key].replace(Structure.newline, "\n")) + elif key.split('_')[0] in self.mol_info.dof_names: + for dof in self.dof: + if key.split('_')[0] == dof.type: + if key.split('_')[1] == 'initial': + setattr(dof, 'initial_values', kwargs[key]) + if key.split('_')[1] == 'values': + setattr(dof, 'values', kwargs[key]) + else: + setattr(self, str(key), kwargs[key]) + def __repr__(self): """Create an unambiguous object representation. The resulting string is an one-liner with the newline parameter replacing the original '\n' sign in the sdf_string and initial_sdf_string attribute.""" repr_list = [] for att_name in self.__dict__.keys(): - if att_name in ["energy", "index", "values_cistrans", "values_tor", - "initial_values_cistrans", "initial_values_tor"]: - repr_list.append('%s=%s' % ( - att_name, repr(getattr(self, att_name)))) - elif att_name in ["sdf_string", "initial_sdf_string"]: + + if att_name in ["sdf_string", "initial_sdf_string"]: repr_list.append("%s='%s'" % ( att_name, getattr( self, att_name).replace("\n", Structure.newline))) - elif att_name in ["mol_info"]: - pass else: - print_output("Unknown type of attribute "+str(att_name)) + if type(self.__dict__[att_name]) in [str]: + repr_list.append('%s=%s' % ( + att_name, repr(getattr(self, att_name)))) + elif type(self.__dict__[att_name]) in [int, float, bool]: + repr_list.append('%s=%s' % ( + att_name, repr(getattr(self, att_name)))) + elif att_name == 'dof': + for dof in self.dof: + repr_list.append('%s_%s=%s' % ( + dof.type, "values", repr(dof.values))) + try: + repr_list.append('%s_%s=%s' % ( + dof.type, "initial_values", + repr(dof.initial_values))) + except: + pass + elif att_name == 'mol_info': + pass + + else: + print_output("Unknown type of attribute "+str(att_name)) + return "%s(mol, %s)" % (self.__class__.__name__, ', '.join(repr_list)) def __str__(self): @@ -184,57 +254,25 @@ def __float__(self): """Return the object energy.""" return float(self.energy) - def generate_random_structure(self, cistrans1=0, cistrans2=180): - """Generate a random structure and assign new attributes to the object - (sdf_string, values_cistrans, values_tor) - - Args(optional): - cistrans1 (int): first option for the cistrans bond value - cistrans2 (int): second option for the cistrans bond value - If the arguments are not specified, the cistrans bond will 0 or - 180 degrees. - """ - self.values_cistrans, self.values_tor = [], [] - for x in range(len(self.mol_info.cistrans)): - self.values_cistrans.append(float(choice([cistrans1, cistrans2]))) - for y in range(len(self.mol_info.torsion)): - self.values_tor.append(randint(0.0, 359.0)-179.0) - self.sdf_string = angle_set(self.mol_info.template_sdf_string, - self.mol_info.cistrans, - self.mol_info.torsion, - self.values_cistrans, self.values_tor) - self.values_cistrans, self.values_tor = angle_measure( - self.sdf_string, - self.mol_info.cistrans, - self.mol_info.torsion) - - def generate_structure_from_values(self, values_cis, values_tor): - """Generate structure from lists of valuse and assign new attributes - to the object (sdf_string, values_cistrans, values_tor). + def generate_structure(self, values={}): + """Generate a 3D structures. If no values are passed, a random + structure will be generated (weights, associated with the degrees of + freedom, will be taken into account).""" - Args: - values_cis (list): values for the cistrans bonds, put [] if no - cistrans bonds are present - values_tor (list): values for the torsions, put [] if no torsions - are present - Raises: - ValueError: if the length of the corresponding lists differ - """ - if len(values_cis) != len(self.mol_info.cistrans): - raise ValueError("No match between the number of values to assign \ -and the number of cistrans bonds.") - if len(values_tor) != len(self.mol_info.torsion): - raise ValueError("No match between the number of values to assign \ -and the number of torsions.") - self.values_cistrans, self.values_tor = values_cis, values_tor - self.sdf_string = angle_set(self.mol_info.template_sdf_string, - self.mol_info.cistrans, - self.mol_info.torsion, - self.values_cistrans, self.values_tor) - self.values_cistrans, self.values_tor = angle_measure( - self.sdf_string, - self.mol_info.cistrans, - self.mol_info.torsion) + new_string = deepcopy(self.mol_info.template_sdf_string) + for dof in self.dof: + if dof.type in values.keys(): + new_string = dof.apply_on_string(new_string, values[dof.type]) + else: + if hasattr(self.mol_info, "weights_"+str(dof.type)): + weights = getattr(self.mol_info, "weights_"+str(dof.type)) + dof.get_weighted_values(weights) + else: + dof.get_random_values() + new_string = dof.apply_on_string(new_string) + self.sdf_string = new_string + for dof in self.dof: + dof.update_values(self.sdf_string) def is_geometry_valid(self): """Return True if the geometry is valid.""" @@ -243,92 +281,57 @@ def is_geometry_valid(self): return check def __eq__(self, other): - """Calculate the rmsd for an object pair. + """Decide, if the objects are equal based on the rms values. Returns: True, if the objects are 'similar' Raises: ValueError: if the rmsd type differs between the objects - ValueErrof: if the rmsd type is unknown + ValueErrof: if the rmsd type is unknown (supported options are + 'cartesian' and 'internal_coord') """ + if self.mol_info.rmsd_type != other.mol_info.rmsd_type: raise ValueError("The type of the rmsd differ for the objects") - if self.mol_info.rmsd_type not in ['cartesian', 'torsional']: + if self.mol_info.rmsd_type not in ['cartesian', 'internal_coord']: raise ValueError("Unknown type of rmsd.") - if self.mol_info.rmsd_type == 'cartesian': - obj1, obj2 = self, other - if hasattr(self, "initial_sdf_string"): - obj1, obj2 = obj2, obj1 - if hasattr(obj1, "initial_sdf_string"): - raise Exception("Both structures are already relaxed") - ref = Chem.MolFromMolBlock(obj1.sdf_string, removeHs=False) - probe = Chem.MolFromMolBlock(obj2.sdf_string, removeHs=False) - bestrms = AllChem.GetBestRMS(ref, probe) + obj1, obj2 = self, other + if hasattr(self, "initial_sdf_string"): + obj1, obj2 = obj2, obj1 + if hasattr(obj1, "initial_sdf_string"): + raise Exception("Both structures are already relaxed.") + + if obj1.mol_info.rmsd_type == 'cartesian': + linked_strings = {} if hasattr(obj2, "initial_sdf_string"): - probe_ini = Chem.MolFromMolBlock( - obj2.initial_sdf_string, removeHs=False) - bestrms_ini = AllChem.GetBestRMS(ref, probe_ini) - if bestrms_ini < bestrms: - bestrms = bestrms_ini - if not boolean(obj1.mol_info.chiral): - obj2_mirror_string = mirror_sdf(obj2.sdf_string) - probe_mirror = Chem.MolFromMolBlock( - obj2_mirror_string, removeHs=False) - bestrms_mirror = AllChem.GetBestRMS(ref, probe_mirror) + n_str = str(obj2.initial_sdf_string) + linked_strings[n_str] = obj2.initial_sdf_string + + if not obj1.mol_info.chiral: + n_str = str(mirror_sdf(obj2.sdf_string)) + linked_strings[n_str] = mirror_sdf(obj2.sdf_string) if hasattr(obj2, "initial_sdf_string"): - obj2_mirror_string_ini = mirror_sdf( - obj2.initial_sdf_string) - probe_mirror_ini = Chem.MolFromMolBlock( - obj2_mirror_string_ini, removeHs=False) - bestrms_mirror_ini = AllChem.GetBestRMS( - ref, probe_mirror_ini) - if bestrms_mirror_ini < bestrms_mirror: - bestrms_mirror = bestrms_mirror_ini - if bestrms_mirror < bestrms: - bestrms = bestrms_mirror + n_str = str(mirror_sdf(obj2.initial_sdf_string)) + linked_strings[n_str] = mirror_sdf(obj2.initial_sdf_string) + + bestrms = lowest_cartesian(obj1.sdf_string, obj2.sdf_string, + **linked_strings) + if bestrms > obj1.mol_info.rmsd_cutoff_uniq: return False else: return True - if self.mol_info.rmsd_type == 'torsional': - obj1, obj2 = self, other - if hasattr(self, "initial_sdf_string"): - obj1, obj2 = obj2, obj1 - if hasattr(obj1, "initial_sdf_string"): - raise Exception("Both structures are already relaxed.") - rmsd1 = tor_rmsd(2, get_vec( - np.concatenate(( - obj1.values_cistrans, obj1.values_tor), axis=0), - np.concatenate(( - obj2.values_cistrans, obj2.values_tor), axis=0))) - rmsd = rmsd1 - if hasattr(obj2, "initial_sdf_string"): - rmsd2 = tor_rmsd(2, get_vec( - np.concatenate(( - obj1.values_cistrans, obj1.values_tor), axis=0), - np.concatenate(( - obj2.initial_values_cistrans, - obj2.initial_values_tor), axis=0))) - if rmsd2 < rmsd1: - rmsd = rmsd2 - if not boolean(obj1.mol_info.chiral): - rmsd_mirr1 = tor_rmsd(2, get_vec( - np.concatenate(( - obj1.values_cistrans, obj1.values_tor), axis=0), - -1*np.concatenate(( - obj2.values_cistrans, obj2.values_tor), axis=0))) - rmsd_mirr2 = tor_rmsd(2, get_vec( - np.concatenate(( - obj1.values_cistrans, obj1.values_tor), axis=0), - -1*np.concatenate(( - obj2.initial_values_cistrans, - obj2.initial_values_tor), axis=0))) - rmsd_mirr = min(rmsd_mirr1, rmsd_mirr2) - if rmsd_mirr < rmsd: - rmsd = rmsd_mirr - if rmsd > obj1.mol_info.rmsd_cutoff_uniq: + + if obj1.mol_info.rmsd_type == 'internal_coord': + all_bool = [] + for dof1, dof2 in zip(obj1.dof, obj2.dof): + all_bool.append(dof1.is_equal(dof2, + obj1.mol_info.rmsd_cutoff_uniq, + obj1.mol_info.chiral)) + + if False in all_bool: return False else: return True @@ -337,115 +340,144 @@ def __cmp__(self, other): """Compare two object basing on their energy values.""" return cmp(self.energy, other.energy) - def send_to_blacklist(self, address, array): - """Append the structure to dedicated array. Write it to file and store. + def send_to_blacklist(self, array): + """Append the structure to dedicated array. Args: - address: name of directory to store the structure (file); if the - directory is absent it will be created array: the array to append to Raise: NameError: if the array not defined """ - try: - os.mkdir(address) - except OSError: - pass - array.append(self) - cnt_black = len(glob.glob(address+"/*.sdf")) - string2file(self.sdf_string, - str(address)+"/black_"+str(cnt_black)+".sdf") - if not self.mol_info.chiral: - mirror_sdf_string = mirror_sdf(self.sdf_string) - cnt_black = len(glob.glob(address+"/*.sdf")) - string2file(mirror_sdf_string, - str(address)+"/black_"+str(cnt_black)+".sdf") + array.append(self) - def perform_aims(self, parameter_file, execution_string, dirname): + def perform_aims(self, sourcedir, execution_string, dirname): """Generate the FHI-aims input, run FHI-aims, store the output, assign - new attributes (initial_sdf_string, initial_values_cistrans, - initial_values_tor) and update attribute values (sdf_string, - values_cistrans, values_tor).""" + new attributes and update attribute values.""" - aims_object = AimsObject(parameter_file) + aims_object = AimsObject(sourcedir) aims_object.generate_input(self.sdf_string) aims_object.build_storage(dirname) - aims_object.run_aims(execution_string) - aims_object.clean_and_store() - self.energy = aims_object.get_energy() - self.initial_sdf_string = self.sdf_string - self.initial_values_cistrans = self.values_cistrans - self.initial_values_tor = self.values_tor - self.sdf_string = aims2sdf( - aims_object.get_aims_string_opt(), - self.mol_info.template_sdf_string) - self.values_cistrans, self.values_tor = angle_measure( - self.sdf_string, - self.mol_info.cistrans, - self.mol_info.torsion) - - def perform_nwchem(self, parameter_file, execution_string): - """Generate the NWChem input, run NWChem, assign new attributes - (initial_sdf_string, initial_values_cistrans, initial_values_tor) and - update attribute values (sdf_string, values_cistrans, values_tor).""" - nwchem_object = NWChemObject(parameter_file) + success = aims_object.run_aims(execution_string) + if success: + aims_object.clean_and_store() + self.energy = aims_object.get_energy() + self.initial_sdf_string = self.sdf_string + self.sdf_string = aims2sdf(aims_object.get_aims_string_opt(), + self.mol_info.template_sdf_string) + + for dof in self.dof: + setattr(dof, "initial_values", dof.values) + dof.update_values(self.sdf_string) + else: + print_output("The FHI-aims relaxation failed") + + def perform_nwchem(self, functional, basis_set, execution_string): + """Generate the NWChem input, run NWChem, assign new attributes and + update attribute values.""" + nwchem_object = NWChemObject(functional, basis_set) nwchem_object.clean() nwchem_object.generate_input(self.sdf_string) nwchem_object.run_nwchem(execution_string) nwchem_object.clean() self.energy = nwchem_object.get_energy() self.initial_sdf_string = self.sdf_string - self.initial_values_cistrans = self.values_cistrans - self.initial_values_tor = self.values_tor - self.sdf_string = xyz2sdf( - nwchem_object.get_xyz_string_opt(), - self.mol_info.template_sdf_string) - self.values_cistrans, self.values_tor = angle_measure( - self.sdf_string, - self.mol_info.cistrans, - self.mol_info.torsion) - - def perform_ff(self, parameter_file): + self.sdf_string = xyz2sdf(nwchem_object.get_xyz_string_opt(), + self.mol_info.template_sdf_string) + + for dof in self.dof: + setattr(dof, "initial_values", dof.values) + dof.update_values(self.sdf_string) + + def perform_orca(self, commandline, memory, execution_string, **kwargs): + """Generate the orca input, run orca, assign new attributes and + update attribute values.""" + orca_object = OrcaObject(commandline, memory, **kwargs) + orca_object.clean() + orca_object.generate_input(self.sdf_string) + orca_object.run_orca(execution_string) + orca_object.clean() + self.energy = orca_object.get_energy() + self.initial_sdf_string = self.sdf_string + self.sdf_string = xyz2sdf(orca_object.get_xyz_string_opt(), + self.mol_info.template_sdf_string) + + for dof in self.dof: + setattr(dof, "initial_values", dof.values) + dof.update_values(self.sdf_string) + + def perform_ff(self, force_field, **kwargs): """Generate the force-field input, run force=field calculation, assign - new attributes (initial_sdf_string, initial_values_cistrans, - initial_values_tor) and update attribute values (sdf_string, - values_cistrans, values_tor).""" - ff_object = FFObject(parameter_file) + new attributes and update attribute values.""" + ff_object = FFObject(force_field, **kwargs) ff_object.run_ff(self.sdf_string) self.energy = ff_object.get_energy() self.initial_sdf_string = self.sdf_string - self.initial_values_cistrans = self.values_cistrans - self.initial_values_tor = self.values_tor self.sdf_string = ff_object.get_sdf_string_opt() - self.values_cistrans, self.values_tor = angle_measure( - self.sdf_string, - self.mol_info.cistrans, - self.mol_info.torsion) + ff_object.save_to_file() + for dof in self.dof: + setattr(dof, "initial_values", dof.values) + dof.update_values(self.sdf_string) def crossover(self, other): """Perform the crossover.""" - start1, start2 = crossover(self.values_cistrans, - other.values_cistrans) - end1, end2 = crossover(self.values_tor, other.values_tor) + child1 = Structure(self.mol_info) - child1.generate_structure_from_values(start1, end1) child2 = Structure(self.mol_info) - child2.generate_structure_from_values(start2, end2) + + for dof_par1, dof_par2, dof_child1, dof_child2 in zip(self.dof, + other.dof, + child1.dof, + child2.dof): + if dof_par1.type == dof_par2.type: + a, b = crossover(getattr(dof_par1, "values"), + getattr(dof_par2, "values")) + setattr(dof_child1, "values", a) + setattr(dof_child2, "values", b) + + for child in child1, child2: + new_string = deepcopy(child.mol_info.template_sdf_string) + for dof in child.dof: + new_string = dof.apply_on_string(new_string, dof.values) + child.sdf_string = new_string + for dof in child.dof: + dof.update_values(child.sdf_string) + return child1, child2 - def mutation_tor(self, max_mutations_torsions): - """Mutate the torsion values list.""" - dt = len(self.mol_info.torsion) - if dt > 0: - mut_list = mutation_tor(self.values_tor, - max_mutations_torsions) - self.generate_structure_from_values(self.values_cistrans, mut_list) - - def mutation_cistrans(self, max_mutations_cistrans): - """Mutate the cistrans bonds list.""" - dc = len(self.mol_info.cistrans) - if dc > 0: - mut_list = mutation_cistrans(self.values_cistrans, - max_mutations_cistrans) - self.generate_structure_from_values(mut_list, self.values_tor) + def mutate(self, **kwargs): + + def call_mut(dof, max_mutations=None, weights=None): + print_output("Performing mutation for: "+str(dof.type)) + if max_mutations is not None: + if hasattr(self.mol_info, "weights_"+str(dof.type)): + weights = getattr(self.mol_info, "weights_"+str(dof.type)) + dof.mutate_values(max_mutations, weights) + else: + dof.mutate_values(max_mutations=max_mutations) + else: + if hasattr(self.mol_info, "weights_"+str(dof.type)): + weights = getattr(self.mol_info, "weights_"+str(dof.type)) + dof.mutate_values(weights=weights) + else: + dof.mutate_values() + + for dof in self.dof: + if 'prob_for_mut_'+str(dof.type) in kwargs: + if random.random() < kwargs['prob_for_mut_'+str(dof.type)]: + if 'max_mutations_'+str(dof.type) in kwargs: + call_mut(dof, kwargs['max_mutations_'+str(dof.type)]) + else: + call_mut(dof) + else: + if 'max_mutations_'+str(dof.type) in kwargs: + call_mut(dof, kwargs['max_mutations_'+str(dof.type)]) + else: + call_mut(dof) + + new_string = deepcopy(self.sdf_string) + for dof in self.dof: + new_string = dof.apply_on_string(new_string, dof.values) + self.sdf_string = new_string + for dof in self.dof: + dof.update_values(self.sdf_string) diff --git a/fafoom/utilities.py b/fafoom/utilities.py index 8ce55fa..8d8eea5 100644 --- a/fafoom/utilities.py +++ b/fafoom/utilities.py @@ -15,10 +15,17 @@ # You should have received a copy of the GNU Lesser General Public License # along with fafoom. If not, see . ''' Collection of diverse help/convert functions ''' +from __future__ import division import os import numpy as np import math import shutil +import ConfigParser + +from rdkit import Chem +from rdkit.Chem import AllChem + +from operator import itemgetter # Flow-handling @@ -43,8 +50,16 @@ def boolean(string): raise ValueError("Cannot be converted to a boolean type") +def number(s): + """Convert to integer of float if needed""" + try: + return int(s) + except ValueError: + return float(s) + + def print_output(text): - """Write text to the 'output.txt'. Create if not existing.""" + """Write text to the 'output.txt'. Create it if needed.""" if os.path.isfile("output.txt"): f = open("output.txt", "a") f.write(str(text)+'\n') @@ -72,7 +87,7 @@ def remover_dir(instance): def file2string(input_file): - """Read file to a string and return it.""" + """Read a file to a string and return it.""" with open(input_file, 'r') as f: string = f.read() f.close() @@ -80,19 +95,60 @@ def file2string(input_file): def string2file(string, filename): - """Write string to a file""" + """Write a string to a file""" with open(filename, 'w') as target: target.write(string) target.close() + +def set_default(params, dict_default): + """Set defaults for missing keys and add the key:value pairs to the + dict.""" + for key in dict_default: + if key not in params: + print_output("Setting a default value for "+str(key)+": " + + str(dict_default[key])) + params[str(key)] = dict_default[key] + return params + + +def file2dict(filename, sections): + """Parse a file and create a dictionary""" + config = ConfigParser.RawConfigParser() + config.read(filename) + new_dict = {} + for section in sections: + if config.has_section(section): + for key, value in config.items(section): + new_dict[str(key)] = eval(value) + return new_dict + # Help vector/matrix functions +def ig(x): + return itemgetter(x) + + +def cleaner(list_to_clean): + """ Remove duplicate torsion definion from a list of atom ind. tuples.""" + for_remove = [] + for x in reversed(range(len(list_to_clean))): + for y in reversed(range(x)): + ix1, ix2 = ig(1)(list_to_clean[x]), ig(2)(list_to_clean[x]) + iy1, iy2 = ig(1)(list_to_clean[y]), ig(2)(list_to_clean[y]) + if (ix1 == iy1 and ix2 == iy2) or (ix1 == iy2 and ix2 == iy1): + for_remove.append(y) + clean_list = [v for i, v in enumerate(list_to_clean) + if i not in set(for_remove)] + return clean_list + + def get_vec(vec1, vec2): - """Calculate difference between vectors of angles. + """Calculate difference between vectors of angles [in rad!]. Args: - vec1 (list) - vec2 (list) + vec1 (list) in deg + vec2 (list) in deg Returns: numpy array Raises: @@ -101,15 +157,15 @@ def get_vec(vec1, vec2): Warning1: the vectors contain periodic values, i.e -185 -> 175 Warning2: symmetry is not included here, but can be easily added if the - index of 'symmetric' torsion is known + index of the 'symmetric' torsion is known """ if len(vec1) != len(vec2): raise ValueError("No length match between the lists") - diff_vec = np.zeros((1, len(vec1))) - for i in range(1, len(vec1)+1): - tor_diff = abs(vec1[i-1]-vec2[i-1]) - diff_vec[0][i-1] = min(abs(tor_diff), abs(360-tor_diff))/180.0 - return diff_vec[0] + diff_vec = np.zeros(len(vec1)) + for i in range(0, len(vec1)): + tor_diff = abs(vec1[i]-vec2[i]) + diff_vec[i] = min(abs(tor_diff), abs(360-tor_diff))/180.0 + return diff_vec def tor_rmsd(p, vec): @@ -121,32 +177,89 @@ def tor_rmsd(p, vec): return math.pow(summe/len(vec), (1.0/p)) -def find_two_in_list(list_sum, list_to_search): - """A list is mapped to a segment of a line which length is equal to 1. - The lengths of the segments are proportional to the corresponding list - values. Next, two random numbers between 0 and 1 are generated and the - segments containing these random numbers are returned.""" +def get_cartesian_rms(sdf_string1, sdf_string2): + """Return the optimal RMS after aligning two structures.""" + ref = Chem.MolFromMolBlock(sdf_string1, removeHs=False) + probe = Chem.MolFromMolBlock(sdf_string2, removeHs=False) + rms = AllChem.GetBestRMS(ref, probe) + return rms + + +def lowest_cartesian(string1, string2, **linked_strings): + """Select lowest Cartesian RMS for two structures (for nonchiral and + previously optimized structures).""" + values = [] + get_cartesian_rms(string1, string2) + values.append(get_cartesian_rms(string1, string2)) + if linked_strings: + for string in linked_strings: + values.append(get_cartesian_rms(string1, string)) + + return min(values) + + +def find_one_in_list(sum_array, list_to_search): + """Generate a random number and return the corresponding index from a + list. See the description of the method find_two_in_list.""" + nparray_to_search = np.array(list_to_search) + rn = sum_array*np.random.rand() + found = False + index = 0 + while not found: + if rn <= nparray_to_search[:index+1].sum(axis=0): + found = True + else: + index += 1 + return index + + +def find_two_in_list(list_sum, nparray_to_search): + """A numpy array is mapped to a segment of a line which length is equal to + 1. The lengths of the segments are proportional to the corresponding numpy + array values. Next, two random numbers between 0 and 1 are generated and + the segments containing these random numbers are returned.""" rn1 = list_sum*np.random.rand() found1 = False - index = 1 + index1 = 0 while not found1: - if rn1 < list_to_search[:index].sum(axis=0): - found1 = index + if rn1 < nparray_to_search[:index1+1].sum(axis=0): + found1 = True else: - index += 1 + index1 += 1 equal = True while equal: rn2 = list_sum*np.random.rand() found2 = False - index = 1 + index2 = 0 while not found2: - if rn2 < list_to_search[:index].sum(axis=0): - found2 = index + if rn2 < nparray_to_search[:index2+1].sum(axis=0): + found2 = True else: - index += 1 - if found2 != found1: + index2 += 1 + if index2 != index1: equal = False - return found1, found2 + return index1, index2 + + +def find_closest(numb, list_of_values, periodic=False): + """For a given number, return the closest value(s) from a given list""" + all_dist = [] + for value in list_of_values: + if periodic: + all_dist.append(min(abs(numb-value), (360-abs(numb-value)))) + else: + all_dist.append(abs(numb-value)) + m = min(all_dist) + closest_ind = [i for i, j in enumerate(all_dist) if j == m] + closest = [] + for ind in closest_ind: + closest.append(list_of_values[ind]) + return closest + + +def distance(x, y): + """"Calculate distance between two points in 3D.""" + return np.sqrt((x[0]-y[0])**2+(x[1]-y[1])**2+(x[2]-y[2])**2) def check_geo_sdf(sdf_string, cutoff1, cutoff2): @@ -161,73 +274,76 @@ def check_geo_sdf(sdf_string, cutoff1, cutoff2): Raises: ValueError: if distance cutoffs are non-positive """ + if cutoff1 <= 0 or cutoff2 <= 0: raise ValueError("Distance cutoff needs to be a positive float") - - def distance(x, y): - """"Calculate distance between two points in 3D.""" - return np.sqrt((x[0]-y[0])**2+(x[1]-y[1])**2+(x[2]-y[2])**2) - - atoms = int(sdf_string.split('\n')[3].split()[0]) - bonds = int(sdf_string.split('\n')[3].split()[1]) + atoms, bonds = get_ind_from_sdfline(sdf_string.split('\n')[3]) coordinates = np.zeros((atoms, 3)) - bonds_pairs = np.zeros((bonds, 2)) + bonds_list = [] for i in range(4, atoms+4): - coordinates[i-4][0] = sdf_string.split('\n')[i].split()[0] - coordinates[i-4][1] = sdf_string.split('\n')[i].split()[1] - coordinates[i-4][2] = sdf_string.split('\n')[i].split()[2] + coordinates[i-4][0:3] = sdf_string.split('\n')[i].split()[0:3] for i in range(atoms+4, atoms+bonds+4): - bonds_pairs[i-atoms-4][0] = sdf_string.split('\n')[i].split()[0] - bonds_pairs[i-atoms-4][1] = sdf_string.split('\n')[i].split()[1] - + e1, e2 = get_ind_from_sdfline(sdf_string.split('\n')[i]) + bonds_list.append([e1, e2]) dist = np.zeros((atoms, atoms)) for x in range(atoms): - for y in xrange(x, atoms): - a = np.array(coordinates[int(x)]) - b = np.array(coordinates[int(y)]) - dist[x][y] = distance(a, b) + for y in range(x, atoms): + dist[x][y] = distance(np.array(coordinates[x]), + np.array(coordinates[y])) dist[y][x] = dist[x][y] - list_of_bonds = [] - for i in range(bonds): - list_of_bonds.append([bonds_pairs[i][0], bonds_pairs[i][1]]) - - def check_distance(): - for x in range(atoms): - for y in xrange(x+1, atoms): - if [x+1, y+1] not in list_of_bonds and \ - [y+1, x+1] not in list_of_bonds and dist[x][y] < cutoff1: + check = True + for x in range(atoms): + for y in range(x+1, atoms): + if [x+1, y+1] not in bonds_list and [y+1, x+1] not in bonds_list: + if dist[x][y] < cutoff1: check = False return check - elif ([x+1, y+1] in list_of_bonds and dist[x][y] > cutoff2) or\ - ([y+1, x+1] in list_of_bonds and dist[x][y] > cutoff2): + else: + if dist[x][y] > cutoff2: check = False return check - return True + return check - return check_distance() + +def get_ind_from_sdfline(sdf_line): + """Extract the indicies from the sdf string (for molecules with more than + 99 atoms)""" + l = len(sdf_line.split()[0]) + if l < 4: + ind1 = int(sdf_line.split()[0]) + ind2 = int(sdf_line.split()[1]) + else: + list_ind = list(sdf_line.split()[0]) + if len(list_ind) == 5: + ind1 = int(list_ind[0]+list_ind[1]) + ind2 = int(list_ind[2]+list_ind[3]+list_ind[4]) + if len(list_ind) == 6: + ind1 = int(list_ind[0]+list_ind[1]+list_ind[2]) + ind2 = int(list_ind[3]+list_ind[4]+list_ind[5]) + + return ind1, ind2 # Format conversions def sdf2aims(sdf_string): """Convert a sdf string to a aims string.""" - atoms = int(sdf_string.split('\n')[3].split()[0]) + atoms = get_ind_from_sdfline(sdf_string.split('\n')[3])[0] coord = [] for i in range(4, 4+atoms): x = float(sdf_string.split('\n')[i].split()[0]) y = float(sdf_string.split('\n')[i].split()[1]) z = float(sdf_string.split('\n')[i].split()[2]) name = sdf_string.split('\n')[i].split()[3] - coord.append('%s%10.4f%10.4f%10.4f%2s' % ('atom', x, y, z, name)) - if i != 3+atoms: - coord.append('\n') + coord.append('%s%10.4f%10.4f%10.4f%4s' % ('atom', x, y, z, name)) + coord.append('\n') aims_string = ''.join(coord) return aims_string def sdf2xyz(sdf_string): """Convert a sdf string to a xyz string.""" - atoms = int(sdf_string.split('\n')[3].split()[0]) + atoms = get_ind_from_sdfline(sdf_string.split('\n')[3])[0] coord = [str(atoms)+('\n')] for i in range(4, 4+atoms): x = float(sdf_string.split('\n')[i].split()[0]) @@ -235,12 +351,13 @@ def sdf2xyz(sdf_string): z = float(sdf_string.split('\n')[i].split()[2]) name = sdf_string.split('\n')[i].split()[3] coord.append('\n%2s%10.4f%10.4f%10.4f' % (name, x, y, z)) + coord.append('\n') xyz_string = ''.join(coord) return xyz_string def aims2sdf(aims_string, sdf_template_string): - """Convert a aims string to a sdf string. Template for the sdf string + """Convert a aims string to a sdf string. Template for the sdf string is required.""" atoms = len(aims_string.splitlines()) sdf_form = sdf_template_string.splitlines() @@ -253,27 +370,26 @@ def aims2sdf(aims_string, sdf_template_string): line[1] = aims_string.split()[5*cnt+2] line[2] = aims_string.split()[5*cnt+3] cnt += 1 - c.append('%10.4f%10.4f%10.4f%2s' % (float(line[0]), - float(line[1]), - float(line[2]), line[3])) + c.append('%10.4f%10.4f%10.4f%s%-2s' % (float(line[0]), + float(line[1]), + float(line[2]), str(' '), + line[3])) for j in xrange(4, len(line)): if j == 4: - c.append('%4d' % int(line[j])) + c.append('%3d' % int(line[j])) elif j == len(line)-1: c.append('%3d\n' % int(line[j])) else: c.append('%3d' % int(line[j])) else: - if i != len(sdf_form)-1: - c.append(''.join(sdf_form[i])+'\n') - else: - c.append(''.join(sdf_form[i])) + c.append(''.join(sdf_form[i])+'\n') + sdf_string = ''.join(c) return sdf_string def xyz2sdf(xyz_string, sdf_template_string): - """Convert a xyz string to a sdf string. Template for the sdf string + """Convert a xyz string to a sdf string. Template for the sdf string is required.""" arr = xyz_string.splitlines() atoms = int(arr[0].split()[0]) @@ -288,28 +404,27 @@ def xyz2sdf(xyz_string, sdf_template_string): line[1] = xyz_string_cut.split()[4*cnt+2] line[2] = xyz_string_cut.split()[4*cnt+3] cnt += 1 - c.append('%10.4f%10.4f%10.4f%2s' % (float(line[0]), - float(line[1]), - float(line[2]), line[3])) + c.append('%10.4f%10.4f%10.4f%s%-2s' % (float(line[0]), + float(line[1]), + float(line[2]), str(' '), + line[3])) for j in xrange(4, len(line)): if j == 4: - c.append('%4d' % int(line[j])) + c.append('%3d' % int(line[j])) elif j == len(line)-1: c.append('%3d\n' % int(line[j])) else: c.append('%3d' % int(line[j])) else: - if i != len(sdf_form)-1: - c.append(''.join(sdf_form[i])+'\n') - else: - c.append(''.join(sdf_form[i])) + c.append(''.join(sdf_form[i])+'\n') + sdf_string = ''.join(c) return sdf_string def mirror_sdf(sdf_string): """Mirror the geometry from a sdf string. Return a new sdf string.""" - atoms = int(sdf_string.split('\n')[3].split()[0]) + atoms = get_ind_from_sdfline(sdf_string.split('\n')[3])[0] sdf_form = sdf_string.splitlines() c = [] cnt = 0 @@ -320,20 +435,18 @@ def mirror_sdf(sdf_string): line[1] = -1.0*float(line[1]) line[2] = -1.0*float(line[2]) cnt += 1 - c.append('%10.4f%10.4f%10.4f%2s' % (float(line[0]), - float(line[1]), - float(line[2]), line[3])) + c.append('%10.4f%10.4f%10.4f%s%-2s' % (float(line[0]), + float(line[1]), + float(line[2]), str(' '), + line[3])) for j in xrange(4, len(line)): if j == 4: - c.append('%4d' % int(line[j])) + c.append('%3d' % int(line[j])) elif j == len(line)-1: c.append('%3d\n' % int(line[j])) else: c.append('%3d' % int(line[j])) else: - if i != len(sdf_form)-1: - c.append(''.join(sdf_form[i])+'\n') - else: - c.append(''.join(sdf_form[i])) + c.append(''.join(sdf_form[i])+'\n') mirror_sdf_string = ''.join(c) return mirror_sdf_string diff --git a/fafoom_manual.pdf b/fafoom_manual.pdf index fe2bda6..1149f90 100644 Binary files a/fafoom_manual.pdf and b/fafoom_manual.pdf differ