From 59058c6287a276a484f1ab3544c1f0805aababe0 Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Mon, 29 Oct 2018 14:20:28 -0400 Subject: [PATCH 01/10] Adds a script that can generate protonated and deprotonated forms of molecules from the GDB batabase. --- notebooks/GDB_charged_molecules.ipynb | 146 ++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 notebooks/GDB_charged_molecules.ipynb diff --git a/notebooks/GDB_charged_molecules.ipynb b/notebooks/GDB_charged_molecules.ipynb new file mode 100644 index 0000000..602b56a --- /dev/null +++ b/notebooks/GDB_charged_molecules.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from rdkit import Chem\n", + "\n", + "def IonizedMols(smi):\n", + " '''From a given SMILES, return all possible protonated/deprotonated molecules, considering N, O and S atoms'''\n", + " # Setting molecule\n", + " mol = Chem.MolFromSmiles(smi)\n", + " # Building lists of all N, O and S atoms\n", + " atoms_idx = [] # Atoms indexes\n", + " nfc = [] # New formal charge\n", + " nehs = [] # New number of explicit hydrogens\n", + " for atom in mol.GetAtoms():\n", + " atn = atom.GetAtomicNum()\n", + " if (atn == 7 or atn == 8 or atn == 16): # If N, O or S\n", + " skip = False\n", + " nhs = atom.GetTotalNumHs() # Number of hydrogen bonded to the atom\n", + " # If N with no protons, if N with two protons,\n", + " # if O with no protons, or if S with no protons: protonate\n", + " if ((atn == 7 and nhs == 0) or (atn == 7 and nhs == 2) or (atn == 8 and nhs == 0) or (atn == 16 and nhs == 0)):\n", + " nfc.append(atom.GetFormalCharge()+1)\n", + " nehs.append(atom.GetTotalNumHs()+1)\n", + " # If N with one proton, if N with three protons,\n", + " # if O with one proton, if S with one proton: deprotonate\n", + " elif ((atn == 7 and nhs == 1) or (atn == 7 and nhs == 3) or (atn == 8 and nhs == 1) or (atn == 16 and nhs == 1)):\n", + " nfc.append(atom.GetFormalCharge()-1)\n", + " nehs.append(max(0,atom.GetNumExplicitHs()-1))\n", + " # If none of the above conditions are true, skip this atom\n", + " else:\n", + " skip = True\n", + " if (not skip): # If not skipping\n", + " atoms_idx.append(atom.GetIdx())\n", + " # Combining all possible protonations/deprotonations\n", + " ntats=len(atoms_idx) # Number of titratable atoms\n", + " vec = [] # This is a reference array that will aid in preparing the new SMILES string\n", + " [vec.append(0) for cnt in range(ntats)] # Initializing the array\n", + " nmols = [] # New molecules\n", + " while (sum(vec) Date: Wed, 19 Dec 2018 14:48:08 -0500 Subject: [PATCH 02/10] Adds MD sampling to al_ions_cycle.py --- activelearning/al_ions_cycle.py | 10 +++---- lib/anialtools.py | 51 +++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/activelearning/al_ions_cycle.py b/activelearning/al_ions_cycle.py index 43406e8..21e72c2 100644 --- a/activelearning/al_ions_cycle.py +++ b/activelearning/al_ions_cycle.py @@ -92,7 +92,7 @@ 'tsfiles': ['/home/jsmith48/scratch/auto_rxn_al/rxns/'], 'nmfile':None, #path to gaussian log file containing the data 'nm':0, #id of normal mode - 'perc':0, #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok. + 'perc':0, #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok. } dhparams = { 'Nmol': 250, @@ -109,9 +109,9 @@ #'mdselect' : [(10,0), (1,11)], #'N' : 20, 'maxNa' : 15, # Largest molecule to consider (for dimers max size is 2x maxNa) - 'T' : 400.0, # running temp + 'T' : 400.0, # running temp 'L' : 25.0, # box length - 'V' : 0.04, # Random init velocities + 'V' : 0.04, # Random init velocities 'dt' : 0.25, # MD time step 'Nm' : 100, # Molecules to embed #'Nm' : 160, # Molecules to embed @@ -178,11 +178,9 @@ #acs.run_sampling_cluster(gcmddict, GPU) #acs.run_sampling_dimer(dmrparams, GPU) acs.run_sampling_nms(nmsparams, GPU) - #acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU) + acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU) #acs.run_sampling_dhl(dhparams, gpus=GPU+GPU) #acs.run_sampling_TS(tsparams, gpus=GPU) ## Submit jobs, return and pack data ast.generateQMdata(hostname, username, swkdir, ldtdir, datdir + str(i+1).zfill(2), h5stor, mae, jtime,max_jobs=50) - - diff --git a/lib/anialtools.py b/lib/anialtools.py index 980e6c3..0bc6775 100644 --- a/lib/anialtools.py +++ b/lib/anialtools.py @@ -75,7 +75,7 @@ def run_sampling_md(self, mdparams, perc=1.0, gpus=[0]): np.random.shuffle(md_work) md_work = md_work[0:int(perc*md_work.size)] md_work = np.array_split(md_work,len(gpus2)) - + proc = [] for i,(md,g) in enumerate(zip(md_work,gpus2)): proc.append(Process(target=self.mol_dyn_sampling, args=(md,i, @@ -246,6 +246,13 @@ def mol_dyn_sampling(self,md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid): #print(di, ') Working on', id, '...') S = data["species"] + if "charge" in data and "multip" in data: + chg = data["charge"] + mlt = data["multip"] + else: + chg = "0" + mlt = "1" + # Set mols activ.setmol(data["coordinates"], S) @@ -261,7 +268,7 @@ def mol_dyn_sampling(self,md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid): #print(nfo) if X.size > 0: - hdt.writexyzfile(self.cdir + 'mds_' + m.split('.')[0] + '_' + str(i).zfill(2) + str(di).zfill(4) + '.xyz', X, S) + hdt.writexyzfilewc(self.cdir + 'mds_' + m.split('.')[0] + '_' + str(i).zfill(2) + str(di).zfill(4) + '.xyz', X, S, ' '+chg+' '+mlt) difo.write('Complete mean fail time: ' + "{:.2f}".format(ftme_t / float(Nmol)) + '\n') print(Nmol) del activ @@ -280,7 +287,7 @@ def dimer_sampling(self, tid, Nr, dparam, gpuid): Nm = dparam['Nm'] Ni = dparam['Ni'] Ns = dparam['Ns'] - + mols = [] difo = open(self.ldtdir + self.datdir + '/info_data_mddimer-'+str(tid)+'.nfo', 'w') for di,id in enumerate(dparam['mdselect']): @@ -290,26 +297,26 @@ def dimer_sampling(self, tid, Nr, dparam, gpuid): dnfo = str(di) + ' of ' + str(len(dparam['mdselect'])) + ') dir: ' + str(self.idir[id[1]]) + ' Selecting: '+str(id[0]*len(files)) #print(dnfo) difo.write(dnfo+'\n') - + for i in range(id[0]): for n,m in enumerate(files): data = hdt.read_rcdb_coordsandnm(self.idir[id[1]]+m) if len(data['species']) < maxNa: mols.append(data) - dgen = pmf.dimergenerator(self.netdict['cnstfile'], - self.netdict['saefile'], - self.netdict['nnfprefix'], - self.netdict['num_nets'], + dgen = pmf.dimergenerator(self.netdict['cnstfile'], + self.netdict['saefile'], + self.netdict['nnfprefix'], + self.netdict['num_nets'], mols, gpuid) difo.write('Beginning dimer generation...\n') - + Nt = 0 Nd = 0 for i in range(Nr): dgen.init_dynamics(Nm, V, L, dt, T) - + for j in range(Ns): if j != 0: dgen.run_dynamics(Ni) @@ -322,7 +329,7 @@ def dimer_sampling(self, tid, Nr, dparam, gpuid): Nt += dgen.Nt Nd += dgen.Nd - + #print('Step (',tid,',',i,') [', str(dgen.Nd), '/', str(dgen.Nt),'] generated ',len(dgen.frag_list), 'dimers...') #difo.write('Step ('+str(i)+') ['+ str(dgen.Nd)+ '/'+ str(dgen.Nt)+'] generated '+str(len(dgen.frag_list))+'dimers...\n') if max_sig > 3.0*sig: @@ -376,7 +383,7 @@ def run_sampling_TS(self, tsparams, gpus=[0], perc=1.0): np.random.shuffle(TS_infiles) TS_infiles = TS_infiles[0:int(perc*len(TS_infiles))] TS_infiles = np.array_split(TS_infiles,len(gpus2)) - + proc = [] for i,g in enumerate(gpus2): proc.append(Process(target=self.TS_sampling, args=(i, TS_infiles[i], tsparams, g))) @@ -481,7 +488,7 @@ def run_sampling_pDynTS(self, pdynparams, gpus=0): print('Finished pDynamo sampling.') - def pDyn_QMsampling(self, pdynparams, gpuid): + def pDyn_QMsampling(self, pdynparams, gpuid): #Call subproc_pDyn class in pyaniasetools as activ activ = aat.subproc_pDyn(self.netdict['cnstfile'], self.netdict['saefile'], @@ -500,7 +507,7 @@ def pDyn_QMsampling(self, pdynparams, gpuid): indir=pdynparams['indir'] #path to save XYZ files of IRC points to check stddev XYZfile=pdynparams['XYZfile'] #XYZ file with high standard deviations structures l_val=pdynparams['l_val'] #Ri --> randomly perturb in the interval [+x,-x] - h_val=pdynparams['h_val'] + h_val=pdynparams['h_val'] n_points=pdynparams['n_points'] #Number of points along IRC (forward+backward+1 for TS) sig=pdynparams['sig'] N=pdynparams['N'] @@ -521,7 +528,7 @@ def pDyn_QMsampling(self, pdynparams, gpuid): chk_TS = activ.subprocess_cmd(sbproc_cmdTS, False, logfile_TS) if chk_TS == 0: chk_IRC = activ.subprocess_cmd(sbproc_cmdIRC, False, logfile_IRC) - + # ----------------------- Save points along ANI IRC ------------------------ IRCfils=os.listdir(IRCdir) IRCfils.sort() @@ -530,16 +537,16 @@ def pDyn_QMsampling(self, pdynparams, gpuid): activ.getIRCpoints_toXYZ(n_points, IRCdir+f, f, indir) infils=os.listdir(indir) infils.sort() - + # ------ Check for high standard deviation structures and get vib modes ----- for f in infils: stdev = activ.check_stddev(indir+f, sig) if stdev > sig: #if stddev is high then get modes for that point nmc = activ.get_nm(indir+f) #save modes in memory for later use - + activ.write_nm_xyz(XYZfile) #writes all the structures with high standard deviations to xyz file - # ----------------------------- Read XYZ for NM ----------------------------- + # ----------------------------- Read XYZ for NM ----------------------------- X, spc, Na, C = hdt.readxyz3(XYZfile) # --------- NMS for XYZs with high stddev -------- @@ -553,7 +560,7 @@ def pDyn_QMsampling(self, pdynparams, gpuid): gen_crd[k] = gen.get_random_structure() hdt.writexyzfile(self.cdir + 'nms_TS%i.xyz' %N, gen_crd, spc[i]) - + del activ @@ -895,11 +902,11 @@ def build_strided_training_cache(self,Nblocks,Nvalid,Ntest,build_test=True, buil S = data['species'] E = data['energies'] X = data['coordinates'] - + Esae = hdt.compute_sae(self.netdict['saefile'], S) - + dE.append((E-Esae)/np.sqrt(len(S))) - + dE = np.concatenate(dE) cidx = np.where(np.abs(dE) < 15.0) std = np.abs(dE[cidx]).std() From ddf495310999b1efcc8c1273d096698c07737d67 Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Wed, 19 Dec 2018 16:32:10 -0500 Subject: [PATCH 03/10] Input file modifications in al_ions_cycle.py --- activelearning/al_ions_cycle.py | 61 +++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/activelearning/al_ions_cycle.py b/activelearning/al_ions_cycle.py index 21e72c2..1bb1be7 100644 --- a/activelearning/al_ions_cycle.py +++ b/activelearning/al_ions_cycle.py @@ -58,10 +58,59 @@ root = '/home/jsmith48/scratch/auto_ion_al' wkdir = root+'/modelions/ANI-AL-smallions/' -iptfile = root+'/modelions/inputtrain.ipt' +#iptfile = root+'/modelions/inputtrain.ipt' saefile = root+'/modelions/sae_linfit.dat' cstfile = root+'/modelions/rHCNOSFCl-5.2R_16-3.8A_a4-8.params' +ipt = alt.anitrainerinputdesigner() + +ipt.set_parameter('atomEnergyFile','sae_linfit.dat') +ipt.set_parameter('sflparamsfile','rHCNOSFCl-5.2R_16-3.8A_a4-8.params') +ipt.set_parameter('eta','0.001') +ipt.set_parameter('energy','1') +ipt.set_parameter('force','0') +ipt.set_parameter('fmult','1.0') +ipt.set_parameter('feps','0.001') +ipt.set_parameter('dipole','0') +ipt.set_parameter('charge','0') +ipt.set_parameter('cdweight','2.0') +ipt.set_parameter('tolr','100') +ipt.set_parameter('tbtchsz','2560') +ipt.set_parameter('vbtchsz','2560') +ipt.set_parameter('nkde','2') + + +# Set network layers +ipt.add_layer('H',{"nodes":320,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('H',{"nodes":64 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('H',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.add_layer('C',{"nodes":288,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('C',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('C',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.add_layer('N',{"nodes":256,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('N',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('N',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.add_layer('O',{"nodes":256,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('O',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('O',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.add_layer('S',{"nodes":192,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('S',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('S',{"nodes":80 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.add_layer('F',{"nodes":192,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('F',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('F',{"nodes":80 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.add_layer('Cl',{"nodes":192,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('Cl',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('Cl',{"nodes":80 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.print_layer_parameters() + #-----------0--------- # Training varibles @@ -144,7 +193,7 @@ ### BEGIN CONFORMATIONAL REFINEMENT LOOP HERE ### #N = [20] -N = [1] +N = [2] for i in N: #netdir = wkdir+'ANI-1x-RXN-0000.00'+str(i).zfill(2)+'/' @@ -156,9 +205,9 @@ nnfprefix = netdir + 'train' - netdict = {'iptfile' : iptfile, - 'cnstfile' : cstfile, + netdict = {'cnstfile' : cstfile, 'saefile': saefile, + 'iptsize': 1008, 'nnfprefix': netdir+'train', 'aevsize': aevsize, 'num_nets': Nnets, @@ -166,8 +215,8 @@ } ## Train the ensemble ## - aln = att.alaniensembletrainer(netdir, netdict, h5stor, Nnets) - aln.build_strided_training_cache(Nblock,Nbvald,Nbtest,False) + aln = att.alaniensembletrainer(netdir, netdict, ipt, h5stor, Nnets) + aln.build_strided_training_cache(Nblock,Nbvald,Nbtest,Ekey='energy',forces=False,dipole=False,rmhighe=True) aln.train_ensemble(GPU) if not os.path.exists(root_dir + datdir + str(i+1).zfill(2)): From 647084c95cc2443ef0a2a013db92b79d507f01ab Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Wed, 12 Jun 2019 19:42:33 -0700 Subject: [PATCH 04/10] New sampling technique: structural sampling --- activelearning/al_1b-nn.py | 238 +++++++++++++++++++++++++++++++++++++ lib/anialtools.py | 99 ++++++++++++++- 2 files changed, 335 insertions(+), 2 deletions(-) create mode 100644 activelearning/al_1b-nn.py diff --git a/activelearning/al_1b-nn.py b/activelearning/al_1b-nn.py new file mode 100644 index 0000000..5323166 --- /dev/null +++ b/activelearning/al_1b-nn.py @@ -0,0 +1,238 @@ +import anialservertools as ast +import anitraintools as att +import anialtools as alt +from time import sleep,time +import os +import sys + +#passfile = "/home/jsmith48/tkey.dat" + +hostname = "comet.sdsc.xsede.org" +#hostname = "bridges.psc.edu" +#hostname = "moria.chem.ufl.edu" +username = "cruzeiro" +#password = open(passfile,'r').read().strip() + +root_dir = '/server-home1/cruzeiro/auto_al/' + +swkdir = '/home/cruzeiro/scratch/auto_al_cycles/'# server working directory +datdir = 'ANI-AL-n2o5-1b-' + +h5stor = root_dir + 'h5files/'# h5store location + +strucsfolder = root_dir + 'strucs/'# strucs location + +optlfile = root_dir + 'input_files.dat' + +#Comet +mae = 'module load gnu/4.9.2\n' +\ + 'module load gaussian\n' +\ + 'export PATH="/home/$USER/Gits/RCDBuilder/build/bin:$PATH"\n' +\ + 'export LD_LIBRARY_PATH="/home/$USER/Gits/RCDBuilder/build/lib:$LD_LIBRARY_PATH"\n' + +# Bridges +#mae = 'module load gcc/5.3.0\n' +\ +# 'module load gaussian/09.D.01\n' +\ +# 'export PATH="/home/$USER/Gits/RCDBuilder/build/bin:$PATH"\n' +\ +# 'export LD_LIBRARY_PATH="/home/$USER/Gits/RCDBuilder/build/lib:$LD_LIBRARY_PATH"\n' +\ +# "export GAUSS_EXEDIR='/opt/packages/gaussian-RevD.01/g09/' \n" + +#fpatoms = ['C', 'N', 'O'] +fpatoms = ['N', 'O'] + +jtime = "0-12:00" + +#---- Training Parameters ---- +GPU = [1,2] # GPU IDs + +M = 0.06 # Max error per atom in kcal/mol +Nnets = 8 # networks in ensemble +Nblock = 16 # Number of blocks in split +Nbvald = 2 # number of valid blocks +Nbtest = 2 # number of test blocks +aevsize = 128 + +#wkdir = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/ANI-1x-RXN-0000/' +#iptfile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/inputtrain.ipt' +#saefile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/sae_linfit.dat' +#cstfile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/rHCNO-4.6R_16-3.1A_a4-8.params' + +root = '/server-home1/cruzeiro/auto_al' + +wkdir = root+'/models/ANI-AL-n2o5-1b/' +#iptfile = root+'/modelions/inputtrain.ipt' +saefile = root+'/models/ANI-AL-n2o5-1b/sae_linfit.dat' +cstfile = root+'/models/ANI-AL-n2o5-1b/rNO-5.2R_16-3.5A_a4-8.params' + +ipt = alt.anitrainerinputdesigner() + +ipt.set_parameter('atomEnergyFile','sae_linfit.dat') +ipt.set_parameter('sflparamsfile','rNO-5.2R_16-3.5A_a4-8.params') +ipt.set_parameter('eta','0.001') +ipt.set_parameter('energy','1') +ipt.set_parameter('force','0') +ipt.set_parameter('fmult','1.0') +ipt.set_parameter('feps','0.001') +ipt.set_parameter('dipole','0') +ipt.set_parameter('charge','0') +ipt.set_parameter('cdweight','2.0') +ipt.set_parameter('tolr','100') +ipt.set_parameter('tbtchsz','35') +ipt.set_parameter('vbtchsz','35') +ipt.set_parameter('nkde','2') + + +# Set network layers +#ipt.add_layer('H',{"nodes":160,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +#ipt.add_layer('H',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +#ipt.add_layer('H',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +#ipt.add_layer('C',{"nodes":144,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +#ipt.add_layer('C',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +#ipt.add_layer('C',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +#ipt.add_layer('N',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +#ipt.add_layer('N',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +#ipt.add_layer('N',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +#ipt.add_layer('O',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +#ipt.add_layer('O',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +#ipt.add_layer('O',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.add_layer('N',{"nodes":32,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('N',{"nodes":28,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('N',{"nodes":24,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.add_layer('O',{"nodes":32,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) +ipt.add_layer('O',{"nodes":28,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) +ipt.add_layer('O',{"nodes":24,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) + +ipt.print_layer_parameters() + +#-----------0--------- + +# Training varibles + +#### Sampling parameters #### +nmsparams = {'T': 700.0, # Temperature + 'Ngen': 300, # Confs to generate + 'Nkep': 200, # Diverse confs to keep + 'maxd': 0.5, # Diverse confs to keep + 'sig' : M, + } + +strucsparams = {'N': 50, # number of maximum structures to select before QBC from each XYZ file + 'sig': M, + } + +mdsparams = {'N': 1, # trajectories to run + 'T1': 300, + 'T2': 1000, + 'dt': 0.5, + 'Nc': 3000, + 'Ns': 2, + 'sig': M, + } + +tsparams = {'T':200, # trajectories to run + 'n_samples' : 200, + 'n_steps': 10, + 'steps': 1500, + 'min_steps': 300, + 'sig' : M, + 'tsfiles': ['/home/jsmith48/scratch/auto_rxn_al/rxns/'], + 'nmfile':None, #path to gaussian log file containing the data + 'nm':0, #id of normal mode + 'perc':0, #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok. + } + +dhparams = { 'Nmol': 250, + 'Nsamp': 2, + 'sig' : M, + 'rng' : 0.2, + 'MaxNa' : 25, + #'smilefile': '/home/jsmith48/scratch/auto_dhl_al/dhl_files/dhl_genentech.smi', + #'smilefile': '/home/jsmith48/scratch/Drug_moles_raw/chembl_22_clean_1576904_sorted_std_final.smi', + 'smilefile': '/home/jsmith48/scratch/Drug_moles_raw/chembl_22_clean_1576904_sorted_std_final.smi', + } + +dmrparams = {'mdselect' : [(4000,0),(1600,1),(400,2),(100,3),(25,4),(10,5),(1,6),(1,7),(1,8),(1,9),(1,10),(50,11),(1,12),(2,13),(2,14),], + #'mdselect' : [(10,0), (1,11)], + #'N' : 20, + 'maxNa' : 15, # Largest molecule to consider (for dimers max size is 2x maxNa) + 'T' : 400.0, # running temp + 'L' : 25.0, # box length + 'V' : 0.04, # Random init velocities + 'dt' : 0.25, # MD time step + 'Nm' : 100, # Molecules to embed + #'Nm' : 160, # Molecules to embed + 'Nr' : 15, # Number of total boxes to embed and test + 'Ni' : 10, # Number of steps to run the dynamics before fragmenting + 'Ns' : 10, + 'sig': M, + } + +solv_file = '/home/jsmith48/scratch/GDB-11-AL-wB97x631gd/gdb11_s01/inputs/gdb11_s01-2.ipt' +solu_dirs = '' + +gcmddict = {'edgepad': 0.8, # padding on the box edge + 'mindist': 1.6, # Minimum allow intermolecular distance + 'sig' : M, # sig hat for data selection + 'maxsig' : 3.0*M, # Max frag sig allowed to continue dynamics + 'Nr': 30, # Number of boxed to run + 'MolHigh': 910, #High number of molecules + 'MolLow': 820, #Low number of molecules + 'Ni': 50, #steps before checking frags + 'Ns': 100, + 'dt': 0.25, + 'V': 0.04, + 'L': 30.0, + 'T': 500.0, + 'Nembed' : 0, + 'solv_file' : solv_file, + 'solu_dirs' : solu_dirs, + } + +### BEGIN CONFORMATIONAL REFINEMENT LOOP HERE ### +#N = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40] +N = [41] + +for i in N: + #netdir = wkdir+'ANI-1x-RXN-0000.00'+str(i).zfill(2)+'/' + netdir = wkdir + 'ANI-AL-n2o5-1b-' + str(i).zfill(2) + '/' + ldtdir = root_dir # local data directories + + if not os.path.exists(netdir): + os.mkdir(netdir) + + nnfprefix = netdir + 'train' + + netdict = {'cnstfile' : cstfile, + 'saefile': saefile, + 'iptsize': 128, + 'nnfprefix': netdir+'train', + 'aevsize': aevsize, + 'num_nets': Nnets, + 'atomtyp' : ['N','O'] + } + + ## Train the ensemble ## + #aln = att.alaniensembletrainer(netdir, netdict, ipt, h5stor, Nnets) + #aln.build_strided_training_cache(Nblock,Nbvald,Nbtest,Ekey='energies',forces=False,dipole=False,rmhighe=True) + #aln.train_ensemble(GPU) + + if not os.path.exists(root_dir + datdir + str(i+1).zfill(2)): + os.mkdir(root_dir + datdir + str(i+1).zfill(2)) + + ## Run active learning sampling ## + acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, strucsfolder, fpatoms+['H'], netdict) + #acs.run_sampling_cluster(gcmddict, GPU) + #acs.run_sampling_dimer(dmrparams, GPU) + #acs.run_sampling_nms(nmsparams, GPU) + acs.run_sampling_strucs(strucsparams, GPU) + #acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU) + #acs.run_sampling_dhl(dhparams, gpus=GPU+GPU) + #acs.run_sampling_TS(tsparams, gpus=GPU) + + ## Submit jobs, return and pack data + ast.generateQMdata(hostname, username, swkdir, ldtdir, datdir + str(i+1).zfill(2), h5stor, mae, jtime,max_jobs=50) diff --git a/lib/anialtools.py b/lib/anialtools.py index 0bc6775..1e364be 100644 --- a/lib/anialtools.py +++ b/lib/anialtools.py @@ -26,7 +26,7 @@ class alconformationalsampler(): # Constructor - def __init__(self, ldtdir, datdir, optlfile, fpatoms, netdict): + def __init__(self, ldtdir, datdir, optlfile, strucsfolder, fpatoms, netdict): self.ldtdir = ldtdir # local working dir self.datdir = datdir # working data dir self.cdir = ldtdir+datdir+'/confs/' # confs store dir (the data gen code looks here for conformations to run QM on) @@ -35,6 +35,8 @@ def __init__(self, ldtdir, datdir, optlfile, fpatoms, netdict): self.optlfile = optlfile # Optimized molecules store path file + self.strucsfolder = strucsfolder # Structures folder + self.idir = [f for f in open(optlfile).read().split('\n') if f != ''] # read and store the paths to the opt files # create cdir if it does not exist @@ -57,6 +59,15 @@ def run_sampling_nms(self, nmsparams, gpus=[0]): p.start() p.join() + # Runs structural sampling (single GPU only currently) + def run_sampling_strucs(self, strucsparams, gpus=[0]): + print('Running Structural sampling...') + p = Process(target=self.structural_sampling, args=(strucsparams['N'], + strucsparams['sig'], + gpus[0])) + p.start() + p.join() + # Run MD sampling on N GPUs. This code will automatically run 2 mds per GPU for better utilization def run_sampling_md(self, mdparams, perc=1.0, gpus=[0]): md_work = [] @@ -204,7 +215,6 @@ def normal_mode_sampling(self, T, Ngen, Nkep, maxd, sig, gpuid): sigma = anicv.compute_stddev_conformations(conformers,spc) sid = np.where( sigma > sig )[0] - Nt += sigma.size Nk += sid.size if 100.0*sid.size/float(Ngen) > 0: @@ -227,6 +237,91 @@ def normal_mode_sampling(self, T, Ngen, Nkep, maxd, sig, gpuid): #print('\nGrand Total:', Nkt, 'of', Ntt,'percent:',"{:.2f}".format(100.0*Nkt/Ntt), 'Kept',Nkp) of.close() + # Structural sampler function + def structural_sampling(self, N, sig, gpuid): + of = open(self.ldtdir + self.datdir + '/info_data_strucs.nfo', 'w') + + aevsize = self.netdict['aevsize'] + + anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'], + self.netdict['saefile'], + self.netdict['nnfprefix'], + self.netdict['num_nets'], + [gpuid], False) + + dc = aat.diverseconformers(self.netdict['cnstfile'], + self.netdict['saefile'], + self.netdict['nnfprefix']+'0/networks/', + aevsize, + gpuid, False) + + files = os.listdir(self.strucsfolder) + files.sort() + + Nkt = 0 + Ntt = 0 + cnt = 0 + for fi,f in enumerate(files): + print(f) + fil = open(self.strucsfolder+f,'r') + lines = fil.readlines() + fil.close() + nlines = len(lines) + # Reading all conformations + nat = int(lines[0]) + nconfs=int(round(len(lines)/(nat+2))) + crds=[] + for conf in range(nconfs): + crd =[] + if (conf==0): + if (not re.search("Charge:",lines[1]) or not re.search("Mul:",lines[1])): + raise ValueError('Error: the first comment line in %s must have charge and multiplicity. Please add something like " Charge: 0 Mul: 1 "'%(self.strucsfolder+f)) + chg = lines[1].split("Charge:")[1].split()[0] + mul = lines[1].split("Mul:")[1].split()[0] + spc = [] + for i in range(nat): + var = lines[conf*(nat+2)+2+i].split() + spc.append(var[0]) + crd.append([float(var[1]),float(var[2]),float(var[3])]) + else: + for i in range(nat): + var = lines[conf*(nat+2)+2+i].split() + crd.append([float(var[1]),float(var[2]),float(var[3])]) + crds.append(crd) + # Select up to N random structures, if needed + if (nconfs>N): + list=[] + for i in range(N): + num=np.random.random_integers(0,nconfs-1) + while(num in list): + num=num=np.random.random_integers(0,nconfs-1) + list.append(num) + ncrds=[] + for i in sorted(list): + ncrds.append(crds[i]) + del crds + crds=ncrds + del ncrds + # Converting list to numpy array + crds=np.asarray(crds, dtype=np.float32) + # Filter by QBC + sigma = anicv.compute_stddev_conformations(crds,spc) + sid = np.where( sigma > sig )[0] + + Ntt += sigma.size + Nkt += sid.size + of.write(str(cnt+1)+' of '+str(len(files))+') file: '+ str(self.strucsfolder+f) +'\n') + of.write(' -Total: '+str(sid.size)+' of '+str(sigma.size)+' percent: '+"{:.2f}".format(100.0*sid.size/sigma.size)+'\n') + of.flush() + if sid.size > 0: + cfn = f.split('.')[0]+'_strucs.xyz' + cmts = [' '+chg+' '+mul for c in range(sid.size)] + hdt.writexyzfilewc(self.cdir+cfn,crds[sid],spc,cmts) + cnt += 1 + + of.write('\nGrand Total: '+ str(Nkt)+ ' of '+ str(Ntt)+' percent: '+"{:.2f}".format(100.0*Nkt/Ntt)+'\n') + of.close() + # MD Sampling function def mol_dyn_sampling(self,md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid): activ = aat.moldynactivelearning(self.netdict['cnstfile'], From a2fbf87c26f304491b9ce0d9641fc308379676fe Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Sat, 19 Oct 2019 19:48:32 -0700 Subject: [PATCH 05/10] Remove activelearning/al_1b-nn.py --- activelearning/al_1b-nn.py | 238 ------------------------------------- 1 file changed, 238 deletions(-) delete mode 100644 activelearning/al_1b-nn.py diff --git a/activelearning/al_1b-nn.py b/activelearning/al_1b-nn.py deleted file mode 100644 index 5323166..0000000 --- a/activelearning/al_1b-nn.py +++ /dev/null @@ -1,238 +0,0 @@ -import anialservertools as ast -import anitraintools as att -import anialtools as alt -from time import sleep,time -import os -import sys - -#passfile = "/home/jsmith48/tkey.dat" - -hostname = "comet.sdsc.xsede.org" -#hostname = "bridges.psc.edu" -#hostname = "moria.chem.ufl.edu" -username = "cruzeiro" -#password = open(passfile,'r').read().strip() - -root_dir = '/server-home1/cruzeiro/auto_al/' - -swkdir = '/home/cruzeiro/scratch/auto_al_cycles/'# server working directory -datdir = 'ANI-AL-n2o5-1b-' - -h5stor = root_dir + 'h5files/'# h5store location - -strucsfolder = root_dir + 'strucs/'# strucs location - -optlfile = root_dir + 'input_files.dat' - -#Comet -mae = 'module load gnu/4.9.2\n' +\ - 'module load gaussian\n' +\ - 'export PATH="/home/$USER/Gits/RCDBuilder/build/bin:$PATH"\n' +\ - 'export LD_LIBRARY_PATH="/home/$USER/Gits/RCDBuilder/build/lib:$LD_LIBRARY_PATH"\n' - -# Bridges -#mae = 'module load gcc/5.3.0\n' +\ -# 'module load gaussian/09.D.01\n' +\ -# 'export PATH="/home/$USER/Gits/RCDBuilder/build/bin:$PATH"\n' +\ -# 'export LD_LIBRARY_PATH="/home/$USER/Gits/RCDBuilder/build/lib:$LD_LIBRARY_PATH"\n' +\ -# "export GAUSS_EXEDIR='/opt/packages/gaussian-RevD.01/g09/' \n" - -#fpatoms = ['C', 'N', 'O'] -fpatoms = ['N', 'O'] - -jtime = "0-12:00" - -#---- Training Parameters ---- -GPU = [1,2] # GPU IDs - -M = 0.06 # Max error per atom in kcal/mol -Nnets = 8 # networks in ensemble -Nblock = 16 # Number of blocks in split -Nbvald = 2 # number of valid blocks -Nbtest = 2 # number of test blocks -aevsize = 128 - -#wkdir = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/ANI-1x-RXN-0000/' -#iptfile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/inputtrain.ipt' -#saefile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/sae_linfit.dat' -#cstfile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/rHCNO-4.6R_16-3.1A_a4-8.params' - -root = '/server-home1/cruzeiro/auto_al' - -wkdir = root+'/models/ANI-AL-n2o5-1b/' -#iptfile = root+'/modelions/inputtrain.ipt' -saefile = root+'/models/ANI-AL-n2o5-1b/sae_linfit.dat' -cstfile = root+'/models/ANI-AL-n2o5-1b/rNO-5.2R_16-3.5A_a4-8.params' - -ipt = alt.anitrainerinputdesigner() - -ipt.set_parameter('atomEnergyFile','sae_linfit.dat') -ipt.set_parameter('sflparamsfile','rNO-5.2R_16-3.5A_a4-8.params') -ipt.set_parameter('eta','0.001') -ipt.set_parameter('energy','1') -ipt.set_parameter('force','0') -ipt.set_parameter('fmult','1.0') -ipt.set_parameter('feps','0.001') -ipt.set_parameter('dipole','0') -ipt.set_parameter('charge','0') -ipt.set_parameter('cdweight','2.0') -ipt.set_parameter('tolr','100') -ipt.set_parameter('tbtchsz','35') -ipt.set_parameter('vbtchsz','35') -ipt.set_parameter('nkde','2') - - -# Set network layers -#ipt.add_layer('H',{"nodes":160,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) -#ipt.add_layer('H',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) -#ipt.add_layer('H',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) - -#ipt.add_layer('C',{"nodes":144,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) -#ipt.add_layer('C',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) -#ipt.add_layer('C',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) - -#ipt.add_layer('N',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) -#ipt.add_layer('N',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) -#ipt.add_layer('N',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) - -#ipt.add_layer('O',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) -#ipt.add_layer('O',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) -#ipt.add_layer('O',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) - -ipt.add_layer('N',{"nodes":32,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) -ipt.add_layer('N',{"nodes":28,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) -ipt.add_layer('N',{"nodes":24,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) - -ipt.add_layer('O',{"nodes":32,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3}) -ipt.add_layer('O',{"nodes":28,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) -ipt.add_layer('O',{"nodes":24,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6}) - -ipt.print_layer_parameters() - -#-----------0--------- - -# Training varibles - -#### Sampling parameters #### -nmsparams = {'T': 700.0, # Temperature - 'Ngen': 300, # Confs to generate - 'Nkep': 200, # Diverse confs to keep - 'maxd': 0.5, # Diverse confs to keep - 'sig' : M, - } - -strucsparams = {'N': 50, # number of maximum structures to select before QBC from each XYZ file - 'sig': M, - } - -mdsparams = {'N': 1, # trajectories to run - 'T1': 300, - 'T2': 1000, - 'dt': 0.5, - 'Nc': 3000, - 'Ns': 2, - 'sig': M, - } - -tsparams = {'T':200, # trajectories to run - 'n_samples' : 200, - 'n_steps': 10, - 'steps': 1500, - 'min_steps': 300, - 'sig' : M, - 'tsfiles': ['/home/jsmith48/scratch/auto_rxn_al/rxns/'], - 'nmfile':None, #path to gaussian log file containing the data - 'nm':0, #id of normal mode - 'perc':0, #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok. - } - -dhparams = { 'Nmol': 250, - 'Nsamp': 2, - 'sig' : M, - 'rng' : 0.2, - 'MaxNa' : 25, - #'smilefile': '/home/jsmith48/scratch/auto_dhl_al/dhl_files/dhl_genentech.smi', - #'smilefile': '/home/jsmith48/scratch/Drug_moles_raw/chembl_22_clean_1576904_sorted_std_final.smi', - 'smilefile': '/home/jsmith48/scratch/Drug_moles_raw/chembl_22_clean_1576904_sorted_std_final.smi', - } - -dmrparams = {'mdselect' : [(4000,0),(1600,1),(400,2),(100,3),(25,4),(10,5),(1,6),(1,7),(1,8),(1,9),(1,10),(50,11),(1,12),(2,13),(2,14),], - #'mdselect' : [(10,0), (1,11)], - #'N' : 20, - 'maxNa' : 15, # Largest molecule to consider (for dimers max size is 2x maxNa) - 'T' : 400.0, # running temp - 'L' : 25.0, # box length - 'V' : 0.04, # Random init velocities - 'dt' : 0.25, # MD time step - 'Nm' : 100, # Molecules to embed - #'Nm' : 160, # Molecules to embed - 'Nr' : 15, # Number of total boxes to embed and test - 'Ni' : 10, # Number of steps to run the dynamics before fragmenting - 'Ns' : 10, - 'sig': M, - } - -solv_file = '/home/jsmith48/scratch/GDB-11-AL-wB97x631gd/gdb11_s01/inputs/gdb11_s01-2.ipt' -solu_dirs = '' - -gcmddict = {'edgepad': 0.8, # padding on the box edge - 'mindist': 1.6, # Minimum allow intermolecular distance - 'sig' : M, # sig hat for data selection - 'maxsig' : 3.0*M, # Max frag sig allowed to continue dynamics - 'Nr': 30, # Number of boxed to run - 'MolHigh': 910, #High number of molecules - 'MolLow': 820, #Low number of molecules - 'Ni': 50, #steps before checking frags - 'Ns': 100, - 'dt': 0.25, - 'V': 0.04, - 'L': 30.0, - 'T': 500.0, - 'Nembed' : 0, - 'solv_file' : solv_file, - 'solu_dirs' : solu_dirs, - } - -### BEGIN CONFORMATIONAL REFINEMENT LOOP HERE ### -#N = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40] -N = [41] - -for i in N: - #netdir = wkdir+'ANI-1x-RXN-0000.00'+str(i).zfill(2)+'/' - netdir = wkdir + 'ANI-AL-n2o5-1b-' + str(i).zfill(2) + '/' - ldtdir = root_dir # local data directories - - if not os.path.exists(netdir): - os.mkdir(netdir) - - nnfprefix = netdir + 'train' - - netdict = {'cnstfile' : cstfile, - 'saefile': saefile, - 'iptsize': 128, - 'nnfprefix': netdir+'train', - 'aevsize': aevsize, - 'num_nets': Nnets, - 'atomtyp' : ['N','O'] - } - - ## Train the ensemble ## - #aln = att.alaniensembletrainer(netdir, netdict, ipt, h5stor, Nnets) - #aln.build_strided_training_cache(Nblock,Nbvald,Nbtest,Ekey='energies',forces=False,dipole=False,rmhighe=True) - #aln.train_ensemble(GPU) - - if not os.path.exists(root_dir + datdir + str(i+1).zfill(2)): - os.mkdir(root_dir + datdir + str(i+1).zfill(2)) - - ## Run active learning sampling ## - acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, strucsfolder, fpatoms+['H'], netdict) - #acs.run_sampling_cluster(gcmddict, GPU) - #acs.run_sampling_dimer(dmrparams, GPU) - #acs.run_sampling_nms(nmsparams, GPU) - acs.run_sampling_strucs(strucsparams, GPU) - #acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU) - #acs.run_sampling_dhl(dhparams, gpus=GPU+GPU) - #acs.run_sampling_TS(tsparams, gpus=GPU) - - ## Submit jobs, return and pack data - ast.generateQMdata(hostname, username, swkdir, ldtdir, datdir + str(i+1).zfill(2), h5stor, mae, jtime,max_jobs=50) From 03e33f8116d89518c8a65a88c2b1d30270c9b7a0 Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Sat, 19 Oct 2019 19:57:07 -0700 Subject: [PATCH 06/10] Add structural sampling to al_conf_cycle.py and al_ions_cycle.py --- activelearning/al_conf_cycle.py | 17 +++++++++++------ activelearning/al_ions_cycle.py | 9 ++++++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/activelearning/al_conf_cycle.py b/activelearning/al_conf_cycle.py index 4308dc5..a8bbaee 100644 --- a/activelearning/al_conf_cycle.py +++ b/activelearning/al_conf_cycle.py @@ -22,6 +22,8 @@ h5stor = root_dir + 'h5files/'# h5store location +strucsfolder = root_dir + 'strucs/'# strucs location + optlfile = root_dir + 'optimized_input_files.dat' #Comet @@ -73,6 +75,10 @@ 'sig' : M, } +strucsparams = {'N': 40, # number of maximum structures to select before QBC from each XYZ file + 'sig': M, + } + mdsparams = {'N': 2, # trajectories to run 'T1': 300, 'T2': 1000, @@ -91,7 +97,7 @@ 'tsfiles': ['/home/jsmith48/scratch/auto_rxn_al/rxns/'], 'nmfile':None, #path to gaussian log file containing the data 'nm':0, #id of normal mode - 'perc':0, #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok. + 'perc':0, #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok. } dhparams = { 'Nmol': 100, @@ -106,9 +112,9 @@ dmrparams = {#'mdselect' : [(400,0),(60,2),(40,3),(5,4)], 'mdselect' : [(10,0), (1,11)], 'N' : 20, - 'T' : 400.0, # running temp + 'T' : 400.0, # running temp 'L' : 25.0, # box length - 'V' : 0.04, # Random init velocities + 'V' : 0.04, # Random init velocities 'dt' : 0.5, # MD time step 'Nm' : 140, # Molecules to embed #'Nm' : 160, # Molecules to embed @@ -186,10 +192,11 @@ os.mkdir(root_dir + datdir + str(i+1).zfill(2)) ## Run active learning sampling ## - acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, fpatoms, netdict) + acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, strucsfolder, fpatoms, netdict) #acs.run_sampling_cluster(gcmddict, GPU) #acs.run_sampling_dimer(dmrparams, GPU) #acs.run_sampling_nms(nmsparams, GPU) + #acs.run_sampling_strucs(strucsparams, GPU) #acs.run_sampling_md(mdsparams, perc=0.5, gpus=GPU) #acs.run_sampling_TS(tsparams, gpus=[2]) acs.run_sampling_dhl(dhparams, gpus=GPU+GPU) @@ -199,5 +206,3 @@ ## Submit jobs, return and pack data ast.generateQMdata(hostname, username, swkdir, ldtdir, datdir + str(i+1).zfill(2), h5stor, mae, jtime) - - diff --git a/activelearning/al_ions_cycle.py b/activelearning/al_ions_cycle.py index 1bb1be7..e85486e 100644 --- a/activelearning/al_ions_cycle.py +++ b/activelearning/al_ions_cycle.py @@ -20,6 +20,8 @@ h5stor = root_dir + 'h5files/'# h5store location +strucsfolder = root_dir + 'strucs/'# strucs locations + optlfile = root_dir + 'optimized_input_files.dat' #Comet @@ -123,6 +125,10 @@ 'sig' : M, } +strucsparams = {'N': 40, # number of maximum structures to select before QBC from each XYZ file + 'sig': M, + } + mdsparams = {'N': 1, # trajectories to run 'T1': 300, 'T2': 1000, @@ -223,10 +229,11 @@ os.mkdir(root_dir + datdir + str(i+1).zfill(2)) ## Run active learning sampling ## - acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, fpatoms+['H'], netdict) + acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, strucsfolder, fpatoms+['H'], netdict) #acs.run_sampling_cluster(gcmddict, GPU) #acs.run_sampling_dimer(dmrparams, GPU) acs.run_sampling_nms(nmsparams, GPU) + #acs.run_sampling_strucs(strucsparams, GPU) acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU) #acs.run_sampling_dhl(dhparams, gpus=GPU+GPU) #acs.run_sampling_TS(tsparams, gpus=GPU) From e7e494addaba3f4db85aa161dafed71529b7456f Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Sat, 19 Oct 2019 20:06:12 -0700 Subject: [PATCH 07/10] Fix a mistake in the expression for the distortions of each normal mode that was effectively making the temperature 3 times larger than the temperature inputed by the user. --- lib/nmstools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/nmstools.py b/lib/nmstools.py index 634a1d9..4705bbb 100644 --- a/lib/nmstools.py +++ b/lib/nmstools.py @@ -74,7 +74,7 @@ def __genrandomstruct__(self): Ki = mDynetoMet * self.fcc[i] ci = rdt[i+1]-rdt[i] Sn = -1.0 if np.random.binomial(1,0.5,1) else 1.0 - Ri = Sn * MtoA * np.sqrt((3.0 * ci * Kb * float(self.Nf) * self.T)/(Ki)) + Ri = Sn * MtoA * np.sqrt((ci * Kb * float(self.Nf) * self.T)/(Ki)) Ri = min([Ri,self.maxd]) oxyz = oxyz + Ri * self.nmo[i] return oxyz @@ -157,7 +157,7 @@ def __check_distance_from_eq__(self,rxyz): def __genrandomstruct__(self): ("generating randomstruct") oxyz = self.xyz.copy() - Ri = np.random.uniform(low=self.l_val, high=self.h_val, size=None) + Ri = np.random.uniform(low=self.l_val, high=self.h_val, size=None) oxyz = oxyz + Ri * self.nmo return oxyz From 7fa346d983ce36170555b7abf80763833a97dbf7 Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Sat, 19 Oct 2019 20:09:38 -0700 Subject: [PATCH 08/10] Fix __getCharge__ inside lib/nmstools.py --- lib/nmstools.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/nmstools.py b/lib/nmstools.py index 4705bbb..9b45283 100644 --- a/lib/nmstools.py +++ b/lib/nmstools.py @@ -27,15 +27,15 @@ def __getCharge__(self,type): elif type is 'C': return 6.0 elif type is 'N': - return 6.0 + return 7.0 elif type is 'O': - return 6.0 + return 8.0 elif type is 'F': - return 6.0 + return 9.0 elif type is 'S': - return 6.0 + return 16.0 elif type == 'Cl': - return 6.0 + return 17.0 else: print('Unknown atom type! ',type) exit(1) @@ -131,6 +131,12 @@ def __getCharge__(self,type): return 7.0 elif type is 'O': return 8.0 + elif type is 'F': + return 9.0 + elif type is 'S': + return 16.0 + elif type == 'Cl': + return 17.0 else: print('Unknown atom type! ',type) exit(1) From ac1a797f9250139a62de6d86ab9c4edd1d9317f7 Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Sat, 19 Oct 2019 20:19:32 -0700 Subject: [PATCH 09/10] Modify the algorithm for the random number generator inside the NMS code at lib/nmstools.py --- lib/nmstools.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/nmstools.py b/lib/nmstools.py index 9b45283..e861aa1 100644 --- a/lib/nmstools.py +++ b/lib/nmstools.py @@ -62,17 +62,14 @@ def __check_distance_from_eq__(self,rxyz): # Generate a structure def __genrandomstruct__(self): - rdt = np.random.random(self.Nf+1) - rdt[0] = 0.0 + rdt = np.random.random(self.Nf) norm = np.random.random(1)[0] - rdt = norm*np.sort(rdt) - rdt[self.Nf] = norm oxyz = self.xyz.copy() for i in range(self.Nf): Ki = mDynetoMet * self.fcc[i] - ci = rdt[i+1]-rdt[i] + ci = norm*rdt[i]/np.sum(rdt) Sn = -1.0 if np.random.binomial(1,0.5,1) else 1.0 Ri = Sn * MtoA * np.sqrt((ci * Kb * float(self.Nf) * self.T)/(Ki)) Ri = min([Ri,self.maxd]) From bc44c65589148b6e6dd8065d63e73ead00d8ac9d Mon Sep 17 00:00:00 2001 From: "Vinicius Wilian D. Cruzeiro" Date: Sat, 19 Oct 2019 20:23:50 -0700 Subject: [PATCH 10/10] Remove notebooks/GDB_charged_molecules.ipynb --- notebooks/GDB_charged_molecules.ipynb | 146 -------------------------- 1 file changed, 146 deletions(-) delete mode 100644 notebooks/GDB_charged_molecules.ipynb diff --git a/notebooks/GDB_charged_molecules.ipynb b/notebooks/GDB_charged_molecules.ipynb deleted file mode 100644 index 602b56a..0000000 --- a/notebooks/GDB_charged_molecules.ipynb +++ /dev/null @@ -1,146 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from rdkit import Chem\n", - "\n", - "def IonizedMols(smi):\n", - " '''From a given SMILES, return all possible protonated/deprotonated molecules, considering N, O and S atoms'''\n", - " # Setting molecule\n", - " mol = Chem.MolFromSmiles(smi)\n", - " # Building lists of all N, O and S atoms\n", - " atoms_idx = [] # Atoms indexes\n", - " nfc = [] # New formal charge\n", - " nehs = [] # New number of explicit hydrogens\n", - " for atom in mol.GetAtoms():\n", - " atn = atom.GetAtomicNum()\n", - " if (atn == 7 or atn == 8 or atn == 16): # If N, O or S\n", - " skip = False\n", - " nhs = atom.GetTotalNumHs() # Number of hydrogen bonded to the atom\n", - " # If N with no protons, if N with two protons,\n", - " # if O with no protons, or if S with no protons: protonate\n", - " if ((atn == 7 and nhs == 0) or (atn == 7 and nhs == 2) or (atn == 8 and nhs == 0) or (atn == 16 and nhs == 0)):\n", - " nfc.append(atom.GetFormalCharge()+1)\n", - " nehs.append(atom.GetTotalNumHs()+1)\n", - " # If N with one proton, if N with three protons,\n", - " # if O with one proton, if S with one proton: deprotonate\n", - " elif ((atn == 7 and nhs == 1) or (atn == 7 and nhs == 3) or (atn == 8 and nhs == 1) or (atn == 16 and nhs == 1)):\n", - " nfc.append(atom.GetFormalCharge()-1)\n", - " nehs.append(max(0,atom.GetNumExplicitHs()-1))\n", - " # If none of the above conditions are true, skip this atom\n", - " else:\n", - " skip = True\n", - " if (not skip): # If not skipping\n", - " atoms_idx.append(atom.GetIdx())\n", - " # Combining all possible protonations/deprotonations\n", - " ntats=len(atoms_idx) # Number of titratable atoms\n", - " vec = [] # This is a reference array that will aid in preparing the new SMILES string\n", - " [vec.append(0) for cnt in range(ntats)] # Initializing the array\n", - " nmols = [] # New molecules\n", - " while (sum(vec)