From 59058c6287a276a484f1ab3544c1f0805aababe0 Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Mon, 29 Oct 2018 14:20:28 -0400
Subject: [PATCH 01/10] Adds a script that can generate protonated and
 deprotonated forms of molecules from the GDB batabase.

---
 notebooks/GDB_charged_molecules.ipynb | 146 ++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 notebooks/GDB_charged_molecules.ipynb

diff --git a/notebooks/GDB_charged_molecules.ipynb b/notebooks/GDB_charged_molecules.ipynb
new file mode 100644
index 0000000..602b56a
--- /dev/null
+++ b/notebooks/GDB_charged_molecules.ipynb
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from rdkit import Chem\n",
+    "\n",
+    "def IonizedMols(smi):\n",
+    "    '''From a given SMILES, return all possible protonated/deprotonated molecules, considering N, O and S atoms'''\n",
+    "    # Setting molecule\n",
+    "    mol = Chem.MolFromSmiles(smi)\n",
+    "    # Building lists of all N, O and S atoms\n",
+    "    atoms_idx = [] # Atoms indexes\n",
+    "    nfc = [] # New formal charge\n",
+    "    nehs = [] # New number of explicit hydrogens\n",
+    "    for atom in mol.GetAtoms():\n",
+    "        atn = atom.GetAtomicNum()\n",
+    "        if (atn == 7 or atn == 8 or atn == 16): # If N, O or S\n",
+    "            skip = False\n",
+    "            nhs = atom.GetTotalNumHs() # Number of hydrogen bonded to the atom\n",
+    "            # If N with no protons, if N with two protons,\n",
+    "            # if O with no protons, or if S with no protons: protonate\n",
+    "            if ((atn == 7 and nhs == 0) or (atn == 7 and nhs == 2) or (atn == 8 and nhs == 0) or (atn == 16 and nhs == 0)):\n",
+    "                nfc.append(atom.GetFormalCharge()+1)\n",
+    "                nehs.append(atom.GetTotalNumHs()+1)\n",
+    "            # If N with one proton, if N with three protons,\n",
+    "            # if O with one proton, if S with one proton: deprotonate\n",
+    "            elif ((atn == 7 and nhs == 1) or (atn == 7 and nhs == 3) or (atn == 8 and nhs == 1) or (atn == 16 and nhs == 1)):\n",
+    "                nfc.append(atom.GetFormalCharge()-1)\n",
+    "                nehs.append(max(0,atom.GetNumExplicitHs()-1))\n",
+    "            # If none of the above conditions are true, skip this atom\n",
+    "            else:\n",
+    "                skip = True\n",
+    "            if (not skip): # If not skipping\n",
+    "                atoms_idx.append(atom.GetIdx())\n",
+    "    # Combining all possible protonations/deprotonations\n",
+    "    ntats=len(atoms_idx) # Number of titratable atoms\n",
+    "    vec = [] # This is a reference array that will aid in preparing the new SMILES string\n",
+    "    [vec.append(0) for cnt in range(ntats)] # Initializing the array\n",
+    "    nmols = [] # New molecules\n",
+    "    while (sum(vec)<ntats): # With this loop we generate all possible protonations/deprotonations\n",
+    "        pos = 0\n",
+    "        if (vec[pos]==0):\n",
+    "            vec[pos]=1\n",
+    "        else:\n",
+    "            vec[pos]=0\n",
+    "            while (vec[pos+1]==1):\n",
+    "                vec[pos+1]=0\n",
+    "                pos+=1\n",
+    "            vec[pos+1]=1\n",
+    "        # Generating the new SMILES string\n",
+    "        nmol = Chem.MolFromSmiles(smi)\n",
+    "        for i in range(ntats):\n",
+    "            if (vec[i]==1):\n",
+    "                nmol.GetAtomWithIdx(atoms_idx[i]).SetFormalCharge(nfc[i])\n",
+    "                nmol.GetAtomWithIdx(atoms_idx[i]).SetNumExplicitHs(nehs[i])\n",
+    "        Chem.SanitizeMol(nmol)\n",
+    "        if (Chem.MolToSmiles(mol) != Chem.MolToSmiles(nmol)):\n",
+    "            # Printing new SMILES string\n",
+    "            nmols.append((Chem.MolToSmiles(nmol)))\n",
+    "    return nmols"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Working on: gdb11_size01.smi and gdb11SFCl_size01.smi\n",
+      "Working on: gdb11_size02.smi and gdb11SFCl_size02.smi\n",
+      "Working on: gdb11_size03.smi and gdb11SFCl_size03.smi\n",
+      "Working on: gdb11_size04.smi and gdb11SFCl_size04.smi\n",
+      "Working on: gdb11_size05.smi and gdb11SFCl_size05.smi\n",
+      "Working on: gdb11_size06.smi and gdb11SFCl_size06.smi\n",
+      "Working on: gdb11_size07.smi and gdb11SFCl_size07.smi\n",
+      "Working on: gdb11_size08.smi and gdb11SFCl_size08.smi\n",
+      "Working on: gdb11_size09.smi and gdb11SFCl_size09.smi\n",
+      "Working on: gdb11_size10.smi and gdb11SFCl_size10.smi\n",
+      "Working on: gdb11_size11.smi and gdb11SFCl_size11.smi\n"
+     ]
+    }
+   ],
+   "source": [
+    "files1 = [\"gdb11_size%02d.smi\"%(i) for i in range(1,12)]\n",
+    "files2 = [\"gdb11SFCl_size%02d.smi\"%(i) for i in range(1,12)]\n",
+    "for i in range(len(files1)):\n",
+    "    print('Working on: '+files1[i]+' and '+files2[i])\n",
+    "    nf=open(\"gdb11charged_size%02d.smi\"%(i+1), 'w')\n",
+    "    cnt=1\n",
+    "    files=[files1[i],files2[i]]\n",
+    "    for fl in files:\n",
+    "        f=open(fl, \"r\")\n",
+    "        for line in f:\n",
+    "            if not line.strip():\n",
+    "                break\n",
+    "            smi = line.split()[0]\n",
+    "            data = IonizedMols(smi)\n",
+    "            if (data):\n",
+    "                for d in data:\n",
+    "                    nf.write(d+'\\t'+str(cnt)+'\\t1\\n')\n",
+    "                    cnt+=1\n",
+    "        f.close()\n",
+    "    nf.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 1041eb024ffc56e1d752ba3d66799d29a6d497e3 Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Wed, 19 Dec 2018 14:48:08 -0500
Subject: [PATCH 02/10] Adds MD sampling to al_ions_cycle.py

---
 activelearning/al_ions_cycle.py | 10 +++----
 lib/anialtools.py               | 51 +++++++++++++++++++--------------
 2 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/activelearning/al_ions_cycle.py b/activelearning/al_ions_cycle.py
index 43406e8..21e72c2 100644
--- a/activelearning/al_ions_cycle.py
+++ b/activelearning/al_ions_cycle.py
@@ -92,7 +92,7 @@
              'tsfiles': ['/home/jsmith48/scratch/auto_rxn_al/rxns/'],
              'nmfile':None,                          #path to gaussian log file containing the data
              'nm':0,                                 #id of normal mode
-             'perc':0,                               #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok. 
+             'perc':0,                               #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok.
              }
 
 dhparams = { 'Nmol': 250,
@@ -109,9 +109,9 @@
              #'mdselect' : [(10,0), (1,11)],
              #'N' : 20,
              'maxNa' : 15, # Largest molecule to consider (for dimers max size is 2x maxNa)
-             'T' : 400.0, # running temp 
+             'T' : 400.0, # running temp
              'L' : 25.0, # box length
-             'V' : 0.04, # Random init velocities 
+             'V' : 0.04, # Random init velocities
              'dt' : 0.25, # MD time step
              'Nm' : 100, # Molecules to embed
              #'Nm' : 160, # Molecules to embed
@@ -178,11 +178,9 @@
     #acs.run_sampling_cluster(gcmddict, GPU)
     #acs.run_sampling_dimer(dmrparams, GPU)
     acs.run_sampling_nms(nmsparams, GPU)
-    #acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU)
+    acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU)
     #acs.run_sampling_dhl(dhparams, gpus=GPU+GPU)
     #acs.run_sampling_TS(tsparams, gpus=GPU)
 
     ## Submit jobs, return and pack data
     ast.generateQMdata(hostname, username, swkdir, ldtdir, datdir + str(i+1).zfill(2), h5stor, mae, jtime,max_jobs=50)
-
-
diff --git a/lib/anialtools.py b/lib/anialtools.py
index 980e6c3..0bc6775 100644
--- a/lib/anialtools.py
+++ b/lib/anialtools.py
@@ -75,7 +75,7 @@ def run_sampling_md(self, mdparams, perc=1.0, gpus=[0]):
         np.random.shuffle(md_work)
         md_work = md_work[0:int(perc*md_work.size)]
         md_work = np.array_split(md_work,len(gpus2))
-	
+
         proc = []
         for i,(md,g) in enumerate(zip(md_work,gpus2)):
             proc.append(Process(target=self.mol_dyn_sampling, args=(md,i,
@@ -246,6 +246,13 @@ def mol_dyn_sampling(self,md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid):
             #print(di, ') Working on', id, '...')
             S = data["species"]
 
+            if "charge" in data and "multip" in data:
+                chg = data["charge"]
+                mlt = data["multip"]
+            else:
+                chg = "0"
+                mlt = "1"
+
             # Set mols
             activ.setmol(data["coordinates"], S)
 
@@ -261,7 +268,7 @@ def mol_dyn_sampling(self,md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid):
             #print(nfo)
 
             if X.size > 0:
-                hdt.writexyzfile(self.cdir + 'mds_' + m.split('.')[0] + '_' + str(i).zfill(2) + str(di).zfill(4) + '.xyz', X, S)
+                hdt.writexyzfilewc(self.cdir + 'mds_' + m.split('.')[0] + '_' + str(i).zfill(2) + str(di).zfill(4) + '.xyz', X, S, ' '+chg+' '+mlt)
         difo.write('Complete mean fail time: ' + "{:.2f}".format(ftme_t / float(Nmol)) + '\n')
         print(Nmol)
         del activ
@@ -280,7 +287,7 @@ def dimer_sampling(self, tid, Nr, dparam, gpuid):
         Nm = dparam['Nm']
         Ni = dparam['Ni']
         Ns = dparam['Ns']
-        
+
         mols = []
         difo = open(self.ldtdir + self.datdir + '/info_data_mddimer-'+str(tid)+'.nfo', 'w')
         for di,id in enumerate(dparam['mdselect']):
@@ -290,26 +297,26 @@ def dimer_sampling(self, tid, Nr, dparam, gpuid):
             dnfo = str(di) + ' of ' + str(len(dparam['mdselect'])) + ') dir: ' + str(self.idir[id[1]]) + ' Selecting: '+str(id[0]*len(files))
             #print(dnfo)
             difo.write(dnfo+'\n')
-        
+
             for i in range(id[0]):
                 for n,m in enumerate(files):
                         data = hdt.read_rcdb_coordsandnm(self.idir[id[1]]+m)
                         if len(data['species']) < maxNa:
                             mols.append(data)
 
-        dgen = pmf.dimergenerator(self.netdict['cnstfile'], 
-                                  self.netdict['saefile'], 
-                                  self.netdict['nnfprefix'], 
-                                  self.netdict['num_nets'], 
+        dgen = pmf.dimergenerator(self.netdict['cnstfile'],
+                                  self.netdict['saefile'],
+                                  self.netdict['nnfprefix'],
+                                  self.netdict['num_nets'],
                                   mols, gpuid)
 
         difo.write('Beginning dimer generation...\n')
-        
+
         Nt = 0
         Nd = 0
         for i in range(Nr):
             dgen.init_dynamics(Nm, V, L, dt, T)
- 
+
             for j in range(Ns):
                 if j != 0:
                     dgen.run_dynamics(Ni)
@@ -322,7 +329,7 @@ def dimer_sampling(self, tid, Nr, dparam, gpuid):
 
                 Nt += dgen.Nt
                 Nd += dgen.Nd
-        
+
                 #print('Step (',tid,',',i,') [', str(dgen.Nd), '/', str(dgen.Nt),'] generated ',len(dgen.frag_list), 'dimers...')
                 #difo.write('Step ('+str(i)+') ['+ str(dgen.Nd)+ '/'+ str(dgen.Nt)+'] generated '+str(len(dgen.frag_list))+'dimers...\n')
                 if max_sig > 3.0*sig:
@@ -376,7 +383,7 @@ def run_sampling_TS(self, tsparams, gpus=[0], perc=1.0):
         np.random.shuffle(TS_infiles)
         TS_infiles = TS_infiles[0:int(perc*len(TS_infiles))]
         TS_infiles = np.array_split(TS_infiles,len(gpus2))
-	
+
         proc = []
         for i,g in enumerate(gpus2):
             proc.append(Process(target=self.TS_sampling, args=(i, TS_infiles[i], tsparams, g)))
@@ -481,7 +488,7 @@ def run_sampling_pDynTS(self, pdynparams, gpus=0):
         print('Finished pDynamo sampling.')
 
 
-    def pDyn_QMsampling(self, pdynparams, gpuid):       
+    def pDyn_QMsampling(self, pdynparams, gpuid):
                                                                   #Call subproc_pDyn class in pyaniasetools as activ
         activ = aat.subproc_pDyn(self.netdict['cnstfile'],
                                          self.netdict['saefile'],
@@ -500,7 +507,7 @@ def pDyn_QMsampling(self, pdynparams, gpuid):
         indir=pdynparams['indir']                               #path to save XYZ files of IRC points to check stddev
         XYZfile=pdynparams['XYZfile']                           #XYZ file with high standard deviations structures
         l_val=pdynparams['l_val']                               #Ri --> randomly perturb in the interval [+x,-x]
-        h_val=pdynparams['h_val']                               
+        h_val=pdynparams['h_val']
         n_points=pdynparams['n_points']                         #Number of points along IRC (forward+backward+1 for TS)
         sig=pdynparams['sig']
         N=pdynparams['N']
@@ -521,7 +528,7 @@ def pDyn_QMsampling(self, pdynparams, gpuid):
             chk_TS = activ.subprocess_cmd(sbproc_cmdTS, False, logfile_TS)
             if chk_TS == 0:
                 chk_IRC = activ.subprocess_cmd(sbproc_cmdIRC, False, logfile_IRC)
-                
+
         # ----------------------- Save points along ANI IRC ------------------------
         IRCfils=os.listdir(IRCdir)
         IRCfils.sort()
@@ -530,16 +537,16 @@ def pDyn_QMsampling(self, pdynparams, gpuid):
             activ.getIRCpoints_toXYZ(n_points, IRCdir+f, f, indir)
         infils=os.listdir(indir)
         infils.sort()
-        
+
         # ------ Check for high standard deviation structures and get vib modes -----
         for f in infils:
             stdev = activ.check_stddev(indir+f, sig)
             if stdev > sig:                      #if stddev is high then get modes for that point
                 nmc = activ.get_nm(indir+f)      #save modes in memory for later use
-            
+
             activ.write_nm_xyz(XYZfile)          #writes all the structures with high standard deviations to xyz file
 
-        # ----------------------------- Read XYZ for NM -----------------------------      
+        # ----------------------------- Read XYZ for NM -----------------------------
         X, spc, Na, C = hdt.readxyz3(XYZfile)
 
         # --------- NMS for XYZs with high stddev --------
@@ -553,7 +560,7 @@ def pDyn_QMsampling(self, pdynparams, gpuid):
                     gen_crd[k] = gen.get_random_structure()
 
                 hdt.writexyzfile(self.cdir + 'nms_TS%i.xyz' %N, gen_crd, spc[i])
-                
+
         del activ
 
 
@@ -895,11 +902,11 @@ def build_strided_training_cache(self,Nblocks,Nvalid,Ntest,build_test=True, buil
                     S = data['species']
                     E = data['energies']
                     X = data['coordinates']
-    
+
                     Esae = hdt.compute_sae(self.netdict['saefile'], S)
-    
+
                     dE.append((E-Esae)/np.sqrt(len(S)))
-    
+
             dE = np.concatenate(dE)
             cidx = np.where(np.abs(dE) < 15.0)
             std = np.abs(dE[cidx]).std()

From ddf495310999b1efcc8c1273d096698c07737d67 Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Wed, 19 Dec 2018 16:32:10 -0500
Subject: [PATCH 03/10] Input file modifications in al_ions_cycle.py

---
 activelearning/al_ions_cycle.py | 61 +++++++++++++++++++++++++++++----
 1 file changed, 55 insertions(+), 6 deletions(-)

diff --git a/activelearning/al_ions_cycle.py b/activelearning/al_ions_cycle.py
index 21e72c2..1bb1be7 100644
--- a/activelearning/al_ions_cycle.py
+++ b/activelearning/al_ions_cycle.py
@@ -58,10 +58,59 @@
 root = '/home/jsmith48/scratch/auto_ion_al'
 
 wkdir   = root+'/modelions/ANI-AL-smallions/'
-iptfile = root+'/modelions/inputtrain.ipt'
+#iptfile = root+'/modelions/inputtrain.ipt'
 saefile = root+'/modelions/sae_linfit.dat'
 cstfile = root+'/modelions/rHCNOSFCl-5.2R_16-3.8A_a4-8.params'
 
+ipt = alt.anitrainerinputdesigner()
+
+ipt.set_parameter('atomEnergyFile','sae_linfit.dat')
+ipt.set_parameter('sflparamsfile','rHCNOSFCl-5.2R_16-3.8A_a4-8.params')
+ipt.set_parameter('eta','0.001')
+ipt.set_parameter('energy','1')
+ipt.set_parameter('force','0')
+ipt.set_parameter('fmult','1.0')
+ipt.set_parameter('feps','0.001')
+ipt.set_parameter('dipole','0')
+ipt.set_parameter('charge','0')
+ipt.set_parameter('cdweight','2.0')
+ipt.set_parameter('tolr','100')
+ipt.set_parameter('tbtchsz','2560')
+ipt.set_parameter('vbtchsz','2560')
+ipt.set_parameter('nkde','2')
+
+
+# Set network layers
+ipt.add_layer('H',{"nodes":320,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('H',{"nodes":64 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('H',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.add_layer('C',{"nodes":288,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('C',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('C',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.add_layer('N',{"nodes":256,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('N',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('N',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.add_layer('O',{"nodes":256,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('O',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('O',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.add_layer('S',{"nodes":192,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('S',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('S',{"nodes":80 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.add_layer('F',{"nodes":192,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('F',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('F',{"nodes":80 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.add_layer('Cl',{"nodes":192,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('Cl',{"nodes":48 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('Cl',{"nodes":80 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.print_layer_parameters()
+
 #-----------0---------
 
 # Training varibles
@@ -144,7 +193,7 @@
 
 ### BEGIN CONFORMATIONAL REFINEMENT LOOP HERE ###
 #N = [20]
-N = [1]
+N = [2]
 
 for i in N:
     #netdir = wkdir+'ANI-1x-RXN-0000.00'+str(i).zfill(2)+'/'
@@ -156,9 +205,9 @@
 
     nnfprefix   = netdir + 'train'
 
-    netdict = {'iptfile' : iptfile,
-               'cnstfile' : cstfile,
+    netdict = {'cnstfile' : cstfile,
                'saefile': saefile,
+               'iptsize': 1008,
                'nnfprefix': netdir+'train',
                'aevsize': aevsize,
                'num_nets': Nnets,
@@ -166,8 +215,8 @@
                }
 
     ## Train the ensemble ##
-    aln = att.alaniensembletrainer(netdir, netdict, h5stor, Nnets)
-    aln.build_strided_training_cache(Nblock,Nbvald,Nbtest,False)
+    aln = att.alaniensembletrainer(netdir, netdict, ipt, h5stor, Nnets)
+    aln.build_strided_training_cache(Nblock,Nbvald,Nbtest,Ekey='energy',forces=False,dipole=False,rmhighe=True)
     aln.train_ensemble(GPU)
 
     if not os.path.exists(root_dir + datdir + str(i+1).zfill(2)):

From 647084c95cc2443ef0a2a013db92b79d507f01ab Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Wed, 12 Jun 2019 19:42:33 -0700
Subject: [PATCH 04/10] New sampling technique: structural sampling

---
 activelearning/al_1b-nn.py | 238 +++++++++++++++++++++++++++++++++++++
 lib/anialtools.py          |  99 ++++++++++++++-
 2 files changed, 335 insertions(+), 2 deletions(-)
 create mode 100644 activelearning/al_1b-nn.py

diff --git a/activelearning/al_1b-nn.py b/activelearning/al_1b-nn.py
new file mode 100644
index 0000000..5323166
--- /dev/null
+++ b/activelearning/al_1b-nn.py
@@ -0,0 +1,238 @@
+import anialservertools as ast
+import anitraintools as att
+import anialtools as alt
+from time import sleep,time
+import os
+import sys
+
+#passfile = "/home/jsmith48/tkey.dat"
+
+hostname = "comet.sdsc.xsede.org"
+#hostname = "bridges.psc.edu"
+#hostname = "moria.chem.ufl.edu"
+username = "cruzeiro"
+#password = open(passfile,'r').read().strip()
+
+root_dir = '/server-home1/cruzeiro/auto_al/'
+
+swkdir = '/home/cruzeiro/scratch/auto_al_cycles/'# server working directory
+datdir = 'ANI-AL-n2o5-1b-'
+
+h5stor = root_dir + 'h5files/'# h5store location
+
+strucsfolder = root_dir + 'strucs/'# strucs location
+
+optlfile = root_dir + 'input_files.dat'
+
+#Comet
+mae = 'module load gnu/4.9.2\n' +\
+      'module load gaussian\n' +\
+      'export PATH="/home/$USER/Gits/RCDBuilder/build/bin:$PATH"\n' +\
+      'export LD_LIBRARY_PATH="/home/$USER/Gits/RCDBuilder/build/lib:$LD_LIBRARY_PATH"\n'
+
+# Bridges
+#mae = 'module load gcc/5.3.0\n' +\
+#      'module load gaussian/09.D.01\n' +\
+#      'export PATH="/home/$USER/Gits/RCDBuilder/build/bin:$PATH"\n' +\
+#      'export LD_LIBRARY_PATH="/home/$USER/Gits/RCDBuilder/build/lib:$LD_LIBRARY_PATH"\n' +\
+#      "export GAUSS_EXEDIR='/opt/packages/gaussian-RevD.01/g09/' \n"
+
+#fpatoms = ['C', 'N', 'O']
+fpatoms = ['N', 'O']
+
+jtime = "0-12:00"
+
+#---- Training Parameters ----
+GPU = [1,2] # GPU IDs
+
+M   = 0.06 # Max error per atom in kcal/mol
+Nnets = 8 # networks in ensemble
+Nblock = 16 # Number of blocks in split
+Nbvald = 2 # number of valid blocks
+Nbtest = 2 # number of test blocks
+aevsize = 128
+
+#wkdir = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/ANI-1x-RXN-0000/'
+#iptfile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/inputtrain.ipt'
+#saefile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/sae_linfit.dat'
+#cstfile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/rHCNO-4.6R_16-3.1A_a4-8.params'
+
+root = '/server-home1/cruzeiro/auto_al'
+
+wkdir   = root+'/models/ANI-AL-n2o5-1b/'
+#iptfile = root+'/modelions/inputtrain.ipt'
+saefile = root+'/models/ANI-AL-n2o5-1b/sae_linfit.dat'
+cstfile = root+'/models/ANI-AL-n2o5-1b/rNO-5.2R_16-3.5A_a4-8.params'
+
+ipt = alt.anitrainerinputdesigner()
+
+ipt.set_parameter('atomEnergyFile','sae_linfit.dat')
+ipt.set_parameter('sflparamsfile','rNO-5.2R_16-3.5A_a4-8.params')
+ipt.set_parameter('eta','0.001')
+ipt.set_parameter('energy','1')
+ipt.set_parameter('force','0')
+ipt.set_parameter('fmult','1.0')
+ipt.set_parameter('feps','0.001')
+ipt.set_parameter('dipole','0')
+ipt.set_parameter('charge','0')
+ipt.set_parameter('cdweight','2.0')
+ipt.set_parameter('tolr','100')
+ipt.set_parameter('tbtchsz','35')
+ipt.set_parameter('vbtchsz','35')
+ipt.set_parameter('nkde','2')
+
+
+# Set network layers
+#ipt.add_layer('H',{"nodes":160,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+#ipt.add_layer('H',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+#ipt.add_layer('H',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+#ipt.add_layer('C',{"nodes":144,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+#ipt.add_layer('C',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+#ipt.add_layer('C',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+#ipt.add_layer('N',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+#ipt.add_layer('N',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+#ipt.add_layer('N',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+#ipt.add_layer('O',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+#ipt.add_layer('O',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+#ipt.add_layer('O',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.add_layer('N',{"nodes":32,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('N',{"nodes":28,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('N',{"nodes":24,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.add_layer('O',{"nodes":32,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
+ipt.add_layer('O',{"nodes":28,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+ipt.add_layer('O',{"nodes":24,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
+
+ipt.print_layer_parameters()
+
+#-----------0---------
+
+# Training varibles
+
+#### Sampling parameters ####
+nmsparams = {'T': 700.0, # Temperature
+             'Ngen': 300, # Confs to generate
+             'Nkep': 200, # Diverse confs to keep
+             'maxd': 0.5, # Diverse confs to keep
+             'sig' : M,
+             }
+
+strucsparams = {'N': 50, # number of maximum structures to select before QBC from each XYZ file
+                'sig': M,
+               }
+
+mdsparams = {'N': 1, # trajectories to run
+             'T1': 300,
+             'T2': 1000,
+             'dt': 0.5,
+             'Nc': 3000,
+             'Ns': 2,
+             'sig': M,
+             }
+
+tsparams = {'T':200, # trajectories to run
+             'n_samples' : 200,
+             'n_steps': 10,
+             'steps': 1500,
+             'min_steps': 300,
+             'sig' : M,
+             'tsfiles': ['/home/jsmith48/scratch/auto_rxn_al/rxns/'],
+             'nmfile':None,                          #path to gaussian log file containing the data
+             'nm':0,                                 #id of normal mode
+             'perc':0,                               #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok.
+             }
+
+dhparams = { 'Nmol': 250,
+             'Nsamp': 2,
+             'sig' : M,
+             'rng' : 0.2,
+             'MaxNa' : 25,
+             #'smilefile': '/home/jsmith48/scratch/auto_dhl_al/dhl_files/dhl_genentech.smi',
+             #'smilefile': '/home/jsmith48/scratch/Drug_moles_raw/chembl_22_clean_1576904_sorted_std_final.smi',
+             'smilefile': '/home/jsmith48/scratch/Drug_moles_raw/chembl_22_clean_1576904_sorted_std_final.smi',
+             }
+
+dmrparams = {'mdselect' : [(4000,0),(1600,1),(400,2),(100,3),(25,4),(10,5),(1,6),(1,7),(1,8),(1,9),(1,10),(50,11),(1,12),(2,13),(2,14),],
+             #'mdselect' : [(10,0), (1,11)],
+             #'N' : 20,
+             'maxNa' : 15, # Largest molecule to consider (for dimers max size is 2x maxNa)
+             'T' : 400.0, # running temp
+             'L' : 25.0, # box length
+             'V' : 0.04, # Random init velocities
+             'dt' : 0.25, # MD time step
+             'Nm' : 100, # Molecules to embed
+             #'Nm' : 160, # Molecules to embed
+             'Nr' : 15, # Number of total boxes to embed and test
+             'Ni' : 10, # Number of steps to run the dynamics before fragmenting
+             'Ns' : 10,
+             'sig': M,
+            }
+
+solv_file = '/home/jsmith48/scratch/GDB-11-AL-wB97x631gd/gdb11_s01/inputs/gdb11_s01-2.ipt'
+solu_dirs = ''
+
+gcmddict = {'edgepad': 0.8, # padding on the box edge
+            'mindist': 1.6, # Minimum allow intermolecular distance
+            'sig' : M, # sig hat for data selection
+            'maxsig' : 3.0*M, # Max frag sig allowed to continue dynamics
+            'Nr': 30, # Number of boxed to run
+            'MolHigh': 910, #High number of molecules
+            'MolLow': 820, #Low number of molecules
+            'Ni': 50, #steps before checking frags
+            'Ns': 100,
+            'dt': 0.25,
+            'V': 0.04,
+            'L': 30.0,
+            'T': 500.0,
+            'Nembed' : 0,
+            'solv_file' : solv_file,
+            'solu_dirs' : solu_dirs,
+            }
+
+### BEGIN CONFORMATIONAL REFINEMENT LOOP HERE ###
+#N = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]
+N = [41]
+
+for i in N:
+    #netdir = wkdir+'ANI-1x-RXN-0000.00'+str(i).zfill(2)+'/'
+    netdir = wkdir + 'ANI-AL-n2o5-1b-' + str(i).zfill(2) + '/'
+    ldtdir = root_dir  # local data directories
+
+    if not os.path.exists(netdir):
+        os.mkdir(netdir)
+
+    nnfprefix   = netdir + 'train'
+
+    netdict = {'cnstfile' : cstfile,
+               'saefile': saefile,
+               'iptsize': 128,
+               'nnfprefix': netdir+'train',
+               'aevsize': aevsize,
+               'num_nets': Nnets,
+               'atomtyp' : ['N','O']
+               }
+
+    ## Train the ensemble ##
+    #aln = att.alaniensembletrainer(netdir, netdict, ipt, h5stor, Nnets)
+    #aln.build_strided_training_cache(Nblock,Nbvald,Nbtest,Ekey='energies',forces=False,dipole=False,rmhighe=True)
+    #aln.train_ensemble(GPU)
+
+    if not os.path.exists(root_dir + datdir + str(i+1).zfill(2)):
+        os.mkdir(root_dir + datdir + str(i+1).zfill(2))
+
+    ## Run active learning sampling ##
+    acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, strucsfolder, fpatoms+['H'], netdict)
+    #acs.run_sampling_cluster(gcmddict, GPU)
+    #acs.run_sampling_dimer(dmrparams, GPU)
+    #acs.run_sampling_nms(nmsparams, GPU)
+    acs.run_sampling_strucs(strucsparams, GPU)
+    #acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU)
+    #acs.run_sampling_dhl(dhparams, gpus=GPU+GPU)
+    #acs.run_sampling_TS(tsparams, gpus=GPU)
+
+    ## Submit jobs, return and pack data
+    ast.generateQMdata(hostname, username, swkdir, ldtdir, datdir + str(i+1).zfill(2), h5stor, mae, jtime,max_jobs=50)
diff --git a/lib/anialtools.py b/lib/anialtools.py
index 0bc6775..1e364be 100644
--- a/lib/anialtools.py
+++ b/lib/anialtools.py
@@ -26,7 +26,7 @@
 class alconformationalsampler():
 
     # Constructor
-    def __init__(self, ldtdir, datdir, optlfile, fpatoms, netdict):
+    def __init__(self, ldtdir, datdir, optlfile, strucsfolder, fpatoms, netdict):
         self.ldtdir = ldtdir # local working dir
         self.datdir = datdir # working data dir
         self.cdir = ldtdir+datdir+'/confs/' # confs store dir (the data gen code looks here for conformations to run QM on)
@@ -35,6 +35,8 @@ def __init__(self, ldtdir, datdir, optlfile, fpatoms, netdict):
 
         self.optlfile = optlfile # Optimized molecules store path file
 
+        self.strucsfolder = strucsfolder # Structures folder
+
         self.idir = [f for f in open(optlfile).read().split('\n') if f != ''] # read and store the paths to the opt files
 
         # create cdir if it does not exist
@@ -57,6 +59,15 @@ def run_sampling_nms(self, nmsparams, gpus=[0]):
         p.start()
         p.join()
 
+    # Runs structural sampling (single GPU only currently)
+    def run_sampling_strucs(self, strucsparams, gpus=[0]):
+        print('Running Structural sampling...')
+        p = Process(target=self.structural_sampling, args=(strucsparams['N'],
+                                                           strucsparams['sig'],
+                                                           gpus[0]))
+        p.start()
+        p.join()
+
     # Run MD sampling on N GPUs. This code will automatically run 2 mds per GPU for better utilization
     def run_sampling_md(self, mdparams, perc=1.0, gpus=[0]):
         md_work = []
@@ -204,7 +215,6 @@ def normal_mode_sampling(self, T, Ngen, Nkep, maxd, sig, gpuid):
                     sigma = anicv.compute_stddev_conformations(conformers,spc)
                     sid = np.where( sigma >  sig )[0]
 
-
                     Nt += sigma.size
                     Nk += sid.size
                     if 100.0*sid.size/float(Ngen) > 0:
@@ -227,6 +237,91 @@ def normal_mode_sampling(self, T, Ngen, Nkep, maxd, sig, gpuid):
         #print('\nGrand Total:', Nkt, 'of', Ntt,'percent:',"{:.2f}".format(100.0*Nkt/Ntt), 'Kept',Nkp)
         of.close()
 
+    # Structural sampler function
+    def structural_sampling(self, N, sig, gpuid):
+        of = open(self.ldtdir + self.datdir + '/info_data_strucs.nfo', 'w')
+
+        aevsize = self.netdict['aevsize']
+
+        anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
+                                                self.netdict['saefile'],
+                                                self.netdict['nnfprefix'],
+                                                self.netdict['num_nets'],
+                                                [gpuid], False)
+
+        dc = aat.diverseconformers(self.netdict['cnstfile'],
+                                   self.netdict['saefile'],
+                                   self.netdict['nnfprefix']+'0/networks/',
+                                   aevsize,
+                                   gpuid, False)
+
+        files = os.listdir(self.strucsfolder)
+        files.sort()
+
+        Nkt = 0
+        Ntt = 0
+        cnt = 0
+        for fi,f in enumerate(files):
+            print(f)
+            fil = open(self.strucsfolder+f,'r')
+            lines = fil.readlines()
+            fil.close()
+            nlines = len(lines)
+            # Reading all conformations
+            nat = int(lines[0])
+            nconfs=int(round(len(lines)/(nat+2)))
+            crds=[]
+            for conf in range(nconfs):
+                crd =[]
+                if (conf==0):
+                    if (not re.search("Charge:",lines[1]) or not re.search("Mul:",lines[1])):
+                        raise ValueError('Error: the first comment line in %s must have charge and multiplicity. Please add something like " Charge: 0 Mul: 1 "'%(self.strucsfolder+f))
+                    chg = lines[1].split("Charge:")[1].split()[0]
+                    mul = lines[1].split("Mul:")[1].split()[0]
+                    spc = []
+                    for i in range(nat):
+                        var = lines[conf*(nat+2)+2+i].split()
+                        spc.append(var[0])
+                        crd.append([float(var[1]),float(var[2]),float(var[3])])
+                else:
+                    for i in range(nat):
+                        var = lines[conf*(nat+2)+2+i].split()
+                        crd.append([float(var[1]),float(var[2]),float(var[3])])
+                crds.append(crd)
+            # Select up to N random structures, if needed
+            if (nconfs>N):
+                list=[]
+                for i in range(N):
+                    num=np.random.random_integers(0,nconfs-1)
+                    while(num in list):
+                        num=num=np.random.random_integers(0,nconfs-1)
+                    list.append(num)
+                ncrds=[]
+                for i in sorted(list):
+                    ncrds.append(crds[i])
+                del crds
+                crds=ncrds
+                del ncrds
+            # Converting list to numpy array
+            crds=np.asarray(crds, dtype=np.float32)
+            # Filter by QBC
+            sigma = anicv.compute_stddev_conformations(crds,spc)
+            sid = np.where( sigma >  sig )[0]
+
+            Ntt += sigma.size
+            Nkt += sid.size
+            of.write(str(cnt+1)+' of '+str(len(files))+') file: '+ str(self.strucsfolder+f) +'\n')
+            of.write('    -Total: '+str(sid.size)+' of '+str(sigma.size)+' percent: '+"{:.2f}".format(100.0*sid.size/sigma.size)+'\n')
+            of.flush()
+            if sid.size > 0:
+                cfn = f.split('.')[0]+'_strucs.xyz'
+                cmts = [' '+chg+' '+mul for c in range(sid.size)]
+                hdt.writexyzfilewc(self.cdir+cfn,crds[sid],spc,cmts)
+            cnt += 1
+
+        of.write('\nGrand Total: '+ str(Nkt)+ ' of '+ str(Ntt)+' percent: '+"{:.2f}".format(100.0*Nkt/Ntt)+'\n')
+        of.close()
+
     # MD Sampling function
     def mol_dyn_sampling(self,md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid):
         activ = aat.moldynactivelearning(self.netdict['cnstfile'],

From a2fbf87c26f304491b9ce0d9641fc308379676fe Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Sat, 19 Oct 2019 19:48:32 -0700
Subject: [PATCH 05/10] Remove activelearning/al_1b-nn.py

---
 activelearning/al_1b-nn.py | 238 -------------------------------------
 1 file changed, 238 deletions(-)
 delete mode 100644 activelearning/al_1b-nn.py

diff --git a/activelearning/al_1b-nn.py b/activelearning/al_1b-nn.py
deleted file mode 100644
index 5323166..0000000
--- a/activelearning/al_1b-nn.py
+++ /dev/null
@@ -1,238 +0,0 @@
-import anialservertools as ast
-import anitraintools as att
-import anialtools as alt
-from time import sleep,time
-import os
-import sys
-
-#passfile = "/home/jsmith48/tkey.dat"
-
-hostname = "comet.sdsc.xsede.org"
-#hostname = "bridges.psc.edu"
-#hostname = "moria.chem.ufl.edu"
-username = "cruzeiro"
-#password = open(passfile,'r').read().strip()
-
-root_dir = '/server-home1/cruzeiro/auto_al/'
-
-swkdir = '/home/cruzeiro/scratch/auto_al_cycles/'# server working directory
-datdir = 'ANI-AL-n2o5-1b-'
-
-h5stor = root_dir + 'h5files/'# h5store location
-
-strucsfolder = root_dir + 'strucs/'# strucs location
-
-optlfile = root_dir + 'input_files.dat'
-
-#Comet
-mae = 'module load gnu/4.9.2\n' +\
-      'module load gaussian\n' +\
-      'export PATH="/home/$USER/Gits/RCDBuilder/build/bin:$PATH"\n' +\
-      'export LD_LIBRARY_PATH="/home/$USER/Gits/RCDBuilder/build/lib:$LD_LIBRARY_PATH"\n'
-
-# Bridges
-#mae = 'module load gcc/5.3.0\n' +\
-#      'module load gaussian/09.D.01\n' +\
-#      'export PATH="/home/$USER/Gits/RCDBuilder/build/bin:$PATH"\n' +\
-#      'export LD_LIBRARY_PATH="/home/$USER/Gits/RCDBuilder/build/lib:$LD_LIBRARY_PATH"\n' +\
-#      "export GAUSS_EXEDIR='/opt/packages/gaussian-RevD.01/g09/' \n"
-
-#fpatoms = ['C', 'N', 'O']
-fpatoms = ['N', 'O']
-
-jtime = "0-12:00"
-
-#---- Training Parameters ----
-GPU = [1,2] # GPU IDs
-
-M   = 0.06 # Max error per atom in kcal/mol
-Nnets = 8 # networks in ensemble
-Nblock = 16 # Number of blocks in split
-Nbvald = 2 # number of valid blocks
-Nbtest = 2 # number of test blocks
-aevsize = 128
-
-#wkdir = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/ANI-1x-RXN-0000/'
-#iptfile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/inputtrain.ipt'
-#saefile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/sae_linfit.dat'
-#cstfile = '/home/jsmith48/scratch/auto_rxn_al/modelrxn/rHCNO-4.6R_16-3.1A_a4-8.params'
-
-root = '/server-home1/cruzeiro/auto_al'
-
-wkdir   = root+'/models/ANI-AL-n2o5-1b/'
-#iptfile = root+'/modelions/inputtrain.ipt'
-saefile = root+'/models/ANI-AL-n2o5-1b/sae_linfit.dat'
-cstfile = root+'/models/ANI-AL-n2o5-1b/rNO-5.2R_16-3.5A_a4-8.params'
-
-ipt = alt.anitrainerinputdesigner()
-
-ipt.set_parameter('atomEnergyFile','sae_linfit.dat')
-ipt.set_parameter('sflparamsfile','rNO-5.2R_16-3.5A_a4-8.params')
-ipt.set_parameter('eta','0.001')
-ipt.set_parameter('energy','1')
-ipt.set_parameter('force','0')
-ipt.set_parameter('fmult','1.0')
-ipt.set_parameter('feps','0.001')
-ipt.set_parameter('dipole','0')
-ipt.set_parameter('charge','0')
-ipt.set_parameter('cdweight','2.0')
-ipt.set_parameter('tolr','100')
-ipt.set_parameter('tbtchsz','35')
-ipt.set_parameter('vbtchsz','35')
-ipt.set_parameter('nkde','2')
-
-
-# Set network layers
-#ipt.add_layer('H',{"nodes":160,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
-#ipt.add_layer('H',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-#ipt.add_layer('H',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-
-#ipt.add_layer('C',{"nodes":144,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
-#ipt.add_layer('C',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-#ipt.add_layer('C',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-
-#ipt.add_layer('N',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
-#ipt.add_layer('N',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-#ipt.add_layer('N',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-
-#ipt.add_layer('O',{"nodes":128,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
-#ipt.add_layer('O',{"nodes":112,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-#ipt.add_layer('O',{"nodes":96 ,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-
-ipt.add_layer('N',{"nodes":32,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
-ipt.add_layer('N',{"nodes":28,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-ipt.add_layer('N',{"nodes":24,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-
-ipt.add_layer('O',{"nodes":32,"activation":9,"type":0,"l2norm":1,"l2valu":5.000e-3})
-ipt.add_layer('O',{"nodes":28,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-ipt.add_layer('O',{"nodes":24,"activation":9,"type":0,"l2norm":1,"l2valu":1.000e-6})
-
-ipt.print_layer_parameters()
-
-#-----------0---------
-
-# Training varibles
-
-#### Sampling parameters ####
-nmsparams = {'T': 700.0, # Temperature
-             'Ngen': 300, # Confs to generate
-             'Nkep': 200, # Diverse confs to keep
-             'maxd': 0.5, # Diverse confs to keep
-             'sig' : M,
-             }
-
-strucsparams = {'N': 50, # number of maximum structures to select before QBC from each XYZ file
-                'sig': M,
-               }
-
-mdsparams = {'N': 1, # trajectories to run
-             'T1': 300,
-             'T2': 1000,
-             'dt': 0.5,
-             'Nc': 3000,
-             'Ns': 2,
-             'sig': M,
-             }
-
-tsparams = {'T':200, # trajectories to run
-             'n_samples' : 200,
-             'n_steps': 10,
-             'steps': 1500,
-             'min_steps': 300,
-             'sig' : M,
-             'tsfiles': ['/home/jsmith48/scratch/auto_rxn_al/rxns/'],
-             'nmfile':None,                          #path to gaussian log file containing the data
-             'nm':0,                                 #id of normal mode
-             'perc':0,                               #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok.
-             }
-
-dhparams = { 'Nmol': 250,
-             'Nsamp': 2,
-             'sig' : M,
-             'rng' : 0.2,
-             'MaxNa' : 25,
-             #'smilefile': '/home/jsmith48/scratch/auto_dhl_al/dhl_files/dhl_genentech.smi',
-             #'smilefile': '/home/jsmith48/scratch/Drug_moles_raw/chembl_22_clean_1576904_sorted_std_final.smi',
-             'smilefile': '/home/jsmith48/scratch/Drug_moles_raw/chembl_22_clean_1576904_sorted_std_final.smi',
-             }
-
-dmrparams = {'mdselect' : [(4000,0),(1600,1),(400,2),(100,3),(25,4),(10,5),(1,6),(1,7),(1,8),(1,9),(1,10),(50,11),(1,12),(2,13),(2,14),],
-             #'mdselect' : [(10,0), (1,11)],
-             #'N' : 20,
-             'maxNa' : 15, # Largest molecule to consider (for dimers max size is 2x maxNa)
-             'T' : 400.0, # running temp
-             'L' : 25.0, # box length
-             'V' : 0.04, # Random init velocities
-             'dt' : 0.25, # MD time step
-             'Nm' : 100, # Molecules to embed
-             #'Nm' : 160, # Molecules to embed
-             'Nr' : 15, # Number of total boxes to embed and test
-             'Ni' : 10, # Number of steps to run the dynamics before fragmenting
-             'Ns' : 10,
-             'sig': M,
-            }
-
-solv_file = '/home/jsmith48/scratch/GDB-11-AL-wB97x631gd/gdb11_s01/inputs/gdb11_s01-2.ipt'
-solu_dirs = ''
-
-gcmddict = {'edgepad': 0.8, # padding on the box edge
-            'mindist': 1.6, # Minimum allow intermolecular distance
-            'sig' : M, # sig hat for data selection
-            'maxsig' : 3.0*M, # Max frag sig allowed to continue dynamics
-            'Nr': 30, # Number of boxed to run
-            'MolHigh': 910, #High number of molecules
-            'MolLow': 820, #Low number of molecules
-            'Ni': 50, #steps before checking frags
-            'Ns': 100,
-            'dt': 0.25,
-            'V': 0.04,
-            'L': 30.0,
-            'T': 500.0,
-            'Nembed' : 0,
-            'solv_file' : solv_file,
-            'solu_dirs' : solu_dirs,
-            }
-
-### BEGIN CONFORMATIONAL REFINEMENT LOOP HERE ###
-#N = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]
-N = [41]
-
-for i in N:
-    #netdir = wkdir+'ANI-1x-RXN-0000.00'+str(i).zfill(2)+'/'
-    netdir = wkdir + 'ANI-AL-n2o5-1b-' + str(i).zfill(2) + '/'
-    ldtdir = root_dir  # local data directories
-
-    if not os.path.exists(netdir):
-        os.mkdir(netdir)
-
-    nnfprefix   = netdir + 'train'
-
-    netdict = {'cnstfile' : cstfile,
-               'saefile': saefile,
-               'iptsize': 128,
-               'nnfprefix': netdir+'train',
-               'aevsize': aevsize,
-               'num_nets': Nnets,
-               'atomtyp' : ['N','O']
-               }
-
-    ## Train the ensemble ##
-    #aln = att.alaniensembletrainer(netdir, netdict, ipt, h5stor, Nnets)
-    #aln.build_strided_training_cache(Nblock,Nbvald,Nbtest,Ekey='energies',forces=False,dipole=False,rmhighe=True)
-    #aln.train_ensemble(GPU)
-
-    if not os.path.exists(root_dir + datdir + str(i+1).zfill(2)):
-        os.mkdir(root_dir + datdir + str(i+1).zfill(2))
-
-    ## Run active learning sampling ##
-    acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, strucsfolder, fpatoms+['H'], netdict)
-    #acs.run_sampling_cluster(gcmddict, GPU)
-    #acs.run_sampling_dimer(dmrparams, GPU)
-    #acs.run_sampling_nms(nmsparams, GPU)
-    acs.run_sampling_strucs(strucsparams, GPU)
-    #acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU)
-    #acs.run_sampling_dhl(dhparams, gpus=GPU+GPU)
-    #acs.run_sampling_TS(tsparams, gpus=GPU)
-
-    ## Submit jobs, return and pack data
-    ast.generateQMdata(hostname, username, swkdir, ldtdir, datdir + str(i+1).zfill(2), h5stor, mae, jtime,max_jobs=50)

From 03e33f8116d89518c8a65a88c2b1d30270c9b7a0 Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Sat, 19 Oct 2019 19:57:07 -0700
Subject: [PATCH 06/10] Add structural sampling to al_conf_cycle.py and
 al_ions_cycle.py

---
 activelearning/al_conf_cycle.py | 17 +++++++++++------
 activelearning/al_ions_cycle.py |  9 ++++++++-
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/activelearning/al_conf_cycle.py b/activelearning/al_conf_cycle.py
index 4308dc5..a8bbaee 100644
--- a/activelearning/al_conf_cycle.py
+++ b/activelearning/al_conf_cycle.py
@@ -22,6 +22,8 @@
 
 h5stor = root_dir + 'h5files/'# h5store location
 
+strucsfolder = root_dir + 'strucs/'# strucs location
+
 optlfile = root_dir + 'optimized_input_files.dat'
 
 #Comet
@@ -73,6 +75,10 @@
              'sig' : M,
              }
 
+strucsparams = {'N': 40, # number of maximum structures to select before QBC from each XYZ file
+                'sig': M,
+               }
+
 mdsparams = {'N': 2, # trajectories to run
              'T1': 300,
              'T2': 1000,
@@ -91,7 +97,7 @@
              'tsfiles': ['/home/jsmith48/scratch/auto_rxn_al/rxns/'],
              'nmfile':None,                          #path to gaussian log file containing the data
              'nm':0,                                 #id of normal mode
-             'perc':0,                               #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok. 
+             'perc':0,                               #Move the molecules initial coordiantes along the mode by this amount. Negative numbers are ok.
              }
 
 dhparams = { 'Nmol': 100,
@@ -106,9 +112,9 @@
 dmrparams = {#'mdselect' : [(400,0),(60,2),(40,3),(5,4)],
              'mdselect' : [(10,0), (1,11)],
              'N' : 20,
-             'T' : 400.0, # running temp 
+             'T' : 400.0, # running temp
              'L' : 25.0, # box length
-             'V' : 0.04, # Random init velocities 
+             'V' : 0.04, # Random init velocities
              'dt' : 0.5, # MD time step
              'Nm' : 140, # Molecules to embed
              #'Nm' : 160, # Molecules to embed
@@ -186,10 +192,11 @@
         os.mkdir(root_dir + datdir + str(i+1).zfill(2))
 
     ## Run active learning sampling ##
-    acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, fpatoms, netdict)
+    acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, strucsfolder, fpatoms, netdict)
     #acs.run_sampling_cluster(gcmddict, GPU)
     #acs.run_sampling_dimer(dmrparams, GPU)
     #acs.run_sampling_nms(nmsparams, GPU)
+    #acs.run_sampling_strucs(strucsparams, GPU)
     #acs.run_sampling_md(mdsparams, perc=0.5, gpus=GPU)
     #acs.run_sampling_TS(tsparams, gpus=[2])
     acs.run_sampling_dhl(dhparams, gpus=GPU+GPU)
@@ -199,5 +206,3 @@
 
     ## Submit jobs, return and pack data
     ast.generateQMdata(hostname, username, swkdir, ldtdir, datdir + str(i+1).zfill(2), h5stor, mae, jtime)
-
-
diff --git a/activelearning/al_ions_cycle.py b/activelearning/al_ions_cycle.py
index 1bb1be7..e85486e 100644
--- a/activelearning/al_ions_cycle.py
+++ b/activelearning/al_ions_cycle.py
@@ -20,6 +20,8 @@
 
 h5stor = root_dir + 'h5files/'# h5store location
 
+strucsfolder = root_dir + 'strucs/'# strucs locations
+
 optlfile = root_dir + 'optimized_input_files.dat'
 
 #Comet
@@ -123,6 +125,10 @@
              'sig' : M,
              }
 
+strucsparams = {'N': 40, # number of maximum structures to select before QBC from each XYZ file
+                'sig': M,
+               }
+
 mdsparams = {'N': 1, # trajectories to run
              'T1': 300,
              'T2': 1000,
@@ -223,10 +229,11 @@
         os.mkdir(root_dir + datdir + str(i+1).zfill(2))
 
     ## Run active learning sampling ##
-    acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, fpatoms+['H'], netdict)
+    acs = alt.alconformationalsampler(ldtdir, datdir + str(i+1).zfill(2), optlfile, strucsfolder, fpatoms+['H'], netdict)
     #acs.run_sampling_cluster(gcmddict, GPU)
     #acs.run_sampling_dimer(dmrparams, GPU)
     acs.run_sampling_nms(nmsparams, GPU)
+    #acs.run_sampling_strucs(strucsparams, GPU)
     acs.run_sampling_md(mdsparams, perc=0.25, gpus=GPU+GPU+GPU)
     #acs.run_sampling_dhl(dhparams, gpus=GPU+GPU)
     #acs.run_sampling_TS(tsparams, gpus=GPU)

From e7e494addaba3f4db85aa161dafed71529b7456f Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Sat, 19 Oct 2019 20:06:12 -0700
Subject: [PATCH 07/10] Fix a mistake in the expression for the distortions of
 each normal mode that was effectively making the temperature 3 times larger
 than the temperature inputed by the user.

---
 lib/nmstools.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/nmstools.py b/lib/nmstools.py
index 634a1d9..4705bbb 100644
--- a/lib/nmstools.py
+++ b/lib/nmstools.py
@@ -74,7 +74,7 @@ def __genrandomstruct__(self):
             Ki = mDynetoMet * self.fcc[i]
             ci = rdt[i+1]-rdt[i]
             Sn = -1.0 if np.random.binomial(1,0.5,1) else 1.0
-            Ri = Sn * MtoA * np.sqrt((3.0 * ci * Kb * float(self.Nf) * self.T)/(Ki))
+            Ri = Sn * MtoA * np.sqrt((ci * Kb * float(self.Nf) * self.T)/(Ki))
             Ri = min([Ri,self.maxd])
             oxyz = oxyz + Ri * self.nmo[i]
         return oxyz
@@ -157,7 +157,7 @@ def __check_distance_from_eq__(self,rxyz):
     def __genrandomstruct__(self):
         ("generating randomstruct")
         oxyz = self.xyz.copy()
-        Ri = np.random.uniform(low=self.l_val, high=self.h_val, size=None)             
+        Ri = np.random.uniform(low=self.l_val, high=self.h_val, size=None)
         oxyz = oxyz + Ri * self.nmo
         return oxyz
 

From 7fa346d983ce36170555b7abf80763833a97dbf7 Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Sat, 19 Oct 2019 20:09:38 -0700
Subject: [PATCH 08/10] Fix __getCharge__ inside lib/nmstools.py

---
 lib/nmstools.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/lib/nmstools.py b/lib/nmstools.py
index 4705bbb..9b45283 100644
--- a/lib/nmstools.py
+++ b/lib/nmstools.py
@@ -27,15 +27,15 @@ def __getCharge__(self,type):
         elif type is 'C':
             return 6.0
         elif type is 'N':
-            return 6.0
+            return 7.0
         elif type is 'O':
-            return 6.0
+            return 8.0
         elif type is 'F':
-            return 6.0
+            return 9.0
         elif type is 'S':
-            return 6.0
+            return 16.0
         elif type == 'Cl':
-            return 6.0
+            return 17.0
         else:
             print('Unknown atom type! ',type)
             exit(1)
@@ -131,6 +131,12 @@ def __getCharge__(self,type):
             return 7.0
         elif type is 'O':
             return 8.0
+        elif type is 'F':
+            return 9.0
+        elif type is 'S':
+            return 16.0
+        elif type == 'Cl':
+            return 17.0
         else:
             print('Unknown atom type! ',type)
             exit(1)

From ac1a797f9250139a62de6d86ab9c4edd1d9317f7 Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Sat, 19 Oct 2019 20:19:32 -0700
Subject: [PATCH 09/10] Modify the algorithm for the random number generator
 inside the NMS code at lib/nmstools.py

---
 lib/nmstools.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lib/nmstools.py b/lib/nmstools.py
index 9b45283..e861aa1 100644
--- a/lib/nmstools.py
+++ b/lib/nmstools.py
@@ -62,17 +62,14 @@ def __check_distance_from_eq__(self,rxyz):
 
     # Generate a structure
     def __genrandomstruct__(self):
-        rdt = np.random.random(self.Nf+1)
-        rdt[0] = 0.0
+        rdt = np.random.random(self.Nf)
         norm = np.random.random(1)[0]
-        rdt = norm*np.sort(rdt)
-        rdt[self.Nf] = norm
 
         oxyz = self.xyz.copy()
 
         for i in range(self.Nf):
             Ki = mDynetoMet * self.fcc[i]
-            ci = rdt[i+1]-rdt[i]
+            ci = norm*rdt[i]/np.sum(rdt)
             Sn = -1.0 if np.random.binomial(1,0.5,1) else 1.0
             Ri = Sn * MtoA * np.sqrt((ci * Kb * float(self.Nf) * self.T)/(Ki))
             Ri = min([Ri,self.maxd])

From bc44c65589148b6e6dd8065d63e73ead00d8ac9d Mon Sep 17 00:00:00 2001
From: "Vinicius Wilian D. Cruzeiro" <vwcruzeiro@ufl.edu>
Date: Sat, 19 Oct 2019 20:23:50 -0700
Subject: [PATCH 10/10] Remove notebooks/GDB_charged_molecules.ipynb

---
 notebooks/GDB_charged_molecules.ipynb | 146 --------------------------
 1 file changed, 146 deletions(-)
 delete mode 100644 notebooks/GDB_charged_molecules.ipynb

diff --git a/notebooks/GDB_charged_molecules.ipynb b/notebooks/GDB_charged_molecules.ipynb
deleted file mode 100644
index 602b56a..0000000
--- a/notebooks/GDB_charged_molecules.ipynb
+++ /dev/null
@@ -1,146 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from rdkit import Chem\n",
-    "\n",
-    "def IonizedMols(smi):\n",
-    "    '''From a given SMILES, return all possible protonated/deprotonated molecules, considering N, O and S atoms'''\n",
-    "    # Setting molecule\n",
-    "    mol = Chem.MolFromSmiles(smi)\n",
-    "    # Building lists of all N, O and S atoms\n",
-    "    atoms_idx = [] # Atoms indexes\n",
-    "    nfc = [] # New formal charge\n",
-    "    nehs = [] # New number of explicit hydrogens\n",
-    "    for atom in mol.GetAtoms():\n",
-    "        atn = atom.GetAtomicNum()\n",
-    "        if (atn == 7 or atn == 8 or atn == 16): # If N, O or S\n",
-    "            skip = False\n",
-    "            nhs = atom.GetTotalNumHs() # Number of hydrogen bonded to the atom\n",
-    "            # If N with no protons, if N with two protons,\n",
-    "            # if O with no protons, or if S with no protons: protonate\n",
-    "            if ((atn == 7 and nhs == 0) or (atn == 7 and nhs == 2) or (atn == 8 and nhs == 0) or (atn == 16 and nhs == 0)):\n",
-    "                nfc.append(atom.GetFormalCharge()+1)\n",
-    "                nehs.append(atom.GetTotalNumHs()+1)\n",
-    "            # If N with one proton, if N with three protons,\n",
-    "            # if O with one proton, if S with one proton: deprotonate\n",
-    "            elif ((atn == 7 and nhs == 1) or (atn == 7 and nhs == 3) or (atn == 8 and nhs == 1) or (atn == 16 and nhs == 1)):\n",
-    "                nfc.append(atom.GetFormalCharge()-1)\n",
-    "                nehs.append(max(0,atom.GetNumExplicitHs()-1))\n",
-    "            # If none of the above conditions are true, skip this atom\n",
-    "            else:\n",
-    "                skip = True\n",
-    "            if (not skip): # If not skipping\n",
-    "                atoms_idx.append(atom.GetIdx())\n",
-    "    # Combining all possible protonations/deprotonations\n",
-    "    ntats=len(atoms_idx) # Number of titratable atoms\n",
-    "    vec = [] # This is a reference array that will aid in preparing the new SMILES string\n",
-    "    [vec.append(0) for cnt in range(ntats)] # Initializing the array\n",
-    "    nmols = [] # New molecules\n",
-    "    while (sum(vec)<ntats): # With this loop we generate all possible protonations/deprotonations\n",
-    "        pos = 0\n",
-    "        if (vec[pos]==0):\n",
-    "            vec[pos]=1\n",
-    "        else:\n",
-    "            vec[pos]=0\n",
-    "            while (vec[pos+1]==1):\n",
-    "                vec[pos+1]=0\n",
-    "                pos+=1\n",
-    "            vec[pos+1]=1\n",
-    "        # Generating the new SMILES string\n",
-    "        nmol = Chem.MolFromSmiles(smi)\n",
-    "        for i in range(ntats):\n",
-    "            if (vec[i]==1):\n",
-    "                nmol.GetAtomWithIdx(atoms_idx[i]).SetFormalCharge(nfc[i])\n",
-    "                nmol.GetAtomWithIdx(atoms_idx[i]).SetNumExplicitHs(nehs[i])\n",
-    "        Chem.SanitizeMol(nmol)\n",
-    "        if (Chem.MolToSmiles(mol) != Chem.MolToSmiles(nmol)):\n",
-    "            # Printing new SMILES string\n",
-    "            nmols.append((Chem.MolToSmiles(nmol)))\n",
-    "    return nmols"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Working on: gdb11_size01.smi and gdb11SFCl_size01.smi\n",
-      "Working on: gdb11_size02.smi and gdb11SFCl_size02.smi\n",
-      "Working on: gdb11_size03.smi and gdb11SFCl_size03.smi\n",
-      "Working on: gdb11_size04.smi and gdb11SFCl_size04.smi\n",
-      "Working on: gdb11_size05.smi and gdb11SFCl_size05.smi\n",
-      "Working on: gdb11_size06.smi and gdb11SFCl_size06.smi\n",
-      "Working on: gdb11_size07.smi and gdb11SFCl_size07.smi\n",
-      "Working on: gdb11_size08.smi and gdb11SFCl_size08.smi\n",
-      "Working on: gdb11_size09.smi and gdb11SFCl_size09.smi\n",
-      "Working on: gdb11_size10.smi and gdb11SFCl_size10.smi\n",
-      "Working on: gdb11_size11.smi and gdb11SFCl_size11.smi\n"
-     ]
-    }
-   ],
-   "source": [
-    "files1 = [\"gdb11_size%02d.smi\"%(i) for i in range(1,12)]\n",
-    "files2 = [\"gdb11SFCl_size%02d.smi\"%(i) for i in range(1,12)]\n",
-    "for i in range(len(files1)):\n",
-    "    print('Working on: '+files1[i]+' and '+files2[i])\n",
-    "    nf=open(\"gdb11charged_size%02d.smi\"%(i+1), 'w')\n",
-    "    cnt=1\n",
-    "    files=[files1[i],files2[i]]\n",
-    "    for fl in files:\n",
-    "        f=open(fl, \"r\")\n",
-    "        for line in f:\n",
-    "            if not line.strip():\n",
-    "                break\n",
-    "            smi = line.split()[0]\n",
-    "            data = IonizedMols(smi)\n",
-    "            if (data):\n",
-    "                for d in data:\n",
-    "                    nf.write(d+'\\t'+str(cnt)+'\\t1\\n')\n",
-    "                    cnt+=1\n",
-    "        f.close()\n",
-    "    nf.close()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}