From dece99d7a06daa561b0ec67a68745e0e92bbde84 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Wed, 10 Jul 2019 11:54:20 -0400 Subject: [PATCH 01/25] adding a gitignore to ignore binary stuff --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0fe2c40 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +build/ From 70c277801a5d65716cd678208c3e599eeabdee4e Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Wed, 10 Jul 2019 12:05:00 -0400 Subject: [PATCH 02/25] fixed paths in runAnalysis for my mac --- prediction/runAnalysis.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 0ae88b6..cfbd95c 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -17,16 +17,16 @@ condition = 'moving' # Moving, immobilized, chip first = True # if 0true, create new HDF5 file transient = 0 -save = False +save = True ############################################### # # load data into dictionary # ############################################## -folder = '{}_{}/'.format(typ, condition) -dataLog = '{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "Analysis/{}_{}.hdf5".format(typ, condition) +folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) +dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) +outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) +outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd From ea6fc5c7a007ed18f0dc43de7d3daf5157f3a02b Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Wed, 10 Jul 2019 13:55:56 -0400 Subject: [PATCH 03/25] updated paths for S4.py --- figures/supp/S4.py | 10 +++++----- prediction/dataHandler.pyc | Bin 19334 -> 19334 bytes prediction/dimReduction.py | 2 +- prediction/dimReduction.pyc | Bin 34403 -> 34406 bytes prediction/makePlots.pyc | Bin 49872 -> 49872 bytes prediction/plotAnalysis.py | 9 +++++---- prediction/runAnalysis.py | 2 +- prediction/stylesheet.pyc | Bin 11948 -> 11948 bytes 8 files changed, 12 insertions(+), 11 deletions(-) diff --git a/figures/supp/S4.py b/figures/supp/S4.py index 2ade581..abfc9b2 100755 --- a/figures/supp/S4.py +++ b/figures/supp/S4.py @@ -64,11 +64,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: - for condition in ['chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + for condition in [ 'chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: + folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) + dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) + outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) + outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) try: # load multiple datasets diff --git 
a/prediction/dataHandler.pyc b/prediction/dataHandler.pyc index 79a907304cd584301d9e2e4b88e3506e16107d05..619e87f96a1c199b42f07d4075a8efb515c3f784 100644 GIT binary patch delta 19 acmZph&e%4ck%Rd&FP99XTI@!SI&T0ulLd4D delta 19 acmZph&e%4ck%Rd&FV{`OJ24wM>bwC(3I`Sd diff --git a/prediction/dimReduction.py b/prediction/dimReduction.py index b1b5a1c..f5da67f 100755 --- a/prediction/dimReduction.py +++ b/prediction/dimReduction.py @@ -1268,7 +1268,7 @@ def predictNeuralDynamicsfromBehavior(data, splits, pars): train, test = splits[label]['Train'], splits[label]['Test'] # create dimensionality-reduced behavior - pca with 10 eigenworms cl = data['CL'] - eigenworms = dh.loadEigenBasis(filename = 'utility/Eigenworms.dat', nComp=4, new=True) + eigenworms = dh.loadEigenBasis(filename = '../utility/Eigenworms.dat', nComp=4, new=True) pcsNew, meanAngle, lengths, refPoint = dh.calculateEigenwormsFromCL(cl, eigenworms) behavior = pcsNew.T # nevermind, use the behaviors we use for lasso instead diff --git a/prediction/dimReduction.pyc b/prediction/dimReduction.pyc index bd9ea36f1f30cd63c5e9c7ef1704baa30f0d16bb..740d5a54af933412795271bbaf6c87ac0d9023b9 100644 GIT binary patch delta 333 zcmaFd!}P3&iG%qwFV`(*wb+duM|~JeHlOq16J|`?d^f(8kuhU(a^gIJ0!9Xgpb!v| z1tQWm3nqoLaA$+qoeV&td~$JG9%IGiM`>z6{haBmSr{uPZ_Tz504s?GDT#onW6Tkj zssib&1`%u^q6S1L1En>DCtnPcWxEVgSvxr}Cy1vGBvk?;>KP|bP*>c1A*Y>H1ninL zplJHXjK>{Ye>|}Z%AcHFRL|J8`E}6^4t7bP0eX7+n~zs3FfmTw{G#?fGvmC;HyTf} tpypiZu0UbS;p;~k2alQWL!0QQp;c4WRSQchyXjd9K_NFIimr@Y6B4+AYvMbm<1yC TgNOq_LW5y)zq-=ocdexWpdw#Z delta 330 zcmaFX!}Pd^iG%qwFW1rE4lx@!j`}bbZ$9V4C(M|-`EGnGBV+pH+ls7rMsGhNN^XsA;94ul$12&(kR$yYBw)u7KduGPDlW#YkWLpC= XYJ%wO|p#JHlJuZ!^pUD^3;~Uwn-pyM-TyaZ5fEA4RS+0h}8-r+Cjur5HS-(>;n<| QfrJLbiX$sb!w0ndA1_y7O^ diff --git a/prediction/makePlots.pyc b/prediction/makePlots.pyc index feed789a0bd6871aaba9ff5a956b35ffdf378893..448d7a74e02441c7f88ff7a13252dadba5386d32 100644 GIT binary patch delta 19 bcmcc6%6y@fnS=Q=FW2`K%CQ?cP8W(up8#$Kf0RTWo2Mz!L From 48b39c5c93eba1d352cf38c2714242301229ca15 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 31 Oct 2019 14:36:22 -0400 Subject: [PATCH 04/25] Minimum set of changes that allow me to regenerate figures I'm a little disappointed to see that in the newly regenerated Fig 2 I get an R^2 of .44 instead of the expected .49. Not sure what's going on here because as far as I can tell this should be the code exactly as Monika had left it, with only superficial changes such as wrapping things in a function and pointing to the correct filenames. But it's pretty close. I plan to use this as a jumping off point to implement the minimum changes needed to change how we do normalization, interpolation and smoothing. --- prediction/metaRunAnalysis.py | 33 ++ prediction/noiseModel.py | 217 ++++++++++++ prediction/provenance.py | 43 +++ prediction/runAnalysis.py | 623 +++++++++++++++++----------------- 4 files changed, 608 insertions(+), 308 deletions(-) create mode 100644 prediction/metaRunAnalysis.py create mode 100644 prediction/noiseModel.py create mode 100644 prediction/provenance.py diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py new file mode 100644 index 0000000..0882f6d --- /dev/null +++ b/prediction/metaRunAnalysis.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Created on Wed Jul 10 14:14:34 2019 This is a meta script to run runAnalysis over and over again.
+@author: leifer +""" + +#### + +import runAnalysis as analysis + +analysis.actuallyRun('AML32','moving') +analysis.actuallyRun('AML18','moving') +analysis.actuallyRun('AML70','moving') +analysis.actuallyRun('AML175','moving') + + +analysis.actuallyRun('AML32','immobilized') +analysis.actuallyRun('AML18','immobilized') +analysis.actuallyRun('AML70','immobilized') + +analysis.actuallyRun('AML32','chip') +analysis.actuallyRun('Special','transition') #Note these are identical.. + # I copied and pasted the input files.. + +analysis.actuallyRun('AML70','chip') + +execfile('../figures/main/fig2v3.py') +execfile('../figures/supp/S4.py') +execfile('../figures/supp/S2.py') +execfile('../figures/main/fig1v3.py') +execfile('../figures/main/fig2_expVarslm.py') diff --git a/prediction/noiseModel.py b/prediction/noiseModel.py new file mode 100644 index 0000000..0088a71 --- /dev/null +++ b/prediction/noiseModel.py @@ -0,0 +1,217 @@ +### noiseModel.py +# We suspect that the neural dynamics we observe in freely moving GCaMP worms have three components: +# 1) Neural Signals representing locomotion +# 2) Neural signals representing other biological processes (sensory processing, working memory, etc) +# 3) Noise, including from motion artifact +# +# We know for sure that (1) & (3) exist. We can decode locomotion so we know that (1) must be present. +# And we know that noise (3) is present because we see it in the GFP moving animals. +# +# To bolster our argument that neural signals representing other biological processes are present in our recordings, +# we will attempt to reject the following null hypothesis: +# +# Null hypothesis is that ONLY neural signals representing locomotion AND noise are present. I.e. only (1) and (3) and +# not (2). +# +# +# We will build up our null model by taking a real freely moving GFP worm that therefore has only noise (3) and we +# will synthetically generate pure locomotory signals (1) and adjust the relative proportion of noise to +# locomotory signals. +# +# We will study the predictive performance of our decoder on this model and also the percent variance explained by the +# locomotory signal to try to assess whether the null model fits our experimental observations. + + +## Find a good GFP recording to use. + +dataset = '/Users/leifer/workspace/PredictionCode/AML18_moving/BrainScanner20160506_160928_MS/heatData.mat' + +# Import the recording based on the output of Jeff's 3dBrain matlab analysis pipeline +import scipy.io as sio +mat_contents = sio.loadmat(dataset) +rPhotoCorr = mat_contents['rPhotoCorr'] +rRaw = mat_contents['rRaw'] +gPhotoCorr = mat_contents['gPhotoCorr'] +gRaw = mat_contents['gRaw'] +Ratio2 = mat_contents['Ratio2'] + +# Import Monika's ICA'd version (with my modification s.t.
there is no z-scoring) +# (Note this section is copy pasted from S2.py) +import prediction.dataHandler as dh +import numpy as np +data = {} +for typ in ['AML18','AML32']: + for condition in ['moving']: + folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) + dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) + outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) + outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + + try: + # load multiple datasets + dataSets = dh.loadDictFromHDF(outLocData) + keyList = np.sort(dataSets.keys()) + results = dh.loadDictFromHDF(outLoc) + # store in dictionary by typ and condition + key = '{}_{}'.format(typ, condition) + data[key] = {} + data[key]['dsets'] = keyList + data[key]['input'] = dataSets + data[key]['analysis'] = results + except IOError: + print typ, condition, 'not found.' + pass +print 'Done reading data.' + + + + +import matplotlib.pyplot as plt +from prediction import provenance as prov + + +ordIndx=data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ordering'] + +fig1=plt.figure() +plt.subplot(4, 1, 1) +plt.imshow(rPhotoCorr[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('rPhotoCorr') + +plt.subplot(4,1,2) +plt.imshow(gPhotoCorr[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('gPhotoCorr') + +plt.subplot(4,1,3) +plt.imshow(np.divide(gPhotoCorr,rPhotoCorr)[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('gPhotoCorr/rPhotoCorr') + + +ax=plt.subplot(4, 1, 4) +plt.imshow(data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ActivityFull'],aspect='auto') +plt.colorbar() +prov.stamp(ax,0,-.3) +plt.title('"ActivityFull" (ICAd, plus Andys modified normalization) \n' +dataset) +plt.show() + + + +fig2=plt.figure() +ax21=plt.subplot(5, 1, 1) +plt.imshow(rRaw[ordIndx, :], aspect='auto', vmin=np.nanmean(rRaw)-2*np.nanstd(rRaw), vmax=np.nanmean(rRaw)+2*np.nanstd(rRaw)) +plt.colorbar() +plt.title('rRaw') + + +ax22=plt.subplot(5,1,2) +plt.plot(10*np.sum(np.isnan(rRaw) ,axis=0)) #plot how many nans at each time point (x10 fo visibility) +plt.plot(np.nanmean(rRaw, axis=0)) # plot the average rRaw intesnity +plt.title('rRaw Average Intensity, and number of neurons with NaN') +ax22.set_ylim( np.nanmean(rRaw)-1*np.nanstd(rRaw), np.nanmean(rRaw)+2*np.nanstd(rRaw) ) +ax22.set_xlim(0,rRaw.shape[1]) +pos21 = ax21.get_position() +pos22 = ax22.get_position() +ax22.set_position([pos21.x0,pos22.y0,pos21.width,pos22.height]) + + + +plt.subplot(5,1,3) +plt.imshow(gRaw[ordIndx, :], aspect='auto', vmin=np.nanmean(gRaw)-2*np.nanstd(gRaw), vmax=np.nanmean(gRaw)+2*np.nanstd(gRaw)) +plt.colorbar() +plt.title('gRaw') + +plt.subplot(5,1,4) +plt.imshow(np.divide(gRaw, rRaw)[ordIndx, :], aspect='auto') +plt.colorbar() +plt.title('gRaw/rRaw') + + +ax=plt.subplot(5, 1, 5) +plt.imshow(data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ActivityFull'],aspect='auto') +plt.colorbar() +prov.stamp(ax,0,-.3) +plt.title('"ActivityFull" (ICAd, plus Andys modified normalization) \n' +dataset) +plt.xlabel('Volumes (6 vol /s )') +plt.show() + + + + + + +## Plot velocity and body curvature +vel=data['AML18_moving']['input']['BrainScanner20160506_160928']['Behavior']['AngleVelocity'] +bodycurv= data['AML18_moving']['input']['BrainScanner20160506_160928']['Behavior']['Eigenworm3'] + +if True: + plt.subplots(4,1,sharex=True) + ax1=plt.subplot(4, 1, 1) + 
plt.imshow(rPhotoCorr[ordIndx,:],aspect='auto') + plt.colorbar() + plt.title('rPhotoCorr') + + ax2=plt.subplot(4,1,2) + plt.imshow(gPhotoCorr[ordIndx,:],aspect='auto') + plt.colorbar() + plt.title('gPhotoCorr') + + + + ax3=plt.subplot(4,1,3) + plt.plot(vel) + plt.title('Velocity') + ax3.set_xlim(ax1.get_xlim()) + + ax4=plt.subplot(4, 1, 4) + plt.plot(bodycurv) + prov.stamp(ax4,0,-.3) + plt.title('Body Curvature') + ax4.set_xlim(ax1.get_xlim()) + + # align the axes + pos1 = ax1.get_position() + pos2 = ax2.get_position() + pos3 = ax3.get_position() + pos4 = ax4.get_position() + ax3.set_position([pos1.x0,pos3.y0,pos1.width,pos3.height]) + ax4.set_position([pos1.x0,pos4.y0,pos1.width,pos4.height]) + + plt.show() + + + +if False: #Just checking the difference between TimeFull and Time + plt.figure() + plt.plot(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['TimeFull'], label='TimeFull') + plt.plot(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['Time'],label='Time') + plt.legend() + plt.show() + + plt.figure() + plt.plot(np.diff(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['TimeFull']), + label='TimeFull') + plt.plot(np.diff(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['Time']), + label='Time') + plt.legend() + plt.show() + +## Extract neural weights learned by the SLM from a GCaMP Recording + + + +## Shuffle the neural Weights + +## Generate synthetic pure locomotory signals based on the GFP recording + +## Combine the GFP and synthetic locomotory signals for a given relative strength (lambda) + +## Figure out how normalization should occur. [Has somethign to do with lambda.] + +## Measure Decoder Performance (R^2) + +## Measure variance explained + +#Get the heatmap values + diff --git a/prediction/provenance.py b/prediction/provenance.py new file mode 100644 index 0000000..769b8d7 --- /dev/null +++ b/prediction/provenance.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +""" +Module to stamp plots with current git hash and creation date. +This helps keep track of what the state of the code was in when it produced a plot. 
+ +19 July 2019 +Andrew Leifer +leifer@princeton.edu +""" + +def getStampString(): + """ + + :return: string with date, time, hash, url and path of current code + """ + ## Stamp with code version and date info + import git + repo = git.Repo(search_parent_directories=True) + hash = repo.head.object.hexsha + gitpath = repoPath = repo._working_tree_dir + giturl = repo.remotes.origin.url + + from datetime import datetime + timestamp = datetime.today().strftime('%Y-%m-%d %H:%M') + + return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath) + +def stamp(ax, x,y,notes): + """ + :param ax: matplotlib axes h + :param x: fractional location in the plot + :param y: fractional location in the plot + :return: + """ + + # these are matplotlib.patch.Patch properties + props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) + + # place a text box in upper left in axes coords + ax.text(x, y, getStampString()+'\n'+notes, transform=ax.transAxes, fontsize=8, + verticalalignment='top', bbox=props) + + diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index c47b4b8..8058402 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -13,320 +13,327 @@ # run parameters # ############################################### -typ = 'AML175' # possible values AML32, AML18, AML70, AML175 -condition = 'moving' # Moving, immobilized, chip -first = True # if 0true, create new HDF5 file -transient = 0 -save = True -############################################### -# -# load data into dictionary -# -############################################## -folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) -dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) - -# data parameters -dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd - 'gaussWindow':100, # sgauss window for angle velocity derivative. must be odd - 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix - 'windowGCamp': 6, # gauss window for red and green channel - 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data - } - -dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) -keyList = np.sort(dataSets.keys()) -if save: - dh.saveDictToHDF(outLocData, dataSets) - -## results dictionary -resultDict = {} -for kindex, key in enumerate(keyList): - resultDict[key] = {} - resultDict[key]['pars'] = dataPars -# analysis parameters - -pars ={'nCompPCA':20, # no of PCA components - 'PCAtimewarp':False, #timewarp so behaviors are equally represented - 'trainingCut': 0.6, # what fraction of data to use for training - 'trainingType': 'middle', # simple, random or middle.select random or consecutive data for training. Middle is a testset in the middle - 'linReg': 'simple', # ordinary or ransac least squares - 'trainingSample': 1, # take only samples that are at least n apart to have independence. 
4sec = gcamp_=->24 apart - 'useRank': 0, # use the rank transformed version of neural data for all analyses - 'useDeconv': 0, # use the deconvolved transformed version of neural data for all analyses - 'useRaw': 0, # use the deconvolved transformed version of neural data for all analyses - 'nCluster': 10, # use the deconvolved transformed version of neural data for all analyses - 'useClust':False,# use clusters in the fitting procedure. - 'periods': np.arange(0, 300) # relevant periods in seconds for timescale estimate - } - -behaviors = ['AngleVelocity', 'Eigenworm3'] -############################################### -# -# check which calculations to perform -# -############################################## -createIndicesTest = 1#True -periodogram = 1 -half_period = 1 -hierclust = 0 -bta = 0 -pca = 1#False -kato_pca = 1#False -half_pca = 1 -corr = 1 -predNeur = 0 -predPCA = 0 -svm = 0 -lasso = 0 -elasticnet = 0 -lagregression = 0 -# this requires moving animals -if condition != 'immobilized': - predNeur = 1 +def actuallyRun(typ='AML32', condition = 'moving'): +# typ possible values AML32, AML18, AML70, AML175 +# condition possible values moving, immobilized, chip + + + #typ = 'AML175' # possible values AML32, AML18, AML70, AML175 + #condition = 'moving' # Moving, immobilized, chip + + first = True # if 0true, create new HDF5 file + transient = 0 + save = True + ############################################### + # + # load data into dictionary + # + ############################################## + folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) + dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) + outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) + outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + + # data parameters + dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd + 'gaussWindow':100, # sgauss window for angle velocity derivative. must be odd + 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix + 'windowGCamp': 6, # gauss window for red and green channel + 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data + } + + dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) + keyList = np.sort(dataSets.keys()) + if save: + dh.saveDictToHDF(outLocData, dataSets) + + ## results dictionary + resultDict = {} + for kindex, key in enumerate(keyList): + resultDict[key] = {} + resultDict[key]['pars'] = dataPars + # analysis parameters + + pars ={'nCompPCA':20, # no of PCA components + 'PCAtimewarp':False, #timewarp so behaviors are equally represented + 'trainingCut': 0.6, # what fraction of data to use for training + 'trainingType': 'middle', # simple, random or middle.select random or consecutive data for training. Middle is a testset in the middle + 'linReg': 'simple', # ordinary or ransac least squares + 'trainingSample': 1, # take only samples that are at least n apart to have independence. 
4sec = gcamp_=->24 apart + 'useRank': 0, # use the rank transformed version of neural data for all analyses + 'useDeconv': 0, # use the deconvolved transformed version of neural data for all analyses + 'useRaw': 0, # use the deconvolved transformed version of neural data for all analyses + 'nCluster': 10, # use the deconvolved transformed version of neural data for all analyses + 'useClust':False,# use clusters in the fitting procedure. + 'periods': np.arange(0, 300) # relevant periods in seconds for timescale estimate + } + + behaviors = ['AngleVelocity', 'Eigenworm3'] + + ############################################### + # + # check which calculations to perform + # + ############################################## + createIndicesTest = 1#True + periodogram = 1 + half_period = 1 + hierclust = 0 + bta = 0 + pca = 1#False + kato_pca = 1#False + half_pca = 1 + corr = 1 + predNeur = 0 + predPCA = 0 svm = 0 - lasso = 1 - elasticnet = 1#True - predPCA = 1 + lasso = 0 + elasticnet = 0 lagregression = 0 + # this requires moving animals + if condition != 'immobilized': + predNeur = 1 + svm = 0 + lasso = 1 + elasticnet = 1#True + predPCA = 1 + lagregression = 0 -############################################### -# -# create training and test set indices -# -############################################## -if createIndicesTest: - for kindex, key in enumerate(keyList): - resultDict[key] = {'Training':{}} - for label in behaviors: - train, test = dr.createTrainingTestIndices(dataSets[key], pars, label=label) - if transient: - train = np.where(dataSets[key]['Neurons']['Time']<4*60)[0] - # after 4:30 min - test = np.where((dataSets[key]['Neurons']['Time']>7*60)*(dataSets[key]['Neurons']['Time']<14*60))[0] - resultDict[key]['Training']['Half'] ={'Train':train} - resultDict[key]['Training']['Half']['Test'] = test - else: - # add half split - midpoint = np.mean(dataSets[key]['Neurons']['Time']) - trainhalf = np.where(dataSets[key]['Neurons']['Time']midpoint)[0] - resultDict[key]['Training']['Half'] ={'Train':trainhalf} - resultDict[key]['Training']['Half']['Test'] = testhalf - resultDict[key]['Training'][label] = {'Train':train } - resultDict[key]['Training'][label]['Test']=test - - - print "Done generating trainingsets" + ############################################### + # + # create training and test set indices + # + ############################################## + if createIndicesTest: + for kindex, key in enumerate(keyList): + resultDict[key] = {'Training':{}} + for label in behaviors: + train, test = dr.createTrainingTestIndices(dataSets[key], pars, label=label) + if transient: + train = np.where(dataSets[key]['Neurons']['Time']<4*60)[0] + # after 4:30 min + test = np.where((dataSets[key]['Neurons']['Time']>7*60)*(dataSets[key]['Neurons']['Time']<14*60))[0] + resultDict[key]['Training']['Half'] ={'Train':train} + resultDict[key]['Training']['Half']['Test'] = test + else: + # add half split + midpoint = np.mean(dataSets[key]['Neurons']['Time']) + trainhalf = np.where(dataSets[key]['Neurons']['Time']midpoint)[0] + resultDict[key]['Training']['Half'] ={'Train':trainhalf} + resultDict[key]['Training']['Half']['Test'] = testhalf + resultDict[key]['Training'][label] = {'Train':train } + resultDict[key]['Training'][label]['Test']=test -############################################### -# -# calculate the periodogram of the neural signals -# -############################################## -if periodogram: - print 'running periodogram(s)' - for kindex, key in enumerate(keyList): - resultDict[key]['Period'] = 
dr.runPeriodogram(dataSets[key], pars, testset = None) -# for half the sample each -if half_period: - print 'running half periodogram(s)' - for kindex, key in enumerate(keyList): - splits = resultDict[key]['Training'] - resultDict[key]['Period1Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Train']) - resultDict[key]['Period2Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Test']) -############################################### -# -# correlation neurons and behavior -# -############################################## -if corr: - print 'running Correlation.' - for kindex, key in enumerate(keyList): - resultDict[key]['Correlation'] = dr.behaviorCorrelations(dataSets[key], behaviors) - #half1 = resultDict[key]['Training'][behaviors[0]]['Train'] - #resultDict[key]['CorrelationHalf'] = dr.behaviorCorrelations(dataSets[key], behaviors, subset = half1) - -############################################### -# -# run svm to predict discrete behaviors -# -############################################## -if svm: - for kindex, key in enumerate(keyList): - print 'running SVM.' - splits = resultDict[key]['Training'] - resultDict[key]['SVM'] = dr.discreteBehaviorPrediction(dataSets[key], pars, splits ) + print "Done generating trainingsets" -############################################### -# -# run PCA and store results -# -############################################## -#%% -if pca: - print 'running PCA' - for kindex, key in enumerate(keyList): - resultDict[key]['PCA'] = dr.runPCANormal(dataSets[key], pars) - resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) - - - #correlate behavior and PCA - #resultDict[key]['PCACorrelation']=dr.PCACorrelations(dataSets[key],resultDict[key], behaviors, flag = 'PCA', subset = None) -############################################### -# -# run Kato PCA -# -############################################## -#%% -if kato_pca: - print 'running Kato et. 
al PCA' - for kindex, key in enumerate(keyList): - resultDict[key]['katoPCA'] = dr.runPCANormal(dataSets[key], pars, deriv = True) - splits = resultDict[key]['Training'] - resultDict[key]['katoPCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train'], deriv=True) - - resultDict[key]['katoPCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Test'], deriv=True) - -############################################### -# -# run split first-second half PCA -# -############################################## -#%% -if half_pca: - print 'half-split PCA' - for kindex, key in enumerate(keyList): - # run PCA on each half - splits = resultDict[key]['Training'] - resultDict[key]['PCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train']) - resultDict[key]['PCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset =splits['Half']['Test']) - resultDict[key]['PCArankCorr'] = dr.rankCorrPCA(resultDict[key]) -#%% -############################################### -# -# predict neural dynamics from behavior -# -############################################## -if predPCA: - for kindex, key in enumerate(keyList): - print 'predicting behavior PCA' - splits = resultDict[key]['Training'] - resultDict[key]['PCAPred'] = dr.predictBehaviorFromPCA(dataSets[key], \ - splits, pars, behaviors) -#%% -############################################### -# -# predict neural dynamics from behavior -# -############################################## -if predNeur: - for kindex, key in enumerate(keyList): - print 'predicting neural dynamics from behavior' - splits = resultDict[key]['Training'] - resultDict[key]['RevPred'] = dr.predictNeuralDynamicsfromBehavior(dataSets[key], splits, pars) - plt.show() -#%% -############################################### -# -# use agglomerative clustering to connect similar neurons -# -############################################## -if hierclust: - for kindex, key in enumerate(keyList): - print 'running clustering' - resultDict[key]['clust'] = dr.runHierarchicalClustering(dataSets[key], pars) -#%% -############################################### -# -# use behavior triggered averaging to create non-othogonal axes -# -############################################## -if bta: - for kindex, key in enumerate(keyList): - print 'running BTA' - resultDict[key]['BTA'] =dr.runBehaviorTriggeredAverage(dataSets[key], pars) -#%% -############################################### -# -# linear regression using LASSO -# -############################################## -if lasso: - print "Performing LASSO.", - for kindex, key in enumerate(keyList): - - splits = resultDict[key]['Training'] - resultDict[key]['LASSO'] = dr.runLasso(dataSets[key], pars, splits, plot=0, behaviors = behaviors) - # calculate how much more neurons contribute - tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key],splits, pars, fitmethod = 'LASSO', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['LASSO'][tmpKey].update(tmpDict[tmpKey]) - - tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'LASSO', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['LASSO'][tmpKey]=tmpDict[tmpKey] - - # do converse calculation -- give it only the neurons non-zero in previous case - subset = {} - subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['LASSO']['Eigenworm3']['weights'])>0)[0] - subset['Eigenworm3'] = 
np.where(np.abs(resultDict[key]['LASSO']['AngleVelocity']['weights'])>0)[0] - resultDict[key]['ConversePredictionLASSO'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'LASSO', subset = subset) - - -#%% -############################################### -# -# linear regression using elastic Net -# -############################################## -if elasticnet: - for kindex, key in enumerate(keyList): - print 'Running Elastic Net', key - splits = resultDict[key]['Training'] - resultDict[key]['ElasticNet'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors) - # calculate how much more neurons contribute - tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key], splits,pars, fitmethod = 'ElasticNet', behaviors = behaviors, ) - for tmpKey in tmpDict.keys(): - resultDict[key]['ElasticNet'][tmpKey].update(tmpDict[tmpKey]) - - tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'ElasticNet', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['ElasticNet'][tmpKey]=tmpDict[tmpKey] - # do converse calculation -- give it only the neurons non-zero in previous case - subset = {} - subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['ElasticNet']['Eigenworm3']['weights'])>0)[0] - subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['ElasticNet']['AngleVelocity']['weights'])>0)[0] - resultDict[key]['ConversePredictionEN'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'ElasticNet', subset = subset) - - # run scrambled control - #print 'Running Elastic Net scrambled' - #resultDict[key]['ElasticNetRandomized'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors, scramble=True) - - -#%% -############################################### -# -# lag-time fits of neural activity -# -############################################## -if lagregression: - for kindex, key in enumerate(keyList): - print 'Running lag calculation', key - splits = resultDict[key]['Training'] - #resultDict[key]['LagLASSO'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3)) - resultDict[key]['LagEN'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3), flag='ElasticNet') + ############################################### + # + # calculate the periodogram of the neural signals + # + ############################################## + if periodogram: + print 'running periodogram(s)' + for kindex, key in enumerate(keyList): + resultDict[key]['Period'] = dr.runPeriodogram(dataSets[key], pars, testset = None) + # for half the sample each + if half_period: + print 'running half periodogram(s)' + for kindex, key in enumerate(keyList): + splits = resultDict[key]['Training'] + resultDict[key]['Period1Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Train']) + resultDict[key]['Period2Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Test']) -#%% -############################################### -# -# save data as HDF5 file -# -############################################## -if save: - dh.saveDictToHDF(outLoc, resultDict) + ############################################### + # + # correlation neurons and behavior + # + 
############################################## + if corr: + print 'running Correlation.' + for kindex, key in enumerate(keyList): + resultDict[key]['Correlation'] = dr.behaviorCorrelations(dataSets[key], behaviors) + #half1 = resultDict[key]['Training'][behaviors[0]]['Train'] + #resultDict[key]['CorrelationHalf'] = dr.behaviorCorrelations(dataSets[key], behaviors, subset = half1) + + ############################################### + # + # run svm to predict discrete behaviors + # + ############################################## + if svm: + for kindex, key in enumerate(keyList): + print 'running SVM.' + splits = resultDict[key]['Training'] + resultDict[key]['SVM'] = dr.discreteBehaviorPrediction(dataSets[key], pars, splits ) + + ############################################### + # + # run PCA and store results + # + ############################################## + #%% + if pca: + print 'running PCA' + for kindex, key in enumerate(keyList): + resultDict[key]['PCA'] = dr.runPCANormal(dataSets[key], pars) + resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) + + + #correlate behavior and PCA + #resultDict[key]['PCACorrelation']=dr.PCACorrelations(dataSets[key],resultDict[key], behaviors, flag = 'PCA', subset = None) + ############################################### + # + # run Kato PCA + # + ############################################## + #%% + if kato_pca: + print 'running Kato et. al PCA' + for kindex, key in enumerate(keyList): + resultDict[key]['katoPCA'] = dr.runPCANormal(dataSets[key], pars, deriv = True) + splits = resultDict[key]['Training'] + resultDict[key]['katoPCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train'], deriv=True) + + resultDict[key]['katoPCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Test'], deriv=True) + + ############################################### + # + # run split first-second half PCA + # + ############################################## + #%% + if half_pca: + print 'half-split PCA' + for kindex, key in enumerate(keyList): + # run PCA on each half + splits = resultDict[key]['Training'] + resultDict[key]['PCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train']) + resultDict[key]['PCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset =splits['Half']['Test']) + resultDict[key]['PCArankCorr'] = dr.rankCorrPCA(resultDict[key]) + #%% + ############################################### + # + # predict neural dynamics from behavior + # + ############################################## + if predPCA: + for kindex, key in enumerate(keyList): + print 'predicting behavior PCA' + splits = resultDict[key]['Training'] + resultDict[key]['PCAPred'] = dr.predictBehaviorFromPCA(dataSets[key], \ + splits, pars, behaviors) + #%% + ############################################### + # + # predict neural dynamics from behavior + # + ############################################## + if predNeur: + for kindex, key in enumerate(keyList): + print 'predicting neural dynamics from behavior' + splits = resultDict[key]['Training'] + resultDict[key]['RevPred'] = dr.predictNeuralDynamicsfromBehavior(dataSets[key], splits, pars) + plt.show() + #%% + ############################################### + # + # use agglomerative clustering to connect similar neurons + # + ############################################## + if hierclust: + for kindex, key in enumerate(keyList): + print 'running clustering' + resultDict[key]['clust'] = 
dr.runHierarchicalClustering(dataSets[key], pars) + #%% + ############################################### + # + # use behavior triggered averaging to create non-othogonal axes + # + ############################################## + if bta: + for kindex, key in enumerate(keyList): + print 'running BTA' + resultDict[key]['BTA'] =dr.runBehaviorTriggeredAverage(dataSets[key], pars) + #%% + ############################################### + # + # linear regression using LASSO + # + ############################################## + if lasso: + print "Performing LASSO.", + for kindex, key in enumerate(keyList): + + splits = resultDict[key]['Training'] + resultDict[key]['LASSO'] = dr.runLasso(dataSets[key], pars, splits, plot=0, behaviors = behaviors) + # calculate how much more neurons contribute + tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key],splits, pars, fitmethod = 'LASSO', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['LASSO'][tmpKey].update(tmpDict[tmpKey]) + + tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'LASSO', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['LASSO'][tmpKey]=tmpDict[tmpKey] + + # do converse calculation -- give it only the neurons non-zero in previous case + subset = {} + subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['LASSO']['Eigenworm3']['weights'])>0)[0] + subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['LASSO']['AngleVelocity']['weights'])>0)[0] + resultDict[key]['ConversePredictionLASSO'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'LASSO', subset = subset) + + + #%% + ############################################### + # + # linear regression using elastic Net + # + ############################################## + if elasticnet: + for kindex, key in enumerate(keyList): + print 'Running Elastic Net', key + splits = resultDict[key]['Training'] + resultDict[key]['ElasticNet'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors) + # calculate how much more neurons contribute + tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key], splits,pars, fitmethod = 'ElasticNet', behaviors = behaviors, ) + for tmpKey in tmpDict.keys(): + resultDict[key]['ElasticNet'][tmpKey].update(tmpDict[tmpKey]) + + tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'ElasticNet', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['ElasticNet'][tmpKey]=tmpDict[tmpKey] + # do converse calculation -- give it only the neurons non-zero in previous case + subset = {} + subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['ElasticNet']['Eigenworm3']['weights'])>0)[0] + subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['ElasticNet']['AngleVelocity']['weights'])>0)[0] + resultDict[key]['ConversePredictionEN'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'ElasticNet', subset = subset) + + # run scrambled control + #print 'Running Elastic Net scrambled' + #resultDict[key]['ElasticNetRandomized'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors, scramble=True) + + + #%% + ############################################### + # + # lag-time fits of neural activity + # + ############################################## + if lagregression: + for kindex, key in 
enumerate(keyList): + print 'Running lag calculation', key + splits = resultDict[key]['Training'] + #resultDict[key]['LagLASSO'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3)) + resultDict[key]['LagEN'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3), flag='ElasticNet') + + #%% + ############################################### + # + # save data as HDF5 file + # + ############################################## + if save: + dh.saveDictToHDF(outLoc, resultDict) From 66baa05d9e41385a42f3d400df6cc627dd6a0d1e Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 31 Oct 2019 15:24:43 -0400 Subject: [PATCH 05/25] Changed GCAMP smoothing from 6 to 5. --- prediction/dataHandler.py | 3 ++- prediction/runAnalysis.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 911f46c..e60ddc6 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -318,7 +318,8 @@ def preprocessNeuralData(R, G, dataPars): # smooth with GCamp6 halftime = 1s RS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in R]) - GS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in G]) + GS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in G]) + print("windowGCaMP:", str(dataPars['windowGCamp'])) # YR = GS/RS ## meansubtract = False#False#True diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 8058402..0f20af1 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -39,8 +39,8 @@ def actuallyRun(typ='AML32', condition = 'moving'): dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd 'gaussWindow':100, # sgauss window for angle velocity derivative. must be odd 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix - 'windowGCamp': 6, # gauss window for red and green channel - 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data + 'windowGCamp': 5, # gauss window for red and green channel + 'interpolateNans': 1,#interpolate gaps smaller than this of nan values in calcium data } dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) From ea64fc8045d96dbde79521b116c777f3adcb0294 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 31 Oct 2019 15:33:37 -0400 Subject: [PATCH 06/25] Turned off periodogram and some of the other analyses that I don't think are particularly relevant.. 
--- prediction/runAnalysis.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 0f20af1..f334419 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -77,13 +77,13 @@ def actuallyRun(typ='AML32', condition = 'moving'): # ############################################## createIndicesTest = 1#True - periodogram = 1 - half_period = 1 + periodogram = 0 + half_period = 0 hierclust = 0 bta = 0 pca = 1#False - kato_pca = 1#False - half_pca = 1 + kato_pca = 0#False + half_pca = 0 corr = 1 predNeur = 0 predPCA = 0 @@ -95,7 +95,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): if condition != 'immobilized': predNeur = 1 svm = 0 - lasso = 1 + lasso = 0 elasticnet = 1#True predPCA = 1 lagregression = 0 From 73091b2afbd46474d377a4a607dca57b789f83bd Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 7 Nov 2019 11:20:04 -0500 Subject: [PATCH 07/25] photobleaching correction by dividing off exponential fit I've added the photobleaching correction code. I am correcting photobleaching following the logic outlined in Xiaowen Chen's 2019 Phys Rev E paper. This is slightly different than what Jeff did in the 3dBrain MATLAB code. In that code he fit the exponential and then subtracted it off. Here I divide it off, but rescale the trace to match the original. Note: in its current state, the pipeline is not utilizing the photocorrected traces, but now at least I have the machinery to create them. --- prediction/dataHandler.py | 148 +++++++++++++++++++++++++++++++++++++- prediction/runAnalysis.py | 2 + 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index e60ddc6..ca2d881 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -4,6 +4,7 @@ load and compare multiple data sets for behavior prediction.
@author: monika scholz """ +from __future__ import division # make division give you floats, as is standard for python3 import scipy.io import os import numpy as np @@ -257,8 +258,11 @@ def transformEigenworms(pcs, dataPars): for pcindex, pc in enumerate(pcs): # mask nans by linearly interpolating mask = np.isnan(pc1) + if np.any(mask): pc[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), pc1[~mask]) + import warnings + warnings.warn('Found ' + str(np.sum(mask)) + 'NaNs that are being interpolated over in the behavior PC1, PC2 or PC3.') pcs[pcindex] = pc theta = np.unwrap(np.arctan2(pcs[2], pcs[1])) @@ -382,7 +386,12 @@ def loadData(folder, dataPars, ew=1): ethomask = np.isnan(etho) if np.any(ethomask): etho[ethomask] = 0 - + + rRaw=np.array(data['rRaw'])[:,:len(np.array(data['hasPointsTime']))] + gRaw=np.array(data['gRaw'])[:,:len(np.array(data['hasPointsTime']))] + rPhotoCorr=correctPhotobleaching(rRaw,dataPars['volumeAcquisitionRate']) + gPhotoCorr = correctPhotobleaching(gRaw, dataPars['volumeAcquisitionRate']) + #load neural data R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] @@ -558,3 +567,140 @@ def loadDictFromHDF(filePath): f.close() return d +def correctPhotobleaching(raw, vps=6): + """ Apply photobleaching correction to raw signals, works on a whole heatmap of neurons + + Use Xiaowen's photobleaching correction method which fits an exponential and then divides it off + Note we are following along with the explicit noise model from Xiaowen's Phys Rev E paper Eq 1 page 3 + Chen, Randi, Leifer and Bialek, Phys Rev E 2019 + + takes raw neural signals N (rows) neruons x T (cols) time points + we require that each neuron is a row and that there are more time points than rows + + """ + + # Make sure the data we are getting makes sense. + assert raw.ndim <= 2, "raw fluorescent traces passed to correctPhotobleaching() have the wrong number of dimensions" + + if raw.ndim == 2: + assert raw.shape[1] > raw.shape[ + 0], "raw fluorescent traces passed to correctPhotobleaching() have the wrong size! Perhaps it is transposed?" + + window_s = 12.6 # time of median filter window in seconds + medfilt_window = np.int(np.ceil(window_s * vps / 2.) 
* 2 + 1) # make it an odd number of frames + + photoCorr = np.zeros_like(raw) # initialize photoCorr + + showPlots = True + + + if raw.ndim == 2: + + # perform median filtering (Nan friendly) + from scipy.signal import medfilt + smoothed = medfilt(raw, [1, medfilt_window]) + + N_neurons = smoothed.shape[0] + + # initialize the exponential fit parameter outputs + popt = np.zeros([N_neurons, 3]) + pcov = np.zeros([3, 3, N_neurons]) + + # Exponential Fit on each neuron + for row in np.arange(N_neurons): + popt[row, :], pcov[:, :, row], xVals = fitPhotobleaching(smoothed[row, :], vps) + photoCorr[row, :] = popt[row, 0] * raw[row, :] / expfunc(xVals, *popt[row, :]) + + if showPlots: + import matplotlib.pyplot as plt + plt.plot(xVals, raw[row, :], 'b-', label=['raw, row: '+np.str(row)]) + plt.plot(xVals, photoCorr[row, :], "r-", + label=['photoCorr, row: '+np.str(row)]) + plt.xlabel('Time (s)') + plt.ylabel('ActivityTrace') + plt.legend() + plt.show() + + elif raw.ndim == 1: + smoothed = medfilt(raw, medfilt_window) + popt, pcov, xVals = fitPhotobleaching(smoothed, vps) + photoCorr= popt[0] * raw / expfunc(xVals, *popt) + + else: + raise ValueError('The number dimensions of the data in correctPhotobleaching are not 1 or 2') + + return photoCorr + + +def fitPhotobleaching(activityTrace, vps): + """" + Fit an exponential. + + Accepts a single neuron's activity trace. + + Use Xiaowen's photobleaching correction method which fits an exponential and then divides it off + Note we are following along with the explicit noise model from Xiaowen's Phys Rev E paper Eq 1 page 3 + Chen, Randi, Leifer and Bialek, Phys Rev E 2019 + + + """ + assert activityTrace.ndim == 1, "fitPhotobleaching only works on single neuron traces" + + # Now we will fit an exponential, following along with the tuotiral here: + # https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html + xVals = np.array(np.arange(activityTrace.shape[-1])) / np.float(vps) + + # Identify just the data that is not a NaN + nonNaNs = np.logical_not(np.isnan(activityTrace)) + + from scipy.optimize import curve_fit + + # set up some bounds on our exponential fitting parameter, y=a*exp(bx)+c + num_recordingLengths = 6 # the timescale of exponential decay should not be more than six recording lengths long + # because otherwise we could have recorded for way longer!! 
+ + bounds = ([0, 1 / (num_recordingLengths * max(xVals)), 0], # lower bounds + [max(activityTrace[nonNaNs]), 0.5, 2 * np.nanmean(activityTrace)]) # upper bound + + # as a first guess, a is half the max, b is 1/(half the length of the recording), and c is the average + popt_guess = [max(activityTrace) / 2, 2 / max(xVals), np.nanmean(activityTrace)] + + popt, pcov = curve_fit(expfunc, xVals[nonNaNs], activityTrace[nonNaNs], p0=popt_guess, + bounds=bounds) + + showPlots = False + if showPlots: + import matplotlib.pyplot as plt + plt.plot(xVals, activityTrace, 'b-', label='data') + plt.plot(xVals, expfunc(xVals, *popt), "r-", + label='fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) + plt.xlabel('Time (s)') + plt.ylabel('ActivityTrace') + plt.legend() + plt.show() + + ## Now we want to inspect our fit, find values that are clear outliers, and refit while excluding those + residual = xVals - expfunc(xVals, *popt) # its ok to have nan's here + + nSigmas = 3 # we want to exclude points that are three standard deviations away from the fit + + # Make a new mask that excludes the outliers + excOutliers = np.copy(nonNaNs) + excOutliers[np.nonzero(np.abs(residual) > (nSigmas * np.nanstd(residual)))] = False + + # Refit excluding the outliers, use the previous fit as initial guess + popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt, + bounds=bounds) + if showPlots: + plt.plot(xVals, expfunc(xVals, *popt), 'y-', + label='excluding outliers fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) + plt.legend() + + return popt, pcov, xVals + + +def expfunc(x, a, b, c): + # type: (xVals, a, b, c) -> yVals + return a * np.exp(-b * x) + c + + diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index f334419..ea31017 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -1,3 +1,4 @@ +from __future__ import division #give me floating point when I divide (standard in python3) # standard modules import numpy as np @@ -41,6 +42,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix 'windowGCamp': 5, # gauss window for red and green channel 'interpolateNans': 1,#interpolate gaps smaller than this of nan values in calcium data + 'volumeAcquisitionRate': 6., #rate at which volumes are acquired } dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) From d26ead8743e8c708fd67e6b088c3e8637fa1adad Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 7 Nov 2019 11:20:49 -0500 Subject: [PATCH 08/25] Adjusted the gaussian window used to calculate theta dot from 100 to 75 --- prediction/runAnalysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index ea31017..8b3dc82 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -38,7 +38,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd - 'gaussWindow':100, # sgauss window for angle velocity derivative. 
must be odd + 'gaussWindow': 75, # gaussianfilter1D is uesed to calculate theta dot from theta in transformEigenworms 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix 'windowGCamp': 5, # gauss window for red and green channel 'interpolateNans': 1,#interpolate gaps smaller than this of nan values in calcium data From 44aeca5fe7f49f3ad0551f331582ba850aea7237 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 7 Nov 2019 16:13:37 -0500 Subject: [PATCH 09/25] Improved robustness and fixed errors in photobleaching correction Now I can run the analysis all the way through and generate fig 2. Performance however is extremely low, probably because by using the raw traces I am skipping over some outlier removal that Jeff must have been taking care of. Also turned of the PCARaw analysis which seems to be unused and was throwing an error. --- prediction/dataHandler.py | 41 +++++++++++++++++++++++++---------- prediction/metaRunAnalysis.py | 6 ++++- prediction/runAnalysis.py | 2 +- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index ca2d881..67f1eec 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -389,12 +389,20 @@ def loadData(folder, dataPars, ew=1): rRaw=np.array(data['rRaw'])[:,:len(np.array(data['hasPointsTime']))] gRaw=np.array(data['gRaw'])[:,:len(np.array(data['hasPointsTime']))] - rPhotoCorr=correctPhotobleaching(rRaw,dataPars['volumeAcquisitionRate']) - gPhotoCorr = correctPhotobleaching(gRaw, dataPars['volumeAcquisitionRate']) + #load neural data - R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] +# R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] +# G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + + R = correctPhotobleaching(rRaw, dataPars['volumeAcquisitionRate']) + G = correctPhotobleaching(gRaw, dataPars['volumeAcquisitionRate']) + + if np.any(np.isnan(R)): + print("New rPHotoCorr has some NaNs in it ") + if np.any(np.isnan(G)): + print("New gPHotoCorr has some NaNs in it ") + # Ratio = np.array(data['Ratio2'])[:,:len(np.array(data['hasPointsTime']))] Y, dR, GS, RS, RM = preprocessNeuralData(R, G, dataPars) @@ -591,7 +599,7 @@ def correctPhotobleaching(raw, vps=6): photoCorr = np.zeros_like(raw) # initialize photoCorr - showPlots = True + showPlots = False if raw.ndim == 2: @@ -613,7 +621,7 @@ def correctPhotobleaching(raw, vps=6): if showPlots: import matplotlib.pyplot as plt - plt.plot(xVals, raw[row, :], 'b-', label=['raw, row: '+np.str(row)]) + plt.plot(xVals, smoothed[row, :], 'b-', label=['raw, row: '+np.str(row)]) plt.plot(xVals, photoCorr[row, :], "r-", label=['photoCorr, row: '+np.str(row)]) plt.xlabel('Time (s)') @@ -659,8 +667,12 @@ def fitPhotobleaching(activityTrace, vps): num_recordingLengths = 6 # the timescale of exponential decay should not be more than six recording lengths long # because otherwise we could have recorded for way longer!! + #Scale the activity trace to somethign around one. 
+ scaleFactor=np.nanmean(activityTrace) + activityTrace=activityTrace/scaleFactor + bounds = ([0, 1 / (num_recordingLengths * max(xVals)), 0], # lower bounds - [max(activityTrace[nonNaNs]), 0.5, 2 * np.nanmean(activityTrace)]) # upper bound + [max(activityTrace[nonNaNs])*1.5, 0.5, 2 * np.nanmean(activityTrace)]) # upper bound # as a first guess, a is half the max, b is 1/(half the length of the recording), and c is the average popt_guess = [max(activityTrace) / 2, 2 / max(xVals), np.nanmean(activityTrace)] @@ -680,22 +692,29 @@ def fitPhotobleaching(activityTrace, vps): plt.show() ## Now we want to inspect our fit, find values that are clear outliers, and refit while excluding those - residual = xVals - expfunc(xVals, *popt) # its ok to have nan's here + residual = activityTrace - expfunc(xVals, *popt) # its ok to have nan's here nSigmas = 3 # we want to exclude points that are three standard deviations away from the fit # Make a new mask that excludes the outliers excOutliers = np.copy(nonNaNs) - excOutliers[np.nonzero(np.abs(residual) > (nSigmas * np.nanstd(residual)))] = False + excOutliers[(np.abs(residual) > (nSigmas * np.nanstd(residual)))] = False # Refit excluding the outliers, use the previous fit as initial guess - popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt, - bounds=bounds) + # note we relax the bounds here a bit + + try: + popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt,bounds=bounds) + except: + popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt) + if showPlots: plt.plot(xVals, expfunc(xVals, *popt), 'y-', label='excluding outliers fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) plt.legend() + #rescale back to full size + popt[0]=scaleFactor*popt[0] return popt, pcov, xVals diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 0882f6d..f4bafb5 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -10,16 +10,20 @@ import runAnalysis as analysis + + +print("Doing the Moving Worms") analysis.actuallyRun('AML32','moving') analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') - +print("Doing the Immobilized Worms") analysis.actuallyRun('AML32','immobilized') analysis.actuallyRun('AML18','immobilized') analysis.actuallyRun('AML70','immobilized') +print("Doing worms in Chips") analysis.actuallyRun('AML32','chip') analysis.actuallyRun('Special','transition') #Note these are identicail.. # I copied and pasted the input files.. diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 8b3dc82..5de3119 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -182,7 +182,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): print 'running PCA' for kindex, key in enumerate(keyList): resultDict[key]['PCA'] = dr.runPCANormal(dataSets[key], pars) - resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) + # resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) #correlate behavior and PCA From 5242d487a5cc2ec70290faf987ba88ee1d5fa864 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 8 Nov 2019 20:34:46 -0500 Subject: [PATCH 10/25] Place holder for outlier detection and NaNing Plan is to use a Hampel filter but replacing the outliers with NaNs. 
See: https://towardsdatascience.com/outlier-detection-with-hampel-filter-85ddf523c73d --- prediction/dataHandler.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 67f1eec..df7cb25 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -395,13 +395,13 @@ def loadData(folder, dataPars, ew=1): # R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] # G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - R = correctPhotobleaching(rRaw, dataPars['volumeAcquisitionRate']) - G = correctPhotobleaching(gRaw, dataPars['volumeAcquisitionRate']) + vps=dataPars['volumeAcquisitionRate'] + R = correctPhotobleaching(rRaw, vps) + G = correctPhotobleaching(gRaw, vps) + + R = nanOutliers(rRaw,np.round(3.3*vps)) + G = nanOutliers(gRaw,np.round(3.3*vps)) - if np.any(np.isnan(R)): - print("New rPHotoCorr has some NaNs in it ") - if np.any(np.isnan(G)): - print("New gPHotoCorr has some NaNs in it ") # Ratio = np.array(data['Ratio2'])[:,:len(np.array(data['hasPointsTime']))] @@ -640,6 +640,7 @@ def correctPhotobleaching(raw, vps=6): return photoCorr + def fitPhotobleaching(activityTrace, vps): """" Fit an exponential. @@ -722,4 +723,18 @@ def expfunc(x, a, b, c): # type: (xVals, a, b, c) -> yVals return a * np.exp(-b * x) + c +def nanOutliers(trace, windowSize): + """ + Use a Hamdel like filter to find outliers, but instead of replacing them with the median, replace them with the mean. + + This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. + See: https://towardsdatascience.com/outlier-detection-with-hampel-filter-85ddf523c73d + + + :param trace: + :param windowSize: + :return: + """ + print("Need to replace outliers with NaNs") + return trace From f5570a0a54ac1ddaef21e5e4201ed8c4bb880e43 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 8 Nov 2019 20:38:58 -0500 Subject: [PATCH 11/25] typo --- prediction/dataHandler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index df7cb25..5b4d569 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -725,7 +725,7 @@ def expfunc(x, a, b, c): def nanOutliers(trace, windowSize): """ - Use a Hamdel like filter to find outliers, but instead of replacing them with the median, replace them with the mean. + Use a Hamdel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. From 28adc7cb03ef00481d1eedcd63a51038f0c2b8e4 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 14 Nov 2019 14:50:46 -0500 Subject: [PATCH 12/25] nanOutliers function. no errors, but not fully tested. added a nanoutliers funciotn that uses a modified hampel filter to replace 3sigma outliers in a specificed framewindow size (here 3.3s, or 20 volumes) with NaNs. The functions run without error. but i haven't fully tested the results. Note that the function is super slow and seems to seriously slow down the analysis. 
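As a side note on the speed issue: the same outliers-to-NaN idea can be sketched with a pandas rolling median instead of the per-sample Python loop. This is only a sketch of the approach, not code from this patch, and pandas is an assumed extra dependency that the pipeline does not currently import; the 20-volume window is the ~3.3 s value mentioned above.

import numpy as np
import pandas as pd

def hampel_to_nan_sketch(trace, window=20, n_sigmas=3):
    # rolling median and rolling MAD, both NaN-tolerant in pandas
    k = 1.4826  # MAD-to-sigma scale factor for Gaussian-distributed data
    s = pd.Series(np.asarray(trace, dtype=float))
    med = s.rolling(window, center=True, min_periods=1).median()
    mad = (s - med).abs().rolling(window, center=True, min_periods=1).median()
    out = s.copy()
    # replace outliers with NaN rather than with the local median
    out[(s - med).abs() > n_sigmas * k * mad] = np.nan
    return out.values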
--- prediction/dataHandler.py | 62 ++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 5b4d569..c868f8a 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -399,8 +399,8 @@ def loadData(folder, dataPars, ew=1): R = correctPhotobleaching(rRaw, vps) G = correctPhotobleaching(gRaw, vps) - R = nanOutliers(rRaw,np.round(3.3*vps)) - G = nanOutliers(gRaw,np.round(3.3*vps)) + R = nanOutliers_allNeurons(rRaw,np.round(3.3*vps)) + G = nanOutliers_allNeurons(gRaw,np.round(3.3*vps)) # @@ -723,18 +723,66 @@ def expfunc(x, a, b, c): # type: (xVals, a, b, c) -> yVals return a * np.exp(-b * x) + c -def nanOutliers(trace, windowSize): + + + +def nanOutliers_allNeurons(multiNeurons, window_size, n_sigmas=3): + """ + Run nanOutliers on many neurons + + Use a Hampel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. + + This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. + :return: """ - Use a Hamdel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. + assert multiNeurons.ndim == 2, '2D input data is expected' + assert multiNeurons.shape[1]>multiNeurons.shape[0], 'we expect more time points than neurons' + + new_multiNeurons=np.copy(multiNeurons) + + for i in np.arange(multiNeurons.shape[0]): + new_multiNeurons[i,:]=nanOutliers(multiNeurons[i,:],window_size,n_sigmas)[0] + return new_multiNeurons + + + + +def nanOutliers(input_series, window_size, n_sigmas=3): + """ + Use a Hampel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. See: https://towardsdatascience.com/outlier-detection-with-hampel-filter-85ddf523c73d - :param trace: + + + + :param input_series: :param windowSize: + :param n_sigmas=3: :return: """ - print("Need to replace outliers with NaNs") - return trace + assert input_series.ndim == 1, 'input dimension greater than 1! nanOutliers can only deal with 1D data' + + window_size = np.int(np.round(window_size)) + + #The following is identical to here: https://gist.githubusercontent.com/erykml/d15525855f2ef455bd7969240f6f4073/raw/c17741cdef7f69cebd3ddae191ecdd7db95d4051/hampel_filter_forloop.py + #Except for using nanmedian instead of median, and replacing with nans instead of with teh median + n = len(input_series) + new_series = input_series.copy() + k = 1.4826 # scale factor for Gaussian distribution + + indices = [] + thresh=0.0 + + # possibly use np.nanmedian + for i in range((window_size), (n - window_size)): + x0 = np.nanmedian(input_series[(i - window_size):(i + window_size)]) + S0 = k * np.nanmedian(np.abs(input_series[(i - window_size):(i + window_size)] - x0)) + if np.any(np.abs(input_series[i] - x0) > n_sigmas * S0): + new_series[i] = np.nan #this line is different + indices.append(i) + + return new_series, indices From d42483dfe25581bf0f74778eeda7448fdbb12503 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 14 Nov 2019 17:03:23 -0500 Subject: [PATCH 13/25] Now pipeline uses raw, does photobleaching, and outlier removal Needs to test. 
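For reference, the photobleaching step amounts to fitting a single exponential to each neuron's trace and dividing it out. A minimal self-contained sketch of just that idea (scipy only; it leaves out the bounds, the outlier refit, and the rescaling details that dataHandler.py handles):

import numpy as np
from scipy.optimize import curve_fit

def expfunc(x, a, b, c):
    # single-exponential bleaching model
    return a * np.exp(-b * x) + c

def photobleach_correct_sketch(trace, vps=6.0):
    trace = np.asarray(trace, dtype=float)
    x = np.arange(trace.size) / float(vps)    # time axis in seconds
    good = np.isfinite(trace)                 # ignore NaNs when fitting
    p0 = [np.nanmax(trace) / 2.0, 2.0 / x[-1], np.nanmean(trace)]
    popt, pcov = curve_fit(expfunc, x[good], trace[good], p0=p0)
    # divide out the fitted decay; multiply by a so the trace keeps roughly its original scale
    return popt[0] * trace / expfunc(x, *popt)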
--- prediction/dataHandler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index c868f8a..d847faa 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -399,8 +399,8 @@ def loadData(folder, dataPars, ew=1): R = correctPhotobleaching(rRaw, vps) G = correctPhotobleaching(gRaw, vps) - R = nanOutliers_allNeurons(rRaw,np.round(3.3*vps)) - G = nanOutliers_allNeurons(gRaw,np.round(3.3*vps)) + R = nanOutliers_allNeurons(R,np.round(3.3*vps)) + G = nanOutliers_allNeurons(G,np.round(3.3*vps)) # From 467a92b9d05c7b4ae5995faeec0b51059b83f675 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:17:56 -0500 Subject: [PATCH 14/25] Made an if statement to turn off new photobleaching and outlier detection Code should be functionally the same as the state Monika left it for the latest manuscript. Note fig 2v3 still generates 0.44 R2 instaed of 0.49 as expected (but still consistent with how I recieved the code in early june). --- prediction/dataHandler.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index d847faa..bdff394 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -392,15 +392,17 @@ def loadData(folder, dataPars, ew=1): #load neural data -# R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] -# G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - - vps=dataPars['volumeAcquisitionRate'] - R = correctPhotobleaching(rRaw, vps) - G = correctPhotobleaching(gRaw, vps) + original=True + if original: + R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + else: + vps = dataPars['volumeAcquisitionRate'] + R = correctPhotobleaching(rRaw, vps) + G = correctPhotobleaching(gRaw, vps) - R = nanOutliers_allNeurons(R,np.round(3.3*vps)) - G = nanOutliers_allNeurons(G,np.round(3.3*vps)) + R = nanOutliers_allNeurons(R,np.round(3.3*vps)) + G = nanOutliers_allNeurons(G,np.round(3.3*vps)) # From 3830991dab494d0f24a9ba68c8398dbf992e41e7 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:18:39 -0500 Subject: [PATCH 15/25] Changed folder directories of data quality check to make it run. 
--- prediction/dataQualityCheck.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/prediction/dataQualityCheck.py b/prediction/dataQualityCheck.py index 314120e..500e85a 100755 --- a/prediction/dataQualityCheck.py +++ b/prediction/dataQualityCheck.py @@ -16,17 +16,22 @@ # ############################################### typ = 'AML32' # possible values AML32, AML18, AML70 -condition = 'immobilized'# # Moving, immobilized, chip +condition = 'moving'# # Moving, immobilized, chip first = True # if true, create new HDF5 file ############################################### # # load data into dictionary # ############################################## -folder = '../{}_{}/'.format(typ, condition) -dataLog = '../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "../Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "../Analysis/{}_{}.hdf5".format(typ, condition) +#folder = '../{}_{}/'.format(typ, condition) +#dataLog = '../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) +#outLoc = "../Analysis/{}_{}_results.hdf5".format(typ, condition) +#outLocData = "../Analysis/{}_{}.hdf5".format(typ, condition) + +folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) +dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) +outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) +outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd @@ -88,7 +93,7 @@ periodogram = 0 nestedvalidation = 0 lasso = 0 -elasticnet = 0#True +elasticnet = 1#True lagregression = 0 positionweights = 0#True resultsPredictionOverview = 0 From 1846ecc7632377c2da7ad0ee8a442a8420ef647e Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:19:24 -0500 Subject: [PATCH 16/25] Adjusted the parameters back to the original --- prediction/runAnalysis.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 5de3119..467a212 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -45,6 +45,16 @@ def actuallyRun(typ='AML32', condition = 'moving'): 'volumeAcquisitionRate': 6., #rate at which volumes are acquired } + + #original data parameters + dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd + 'gaussWindow':100, # gauss window for angle velocity derivative. Acts on full (50Hz) data + 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix + 'windowGCamp': 6, # gauss window for red and green channel + 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data + } + + dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) keyList = np.sort(dataSets.keys()) if save: @@ -57,7 +67,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): resultDict[key]['pars'] = dataPars # analysis parameters - pars ={'nCompPCA':20, # no of PCA components + pars ={'nCompPCA':10, # no of PCA components 'PCAtimewarp':False, #timewarp so behaviors are equally represented 'trainingCut': 0.6, # what fraction of data to use for training 'trainingType': 'middle', # simple, random or middle.select random or consecutive data for training. 
Middle is a testset in the middle @@ -71,6 +81,8 @@ def actuallyRun(typ='AML32', condition = 'moving'): 'periods': np.arange(0, 300) # relevant periods in seconds for timescale estimate } + + behaviors = ['AngleVelocity', 'Eigenworm3'] ############################################### From a3e24f93bbabc947ffe91b7e7e7d54ea68b836c3 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:19:47 -0500 Subject: [PATCH 17/25] Set meta run analysis to only deal with moving worms for now --- prediction/metaRunAnalysis.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index f4bafb5..4f162c5 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -14,24 +14,25 @@ print("Doing the Moving Worms") analysis.actuallyRun('AML32','moving') -analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') +analysis.actuallyRun('AML18','moving') -print("Doing the Immobilized Worms") -analysis.actuallyRun('AML32','immobilized') -analysis.actuallyRun('AML18','immobilized') -analysis.actuallyRun('AML70','immobilized') +if True: + print("Doing the Immobilized Worms") + analysis.actuallyRun('AML32','immobilized') + analysis.actuallyRun('AML18','immobilized') + analysis.actuallyRun('AML70','immobilized') -print("Doing worms in Chips") -analysis.actuallyRun('AML32','chip') -analysis.actuallyRun('Special','transition') #Note these are identicail.. - # I copied and pasted the input files.. + print("Doing worms in Chips") + analysis.actuallyRun('AML32','chip') + analysis.actuallyRun('Special','transition') #Note these are identicail.. + # I copied and pasted the input files.. -analysis.actuallyRun('AML70','chip') + analysis.actuallyRun('AML70','chip') -execfile('../figures/main/fig2v3.py') -execfile('../figures/supp/S4.py') -execfile('../figures/supp/S2.py') -execfile('../figures/main/fig1v3.py') -execfile('../figures/main/fig2_expVarslm.py') +#execfile('../figures/main/fig2v3.py') +#execfile('../figures/supp/S4.py') +#execfile('../figures/supp/S2.py') +#execfile('../figures/main/fig1v3.py') +#execfile('../figures/main/fig2_expVarslm.py') From 3f0e900b5f733d4e0688153ed266db0c359fc0f0 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:27:10 -0500 Subject: [PATCH 18/25] Doing some tests to find out why fig2 only generates R2=0.44, no 0.49 Made minimum changes necessary to run metaRunAnalysis and regenerate the analysis for AML32 moving. And also to run dataQualityCheck. Code should otherwise be as Monika left it in June. 
--- prediction/dataQualityCheck.py | 10 +++++----- prediction/metaRunAnalysis.py | 32 +++++++++++++++++--------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/prediction/dataQualityCheck.py b/prediction/dataQualityCheck.py index 314120e..88762b5 100755 --- a/prediction/dataQualityCheck.py +++ b/prediction/dataQualityCheck.py @@ -16,17 +16,17 @@ # ############################################### typ = 'AML32' # possible values AML32, AML18, AML70 -condition = 'immobilized'# # Moving, immobilized, chip +condition = 'moving'# # Moving, immobilized, chip first = True # if true, create new HDF5 file ############################################### # # load data into dictionary # ############################################## -folder = '../{}_{}/'.format(typ, condition) -dataLog = '../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "../Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "../Analysis/{}_{}.hdf5".format(typ, condition) +folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) +dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) +outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) +outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 0882f6d..a0c51b9 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,23 +11,25 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') -analysis.actuallyRun('AML18','moving') -analysis.actuallyRun('AML70','moving') -analysis.actuallyRun('AML175','moving') +if False: + analysis.actuallyRun('AML18','moving') + analysis.actuallyRun('AML70','moving') + analysis.actuallyRun('AML175','moving') -analysis.actuallyRun('AML32','immobilized') -analysis.actuallyRun('AML18','immobilized') -analysis.actuallyRun('AML70','immobilized') -analysis.actuallyRun('AML32','chip') -analysis.actuallyRun('Special','transition') #Note these are identicail.. - # I copied and pasted the input files.. + analysis.actuallyRun('AML32','immobilized') + analysis.actuallyRun('AML18','immobilized') + analysis.actuallyRun('AML70','immobilized') -analysis.actuallyRun('AML70','chip') + analysis.actuallyRun('AML32','chip') + analysis.actuallyRun('Special','transition') #Note these are identicail.. + # I copied and pasted the input files.. 
-execfile('../figures/main/fig2v3.py') -execfile('../figures/supp/S4.py') -execfile('../figures/supp/S2.py') -execfile('../figures/main/fig1v3.py') -execfile('../figures/main/fig2_expVarslm.py') + analysis.actuallyRun('AML70','chip') + + execfile('../figures/main/fig2v3.py') + execfile('../figures/supp/S4.py') + execfile('../figures/supp/S2.py') + execfile('../figures/main/fig1v3.py') + execfile('../figures/main/fig2_expVarslm.py') From 8745a9335b2ead2a278dbbb63e963f0d3b3a8bf6 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 14:05:48 -0500 Subject: [PATCH 19/25] updated stamp on figure w/ provenance --- figures/main/fig2v3.py | 6 ++++++ prediction/metaRunAnalysis.py | 5 +---- prediction/provenance.py | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/figures/main/fig2v3.py b/figures/main/fig2v3.py index 06ca0cd..dcd64c6 100755 --- a/figures/main/fig2v3.py +++ b/figures/main/fig2v3.py @@ -218,6 +218,7 @@ ax7.text(-140, 0, 'Velocity\n(rad/s)', color = R1, rotation=0, verticalalignment='center', fontsize=12, multialignment='center') ax8.text(-160, 0, 'Body \n curvature \n(a.u.)', color = B1, rotation=0, verticalalignment='center', fontsize=12, multialignment='center') + # move axis to the right ax7.yaxis.tick_right() ax8.yaxis.tick_right() @@ -493,6 +494,11 @@ plt.show() # get all the weights for the different samples + +import prediction.provenance as prov +prov.stamp(axV,.1,.3,'branch: minimumLocal') + + #for typ, colors, ax in zip(['AML32', 'AML18'], [colorsExp, colorCtrl], [ax11, ax12]): # for condition in ['moving', 'immobilized']: # key = '{}_{}'.format(typ, condition) diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index a0c51b9..54436b3 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,13 +11,10 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') - -if False: +if True: analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') - - analysis.actuallyRun('AML32','immobilized') analysis.actuallyRun('AML18','immobilized') analysis.actuallyRun('AML70','immobilized') diff --git a/prediction/provenance.py b/prediction/provenance.py index 769b8d7..ba67b98 100644 --- a/prediction/provenance.py +++ b/prediction/provenance.py @@ -25,7 +25,7 @@ def getStampString(): return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath) -def stamp(ax, x,y,notes): +def stamp(ax, x,y,notes=''): """ :param ax: matplotlib axes h :param x: fractional location in the plot From bed516f98d0961e380e3789bef203a917470bea4 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 21 Nov 2019 11:04:35 -0500 Subject: [PATCH 20/25] Now updated the provenance module to include branch name --- figures/main/fig2v3.py | 2 +- prediction/provenance.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/figures/main/fig2v3.py b/figures/main/fig2v3.py index dcd64c6..e01ff3a 100755 --- a/figures/main/fig2v3.py +++ b/figures/main/fig2v3.py @@ -496,7 +496,7 @@ import prediction.provenance as prov -prov.stamp(axV,.1,.3,'branch: minimumLocal') +prov.stamp(axV,.1,.3) #for typ, colors, ax in zip(['AML32', 'AML18'], [colorsExp, colorCtrl], [ax11, ax12]): diff --git a/prediction/provenance.py b/prediction/provenance.py index ba67b98..858163e 100644 --- a/prediction/provenance.py +++ b/prediction/provenance.py @@ -19,11 +19,11 @@ def getStampString(): hash = repo.head.object.hexsha gitpath = repoPath = repo._working_tree_dir giturl = 
repo.remotes.origin.url - + gitbranch = repo.active_branch from datetime import datetime timestamp = datetime.today().strftime('%Y-%m-%d %H:%M') - return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath) + return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath + '\nBranch: ' + str(gitbranch)) def stamp(ax, x,y,notes=''): """ From 4c6b01c81c197ea003a2ed00ef295e2c311dd357 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 21 Nov 2019 15:24:23 -0500 Subject: [PATCH 21/25] testing hampel filter with debug output --- prediction/dataHandler.py | 10 +++++++++- prediction/dataHandler.pyc | Bin 20534 -> 27715 bytes prediction/metaRunAnalysis.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index bdff394..3d36ecd 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -392,7 +392,7 @@ def loadData(folder, dataPars, ew=1): #load neural data - original=True + original=False if original: R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] @@ -787,4 +787,12 @@ def nanOutliers(input_series, window_size, n_sigmas=3): new_series[i] = np.nan #this line is different indices.append(i) + Debug = True + if Debug: + import matplotlib.pyplot as plt + plt.plot(input_series,'r') + plt.plot(new_series,'ob') + plt.show() + + return new_series, indices diff --git a/prediction/dataHandler.pyc b/prediction/dataHandler.pyc index c3e868c1c279434ffc57e225ae43d34416a8af68..c453db316763fbeff1ace772180051e5d7224371 100644 GIT binary patch delta 11894 zcmcIqdvIJ=c|T`YT3atmwpP#8M>H0S0b51lmG6?Er-q=%g)#6etuzfetAY3MIo(CUNloSB|NhVC3_#nXr1(Go1xtra~oKRH{|Yw5n7@ z%|y@)Q&y>mw3=yCcDu5wCEcN{0cF)nx>H$o>LIMpP5+Iw~2acy`%2(;;lR~h^U=zj!{=*Uw; zWuMUa`(gRF1G>li^7dzRi}$9A`Es6?Deu24_MzoirP&s_tQEelQcEGVUe0tlkCaxq zTfKKwo(dSs`)TFffiC4uSB(c}^;xg3YN*=Xrk3i|Wrf}A_C8VdIOaH8{Y;vpN5LMt zW5jiQ1FCtwOmUX5tJP9~g;nc8zhqh#G!^AJHp@tAL0o3-wQRJUCF3mNx(^FYxoqdg zR>sU~?}3_oz|S{o9t{n!X^ofMk&G_9NKwy|^QyE4|9v z_NrDEw2`!Xduyi$dXRKOAc>T-m|2b1G0*;Yn^Hv98>+aX4w_ev#)2!=sy{^$Ufl#p7dsp3lZ9Mh$x`=M5XTw`axN5V7(eVFw!Ml(|_-nZ(1SHI3nhYhrPWB6;j)>~@$ zf_~haZ0teFy^U|}66tP3>trToxofVOX3q>smPC84O;2?HA(Bodhzf*sW1uzApyS*U zN`YtZsPWX7np%ST2JgkzJ$k#>7&%yy|J{*6-RnIP>Bi)L8rfebf{mb0NxCmaI>Y2y z#N}jit~tlr9y7Wngo%xlC8PQq`40BsOy=7`WJJe} z#g~PPc*f_(Nu*5*)bIDlY%MF_I_te%kLs^^-|OlK8Q78i+IxF<^HaUupV8irds>l2F_S!# zNZIBaal_1z`5=nE?R~koXQT=#-ixD#FeF#}Tu|i$Di2zw*4|mX_qgxJ&OhPp+1;n# z<4x_}J6g0$A(h{*)?sO2b%Io7R0nVSV}l@j4M~rCcHed38jR-hfZFMml$}FooYW8WHJm}!_pD&+C5J5 z{~+n`4yr(Hpas8LJ*KPm9`7T2T5y-XvgZk{cY4|WI|jJx=Ico0(8#f(G%m`?^-r^? z<6iH({lj<47&8PV%s~dthkb43eTXop>^q+barl+N@?Nb>~G{LBdL#qu%f!Sexi z;-tIHd+o@+LapYJ-X?Y!7esEy0uDAn8*FkwONYD*qvHjKo4A@tTbb1pM`L%Ko=Akb zaM6I=+UO2ud1%ZJk?>TTA0`nY@`#!rA^9qZOVg6Ib8&1(B5BK>$+01(v25QCQm^`& zGagHNHDlWvHs${@j`T^Ab6)@0@7-_%Sa3R!S3#a{{DLETL^ssc>XBfF?hH2QZru|U z8@3OvIKL>cZ*6<(=VLGEs(O`=sPzbRXu$jC_(V;ca(AeuoeD6;s`oxHK2m5`>3c=d zgjGwvL%CtK-lEn!urn%;*2uP&vhB)kQ0x8>jl`j77RGKAn<9@E82;)yzto?!QO*a^ zXN`Io=f(TjxKZd-ZW9Maf2h36L3OTN)iR9bxoLu$E9 z<(>;}_CtlJXdLD}!V3i3r`Gq#^7|C<#@Y1F4Oae*tZ?E5sN_yqj2l!wD&2hfK_(q) z9S|L(6mT2bIIRW@2S-Ry^Rp4^TTT0|H3oEJS_)M zatVhUEeu#{EQ2R+1lw(FZ1>m8vEA5e6vItA0Xux+BzGSmEuSK=* zuZqUFs`fh6_H0aa06BP~r6YV*+v`z_Ggn-}1ac!Axf_tfDHv4)vb;%T{cYl655mEP zN-1`{5p|=0SBJcx9Y};8fXX%R_w4Jt3)pp)5Q~sUY6{1JpIdGcpp!FWtLFQVm)rt! 
zVl8r~7c*|=XeO7F?80WY0d0X+0jdtiFRZ{NcHtD8z*h_N@~ON@Obbd9nZ3le-WRUF z4W3K;#8>ng?>{GgtOvXod)f;G(*k{OIdyX`m&y4^;z5*}y(9$^=m}*$OR`L|Y44t9 zR!kkyli01}v7}?e62Qv<+i)y|=eeXEi!TD#xs9kv$I_=(++@PejX1NhWOB-0$z{@x z^v@B7nm^!hg!i}Emwdj&@ZRDrO}6SsyoV?E+%9-&7EMgLfzmkP%qP+bU`+7>Qmp$R z35}}x9grx^py`s(n2Iw5kH_+UF}be}_f45^AbBInZ+nLi))zb`KT7f`k`a=>CHXkX zlO#(ZGQI6CW>PU{+591ke^e^YW!-K56*CmZT+E`V9N7P74-V=_y`LQXqmWG$@HgJuZX9R|OzO$9kPhideJD@~ zKv0YDNL!irnHwJi-n{G3;Xtd%LpJa}&hrrxo&k9De|fl1f6ROS@IdGs3tad>)80Kt zn)Q#o)guO+zbB5oFEq}oectUyqvO!?TYYF5R6O!OK}A%c>iulucVX&g?_EbP>tA`- z9P7q#Q^!76CrAoaN&+J0oY#NzYdW@L;o_30;4ZyO$GLrF_~8@-zj)8x{K?^4kd$K! z#{k&L+4Mq1w%M3v_E5HLXWr?3{P=;YUou(DAHHMSd$Os_`>hi@0(Ajz`9wSTdF_e2 zC%FhY6}ee|06BK`rP*?}vlz?Te%$6RPQX#6!&=sRp!c-Vg~BHCPlgpxnQ+g|tH2Kt zgg2ag6qej;ymdl%dw1UY9fYF}+_n$Xzu{!lQxDxXs~ZQ6AImbfaX*ePzJOt)E$s$R@JAB8y9`W9NN3@XOP6!t}knp(xf8PL;P)Rtv7-d;2 zCqi!pP#wnaLA|9apmNia1s)4R@uCk>WvT@b$eET-5TX*A>g+Xpnx7Qo#j`~eSd%sE`q1qRSLL1q%`q5CJNH#E`VPc zIT#awf1fS73m6r+9{nm+flV+MyaXv_&{gUdf>K_o*1n`!RdOkgoxfM-tHj5~uvHAv z;#)*dSP14jhq>W@Us*Q^-+H6%Mq<5CMR$8L@4M{iQs&z)NdL^5m+Oca?oJLSqtr8wTg()e!o-1qW7W7TtV9^x`JU5N4uJO71iVI0n8 zlb4JvZR&cDBjIAUd6{J@610!weK8E1@WJd;F`i-4Pv z%^<$y$kbS1&IY7Go03#Z^^p-Z&iK%|RYVHbc`77WC+<{%iH1`G41 z`CP);jN3R1oT~8uG?$aO&+al#+c_8@ITerv_)a_7446K&FfO0|n>Z;yHzgR_i)sQm zv6Ly>nolHMfuK_kZR*^RNy8)|Z-``&lASQ5DmUaXNGn%`@Uv9M_Kc7kQKuj33J)jZ zE}|Fir86SJTUib9lq3VO6pC`8lqfkX=EfL0d|468My4|^4kLnyd!tPlPY^NAVG2$q z=ED}{h;@BbDE3^C$LmW?<2F`Vi5x?U&TJu>ShmepTz}{jEH<9VUK;jqxCtLjnPTAy z7+oSt+{vo|E6pt~)%;32E-g|qH=E1=z0M71FEJ%R+fSP9XzYV3gDC`O3d-zcvIj)i zx+|7kv4wky9zmhXfi#E6BIWDHn`RwZZ*kXV{Xz+)ZfuI=_?+f6xNu}OGJuoHWFTrw z>PMz#i>gWlnayNfKM~LPfk|w-0Pi(0=9sOFd>70Jm^nrma}yX+H4y|*>vRhqi$oDj z4Fzk1b-E!?g8)-=uuMGCO+`dl{$s=2mmm8_+yr$ww5ql|H|4zcgB*J9F36~$>P6bvP-I1m8LHKRTfD`+gWEgJ3u z%r$z3iN9Ww>P>6ot)%Cy>p$-v(Z@ zWW#`xla~+)Dar$wu~rN`|0s|OVwkJsaW*}q5LPMmd1)6cYD0L8wor8M4Q$A2E9;0> zQY4KU`aKR!@FV2uw{sH~i6T%2yPW5;2GuQcw%z8yLo zkK0*Ve%QzK0#Jp2KLH$KMP*v>EmE=luK^dY5Vk1%rr_cg!WM-MbmD^xk*63|XCH+8 z5*-bQ23)s+)~*n1(YP0_A$p5ejIZP_+Orr)yrTq)Rnc(%;?M$gHb4J*NDu?gEezqP zX!$t?C!8y+r)iQ6xkAI0F6Lyy0(f_{(CRGvgHRUIiC3^oVqf}yfnT^VTAL&3p7I|4Bb z%hc)?9nm|BCbA}2PgAP<%7c1T2lW8jNG+lsafS~{KQWo)3`Wm|HH7{Q;9U~IJkvfUOOXBB;0&lv!c(Anq%iDX{K?_Fe;Qf?Qv*}+l?jT z(T_NO@V>9CM z#2riGfj6B2#1!$wyLk6-hgV$SSI4zuD4mC|f`_SbUQa--)TSQ>;FgHNhw|&iYbe&@ zc>=zqZ=+;RBK?T3#5m#?^ZUTe$)6lXDDE(3RkTF&?D z-~wIkkoj)iFnv>5-$c%KS90kIKj(WwEmU)Rk6|%cEXl{^4xLd1ZNWT*=)X4P@H)UN z^K1SOKdXo_r~K&Drn=b#=n}6LoLz**VW?aK!bje4Wh%NzU0-WM4OQI>{v2u*y z=L?7rK^mHf33G&qi>Ku!CFk+d#e5~NTY>lSW$)W7e%%D$6^v=yo-h_&H|tD{jJO%R z#9}$TUNE6{I&Kf+b%~LpJ{huX7uOB`><}Y3LyMw;hJ58SbYQ<_&F|kk9=~qfs+12e z^aS3(i>3T)I}2|(VLKE4C6|fr-~neU{mG*vN)|u zW#;@B5;v8_h9hsbuC|)MTY;)w=hZm*GA=UdQ=e3F?()hf%w6D zu;z5Zd<%O0kUA)emTG4t&|FrHH;_Wa0l`+?f%h~9fCIusTu=x1x+b{!;^qV=*#OaKCXW|{ZlsSaK4{mWEGff6vuk-48lfZi-q zr%0AbdP&wuXj#m+l0XwE^F1UFk~~Yomto9rk^F#Uf|C=eOd$$t9z)K&h2##Bvm{+w z_2D@ilPMA!c$2ux2c2z9QQ%P}&#az5wR0OK6w-Cxk4|-tR@YTjR#((kRd-Z`D=KSS kD;g_yRWwu#R6ka|qoSf>d&RbjGSoCwAE}tC*jC-}zk;9^f&c&j delta 5139 zcmZ`-d2pN65#J}-vSrJ*e8#fmOLiRjPVA)QFp!YM&S{)4G$zVngg+&bEK82`<#3P? 
zIVt4|JeUHdKw(;@E$u+V040=Zxq3jMFrA^LCxsR$@JFH3KU$dfw=3I8%aBMv?c063 z@9puux1XQ=wR-G6m9>9dQgp*FU*0uNGym-PU5Rh%`-;dU3IY|GHi{_<+Gc4x1?{tR z9tHDfX$M6MC|Ecm#6mKgg;+$UUx+@our<)a zDVio-XS+r}WV=m~US;>GYQ5Y3N8m+ym#FG9NAsRh_*vt4+ot@w-Fa5k>c2Smo(P_A8?(;j>bynZDZS2mZGSQ(nQ5+I|*lN0M$jq0j@SfpcItF zRQT{zfVXJ$F^8wE5}dU1m=QX#$SBrtuGy*^ON-Su-C26Aqn)R_^b@7Ant%;-gta30 zQlJ80lTx`r^s#dS8x?WwUU#e=t ztPiW+P^J2()vqe8@2Y75=joa|no-mOzW^^OtNO2A;CcU@Vq(9h+ldrLgHzq+zC6bmAVyRze zeZJxplbPK3o4#G02WKZvji)wC)IqFGCMDnz;{p~-4J4ELPt7Cx>iRnMpuV$yC3g5m z{Q-p?jx>6(m1tx4NGs1~2jk&Gp~I1sXqpMjI0asykfmG<<1yKR%rql=_G}A}TU=j< z#)rd)jMJcuTNrND?=|rfvY_7Lw>@nfD!lxNl8P8TT^hA?9#!4klukMJB_>O?r#DHUB6$Z|VN| zExkpcxa?Qso65&_@ObXDQ^rOaU>+Sl$Oi(Lc!1aJ+NV#>Z&OF~L-YGKp@h;n9f0{X zu-`|c$Q}+3#sX&JeE0B=jXcL~tez<(tB}(8;N-WH$IN5lahu9Gb+FCb4uPCDX1-|5 zT7H#AnRK$|9y`uFhrXw6RBu_Zq|=E_TMXg&fyr<(WQ8RulGjKo9*RpaLh)(iVg1O0 z%1+}h%r(Bsz_|>t^D+hHhpl0=AuBY(Px(Gw5m=~t^twQ*=Tit$H2|`e**vx~Ri+lJ zQdWuNw)j-Qxg87PP&CI5>f3wD+W7&IgiMA*@iF5BhQ{510Ef)@Im3$#FX>JN7t3JDVmsg^IgBBoHAsL+g)y^6Y6jM-qa&YVS*WKA3jg)Dk={mAXDOygr>CH+!&vGhcnrSrNLU~pdO%ze`giR^M)2b=$qiBt!u$khD zEYTS1vtQL7d7kOgO3xy?&wQC78VS)s+ljzI8<#w;7{w$c7P zduINu`}5f;^q%;fM9Wt)<0;fU81KR1h`Hzx5vQn^yQc{UC1118cvpA6cdywwn01u( z46S27eUygktm$)}Lutlym><~xnjOBfw4T@Ic(ln7>N%o}?SMZ3F+$`)D$|(D6n>g= zR1;YZGy2)g0!laM*v(?9QP3@j`8l5v8wbaR{YN4)0TDw_Qq2KiiI=J~?dK$O?xX_} z92CW8W@e+vBO>7z3z-f|=RWAPv5sdcIMb*o%<$m`f*ZJxGMz|w(_Fg$nTK>E)nKFf z9>V=Hw2reM0jrDBHH76oX>{fdh)RH%YNP>3nJR1ovkCR8hnDqayXo*ovRb%7M{90^ zK$RdbcoV=o0QS^Qdz~EKHM{(@6UR&sWqK*yP3ayiF|$k)$X?{?n*7l;Dh!K&V|L6f z*@+d~h2IH&7e;Vdbn)iFtnM5OiahGUs7(^YrkDfcRx3s4bM!wG;{x6uU)nR17eXEH zNVAoNg}6> z3IOXbMVFgM=5ExZENWr6n&BA3GYnCNbGO42Jd}$;x`_8Ml#6j=csggTk{U5%9rL-n zFvGF16&5lXo(hL7yg0j$Wj8Y1#E@aQnc*};uA64+NroRY^e{Zd@IwY^QSnseHWRk? zB_>0uiSuqR7dDb>2j1cNZ!^5b@GihByD_&Lk76M1Tp2Ql!w1m5?a()_>{6%nlPkaO zxB@m(-`1g39r^YFHDJ^Cuet#ZQtu_JZ1v)-M0%QSa51E;q~y!!)?Z%Jrq1d|)^s>_ zKwzZsx;deKuQ#vdx2^cv?>YLI-LL(Ff&NJ*;|vJ~u_^Cp4`3MJv=~Piaw<$M)0x5J z>V2(-{4m!#^rBn(1Bgi8ij9!|&(N1_|4{n6bzaQ)#=7TvB*$Dx*d-c()VUXtTpJrQ zjam*F&okEQudH8M`Vo`4F(mRj)xDv@_KDJ~Hq^uH*oN)%c()SFl9ej-D;ugE z8`$1Q`fnS8O*J6xXiidj5EGgQX?+VpJMl&9Yi_(&sd4?mrsZm$UaL%*6QEm{dY)JTf)i6G`NrNq!QgMZ#;s)HMDM@b^fI^62X5LVluh+Np_H zIFyVVN{>eS9q^Kj5q&yZqwM;@XoJerFGL%%(jrST7tfj;48>AmLmsPPR+5KTp6Ww9 zlt=pxrj9Xui{U84GYs718Gm4SkD(jx$qV!_+G=Addwe0oDu#`UD#+j*7`qulfB?y# zKz=t-zF`ms9LgpC%aqIJ#-AWuxi9D%yK9*%;C4HUT+Xs$SA(<4SyWc%taMg5tDWtx RjH}e?bQU@boO#Zw{{nP~)N=p; diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 54436b3..9a3d07b 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,7 +11,7 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') -if True: +if False: analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') From d2d11d10845e55d6da264454059bbe739a458612 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 22 Nov 2019 11:16:21 -0500 Subject: [PATCH 22/25] Monika's speedy hampel filter. Visually inspected the output on real data. Looks good. I added some visualization stuff. 
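A minimal usage sketch of the vectorized filter on an (M, N) array of traces, with synthetic data for illustration; it assumes the nanOutliers signature added in the diff below:

import numpy as np
from prediction.dataHandler import nanOutliers  # post-patch dataHandler

# 10 fake traces, 3000 volumes each, with one injected spike artifact
traces = 100.0 + np.random.randn(10, 3000)
traces[3, 1500] += 50.0
window = int(np.round(2 * 3.3 * 6))   # ~40 volumes at 6 volumes/s, as used in loadData()
cleaned, idx = nanOutliers(traces, window, n_sigmas=3)
print(np.isnan(cleaned).sum())        # how many samples got replaced with NaN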
--- prediction/dataHandler.py | 102 ++++++++++++++++---------------------- prediction/runAnalysis.py | 13 +++-- 2 files changed, 52 insertions(+), 63 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 3d36ecd..d939dec 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -398,11 +398,14 @@ def loadData(folder, dataPars, ew=1): G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] else: vps = dataPars['volumeAcquisitionRate'] + + #remove outliers with a sliding window of 40 volumes (or 6 seconds) + R = nanOutliers(rRaw, np.round(2*3.3*vps).astype(int)) + G = nanOutliers(gRaw, np.round(2*3.3*vps).astype(int)) + R = correctPhotobleaching(rRaw, vps) G = correctPhotobleaching(gRaw, vps) - R = nanOutliers_allNeurons(R,np.round(3.3*vps)) - G = nanOutliers_allNeurons(G,np.round(3.3*vps)) # @@ -728,71 +731,54 @@ def expfunc(x, a, b, c): -def nanOutliers_allNeurons(multiNeurons, window_size, n_sigmas=3): - """ - Run nanOutliers on many neurons - - Use a Hampel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. - - This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. - :return: - """ - assert multiNeurons.ndim == 2, '2D input data is expected' - assert multiNeurons.shape[1]>multiNeurons.shape[0], 'we expect more time points than neurons' - - new_multiNeurons=np.copy(multiNeurons) - - for i in np.arange(multiNeurons.shape[0]): - new_multiNeurons[i,:]=nanOutliers(multiNeurons[i,:],window_size,n_sigmas)[0] - return new_multiNeurons +from skimage.util import view_as_windows +def nanOutliers(data, window_size, n_sigmas=3): + """using a Hampel filter to detect outliers and replace them with nans. + This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. -def nanOutliers(input_series, window_size, n_sigmas=3): + The algorithm assumes the underlying series should be Gaussian. This could be changed by changing the k parameter. + Data can contain nans, these will be ignored for median calculation. + data: (M, N) array of M timeseries with N samples each. + windowsize: integer. This is half of the typical implementation. + n_sigmas: float or integer """ - Use a Hampel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. - - This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. - - See: https://towardsdatascience.com/outlier-detection-with-hampel-filter-85ddf523c73d - - - - - - :param input_series: - :param windowSize: - :param n_sigmas=3: - :return: - """ - assert input_series.ndim == 1, 'input dimension greater than 1! 
nanOutliers can only deal with 1D data' - - window_size = np.int(np.round(window_size)) - - #The following is identical to here: https://gist.githubusercontent.com/erykml/d15525855f2ef455bd7969240f6f4073/raw/c17741cdef7f69cebd3ddae191ecdd7db95d4051/hampel_filter_forloop.py - #Except for using nanmedian instead of median, and replacing with nans instead of with teh median - n = len(input_series) - new_series = input_series.copy() - k = 1.4826 # scale factor for Gaussian distribution - - indices = [] - thresh=0.0 - - # possibly use np.nanmedian - for i in range((window_size), (n - window_size)): - x0 = np.nanmedian(input_series[(i - window_size):(i + window_size)]) - S0 = k * np.nanmedian(np.abs(input_series[(i - window_size):(i + window_size)] - x0)) - if np.any(np.abs(input_series[i] - x0) > n_sigmas * S0): - new_series[i] = np.nan #this line is different - indices.append(i) + # deal with N = 1 timeseries to conform to (M=1,N) shape + if len(data.shape)<2: + data = np.reshape(data, (1,-1)) + k = 1.4826 # scale factor for Gaussian distribution + M, N = data.shape # store data shape for quick use below + # pad array to achieve at least the same size as original + paddata = np.pad(data.copy(), pad_width= [(0,0),(window_size//2,window_size//2)], \ + constant_values=(np.median(data)), mode = 'constant') + # we use strides to create rolling windows. Not nice in memory, but good in performance. + # because its meant for images, it creates some empty axes of size 1. + tmpdata = view_as_windows(paddata, (1,window_size)) + # crop data to center + tmpdata = tmpdata[:M, :N] + x0N = np.nanmedian(tmpdata, axis = (-1, -2)) + s0N =k * np.nanmedian(np.abs(tmpdata - x0N[:,:,np.newaxis, np.newaxis]), axis = (-1,-2)) + # hampel condition + hampel = (np.abs(data-x0N) - (n_sigmas*s0N)) + indices = np.where(hampel>0) + # cast to float to allow nans in output data + newData = np.array(data, dtype=float) + newData[indices] = np.nan Debug = True if Debug: + chosen = np.squeeze( np.round(np.random.rand(1) * M-1).astype(int)) import matplotlib.pyplot as plt - plt.plot(input_series,'r') - plt.plot(new_series,'ob') + plt.cla() + plt.plot(data[chosen, :],'r') + plt.plot(newData[chosen, :],'b') + plt.title('nanOutlier(), neuron: ' +str(chosen)) plt.show() + return newData, indices + + + - return new_series, indices diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 467a212..6e5e7ea 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -47,12 +47,15 @@ def actuallyRun(typ='AML32', condition = 'moving'): #original data parameters - dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd - 'gaussWindow':100, # gauss window for angle velocity derivative. Acts on full (50Hz) data - 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix + dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd + 'gaussWindow':100, # gauss window for angle velocity derivative. 
Acts on full (50Hz) data + 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix 'windowGCamp': 6, # gauss window for red and green channel - 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data - } + 'interpolateNans': 6, #interpolate gaps smaller than this of nan values in calcium data + + 'volumeAcquisitionRate': 6., # rate at which volumes are acquired + + } dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) From c49bf909669a682a85480d3922c71788d50b0f44 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 22 Nov 2019 13:47:25 -0500 Subject: [PATCH 23/25] Fixed algebra error in photobleaching correction Previously the photobleaching correction was failing by producing traces that blew up towards the latter half of the trace. The bug was an algebra error. To fit the exponent, at Francesco's suggestion, I rescaled the origional traces so that it was on the range of zero to one. This dramatically improves fitting performance and efficiency. So then I had to rescale the fitting parameters back. Previously I had been only scaling the a scaler in y=a exp(-bx)+ c. But of course c had been scaled to. So now I also rescale c. And that fixes everything. ADDITONAL: fixed a bug about using max instead of nanmax in the fitting routines. --- prediction/dataHandler.py | 57 +++++++++++++++++++++++------------ prediction/metaRunAnalysis.py | 2 +- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index d939dec..001e8f0 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -400,11 +400,11 @@ def loadData(folder, dataPars, ew=1): vps = dataPars['volumeAcquisitionRate'] #remove outliers with a sliding window of 40 volumes (or 6 seconds) - R = nanOutliers(rRaw, np.round(2*3.3*vps).astype(int)) - G = nanOutliers(gRaw, np.round(2*3.3*vps).astype(int)) + R = nanOutliers(rRaw, np.round(2*3.3*vps).astype(int))[0] + G = nanOutliers(gRaw, np.round(2*3.3*vps).astype(int))[0] - R = correctPhotobleaching(rRaw, vps) - G = correctPhotobleaching(gRaw, vps) + R = correctPhotobleaching(R, vps) + G = correctPhotobleaching(G, vps) @@ -604,16 +604,21 @@ def correctPhotobleaching(raw, vps=6): photoCorr = np.zeros_like(raw) # initialize photoCorr - showPlots = False + debug = False + + performMedfilt = True - if raw.ndim == 2: - # perform median filtering (Nan friendly) - from scipy.signal import medfilt - smoothed = medfilt(raw, [1, medfilt_window]) + if raw.ndim == 2: + if performMedfilt: + # perform median filtering (Nan friendly) + from scipy.signal import medfilt + smoothed = medfilt(raw, [1, medfilt_window]) + else: + smoothed = raw.copy() - N_neurons = smoothed.shape[0] + N_neurons = raw.shape[0] # initialize the exponential fit parameter outputs popt = np.zeros([N_neurons, 3]) @@ -624,13 +629,19 @@ def correctPhotobleaching(raw, vps=6): popt[row, :], pcov[:, :, row], xVals = fitPhotobleaching(smoothed[row, :], vps) photoCorr[row, :] = popt[row, 0] * raw[row, :] / expfunc(xVals, *popt[row, :]) + showPlots = False + if debug: + if np.random.rand() > 0.85: + showPlots = True + if showPlots: import matplotlib.pyplot as plt - plt.plot(xVals, smoothed[row, :], 'b-', label=['raw, row: '+np.str(row)]) + plt.plot(xVals, raw[row, :], 'b-', label=['raw, row: '+np.str(row)]) plt.plot(xVals, photoCorr[row, :], "r-", label=['photoCorr, row: '+np.str(row)]) plt.xlabel('Time (s)') plt.ylabel('ActivityTrace') + plt.title('correctPhotobleaching()') 
plt.legend() plt.show() @@ -670,32 +681,36 @@ def fitPhotobleaching(activityTrace, vps): from scipy.optimize import curve_fit # set up some bounds on our exponential fitting parameter, y=a*exp(bx)+c - num_recordingLengths = 6 # the timescale of exponential decay should not be more than six recording lengths long + num_recordingLengths = 8 # the timescale of exponential decay should not be more than eight recording lengths long # because otherwise we could have recorded for way longer!! #Scale the activity trace to somethign around one. scaleFactor=np.nanmean(activityTrace) activityTrace=activityTrace/scaleFactor - bounds = ([0, 1 / (num_recordingLengths * max(xVals)), 0], # lower bounds - [max(activityTrace[nonNaNs])*1.5, 0.5, 2 * np.nanmean(activityTrace)]) # upper bound + bounds = ([0, 1 / (num_recordingLengths * np.nanmax(xVals)), 0], # lower bounds + [np.nanmax(activityTrace[nonNaNs])*1.5, 0.5, 2 * np.nanmean(activityTrace)]) # upper bound # as a first guess, a is half the max, b is 1/(half the length of the recording), and c is the average - popt_guess = [max(activityTrace) / 2, 2 / max(xVals), np.nanmean(activityTrace)] + popt_guess = [np.nanmax(activityTrace) / 2, 2 / np.nanmax(xVals), np.nanmean(activityTrace)] popt, pcov = curve_fit(expfunc, xVals[nonNaNs], activityTrace[nonNaNs], p0=popt_guess, bounds=bounds) - showPlots = False + if np.random.rand()>0.9: + showPlots = True + else: + showPlots = False + if showPlots: import matplotlib.pyplot as plt + plt.cla() plt.plot(xVals, activityTrace, 'b-', label='data') plt.plot(xVals, expfunc(xVals, *popt), "r-", label='fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) plt.xlabel('Time (s)') plt.ylabel('ActivityTrace') plt.legend() - plt.show() ## Now we want to inspect our fit, find values that are clear outliers, and refit while excluding those residual = activityTrace - expfunc(xVals, *popt) # its ok to have nan's here @@ -715,12 +730,16 @@ def fitPhotobleaching(activityTrace, vps): popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt) if showPlots: + import matplotlib.pyplot as plt plt.plot(xVals, expfunc(xVals, *popt), 'y-', label='excluding outliers fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) plt.legend() - #rescale back to full size + #rescale the amplitude, a, back to full size popt[0]=scaleFactor*popt[0] + + #rescale the c parameter + popt[2]=scaleFactor*popt[2] return popt, pcov, xVals @@ -767,7 +786,7 @@ def nanOutliers(data, window_size, n_sigmas=3): newData = np.array(data, dtype=float) newData[indices] = np.nan - Debug = True + Debug = False if Debug: chosen = np.squeeze( np.round(np.random.rand(1) * M-1).astype(int)) import matplotlib.pyplot as plt diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 9a3d07b..54436b3 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,7 +11,7 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') -if False: +if True: analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') From 353b98aa9cc10e56ec8a25153408a7a939c8f525 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 22 Nov 2019 15:21:26 -0500 Subject: [PATCH 24/25] Debugging: python photobleaching and nan-ing aversely affects R^2 moving photobleaching correiton and outlier detectiong and naning from Jeff's matlab code to python appears to decrease velocity decoding performance of our marquee dataset in fig 2v3. 
(but strangely decoding performance of turning is preserved). In previous commits, photobleaching correciton had been done according to Xiaowen's method in the Phys Rev E paper (division not subtraction). Visual inspection shows no obvious difference between the old matlab based photoblecahign correciotn and the new division approach in python. If anything, the new approach does a better job preserving variance across time. Remains unresolved why performance suffers. One hypothesis: maybe I am currently being less agressive about nan'ing entire columns then Jeff's matlab pipeline? Need to check. --- prediction/dataHandler.py | 41 +++++++++++++++++++++++++++++------ prediction/metaRunAnalysis.py | 2 +- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 001e8f0..0991f45 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -390,22 +390,49 @@ def loadData(folder, dataPars, ew=1): rRaw=np.array(data['rRaw'])[:,:len(np.array(data['hasPointsTime']))] gRaw=np.array(data['gRaw'])[:,:len(np.array(data['hasPointsTime']))] + rphotocorr = np.array(data['rPhotoCorr'])[:, :len(np.array(data['hasPointsTime']))] + gphotocorr = np.array(data['gPhotoCorr'])[:, :len(np.array(data['hasPointsTime']))] #load neural data - original=False + debug = True + original = False if original: - R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + R = np.copy(rphotocorr) + G = np.copy(gphotocorr) else: + R = rRaw.copy() + G = gRaw.copy() vps = dataPars['volumeAcquisitionRate'] #remove outliers with a sliding window of 40 volumes (or 6 seconds) - R = nanOutliers(rRaw, np.round(2*3.3*vps).astype(int))[0] - G = nanOutliers(gRaw, np.round(2*3.3*vps).astype(int))[0] + R = nanOutliers(R, np.round(2*3.3*vps).astype(int))[0] + G = nanOutliers(G, np.round(2*3.3*vps).astype(int))[0] R = correctPhotobleaching(R, vps) G = correctPhotobleaching(G, vps) + if debug: + + nplots=2 + chosen = np.round(np.random.rand(nplots)*R.shape[0]).astype(int) + import matplotlib.pyplot as plt + plt.cla() + plt.subplot(nplots, 1, 1,) + plt.plot(G[chosen[0],:],'g', label='gRaw w/ python photocorrection') + plt.plot(gphotocorr[chosen[0],:],'b', label='gPhotoCorr via matlab') + plt.title('Comparsion of Green Channel photocorrection methods for neuron ' + str(chosen[0])) + plt.legend() + + plt.subplot(nplots,1,2) + plt.plot(R[chosen[0], :], 'r', label='rRaw w/ python photocorrection') + plt.plot(rphotocorr[chosen[0], :], 'm', label='rPhotoCorr via matlab') + plt.title('Comparsion of Red Channel photocorrection methods for neuron ' + str(chosen[0])) + plt.legend() + plt.show() + + + + # @@ -696,8 +723,8 @@ def fitPhotobleaching(activityTrace, vps): popt, pcov = curve_fit(expfunc, xVals[nonNaNs], activityTrace[nonNaNs], p0=popt_guess, bounds=bounds) - - if np.random.rand()>0.9: + debug = False + if np.logical_and(np.random.rand() > 0.9, debug): showPlots = True else: showPlots = False diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 54436b3..9a3d07b 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,7 +11,7 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') -if True: +if False: analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') From eea8b8277d1dcaad247468c3a468a4d406e1aaf1 Mon Sep 17 00:00:00 2001 From: Monika Scholz 
Date: Fri, 13 Dec 2019 09:30:49 +0100 Subject: [PATCH 25/25] add automatic detection of user and modify all imports to conform --- figures/main/fig1_figManifold.py | 13 ++++++++----- figures/main/fig1_figManifoldMoving.py | 10 ++++++---- figures/main/fig1v3.py | 10 ++++++---- figures/main/fig2_expVarslm.py | 11 +++++++---- figures/main/fig2v3.py | 11 +++++++---- figures/main/fig3v3.py | 11 ++++++----- figures/main/fig4v3.py | 11 ++++++----- figures/supp/S1.py | 12 +++++++----- figures/supp/S10.py | 13 +++++++------ figures/supp/S2.py | 10 ++++++---- figures/supp/S3.py | 10 ++++++---- figures/supp/S4.py | 10 ++++++---- figures/supp/S5.py | 11 +++++++---- figures/supp/S6.py | 11 +++++++---- figures/supp/S7_nStability.py | 11 +++++++---- figures/supp/S8_lagplots.py | 11 +++++++---- figures/supp/S9_fitTransition.py | 10 ++++++---- prediction/dataQualityCheck.py | 11 +++++++---- prediction/plotAnalysis.py | 12 +++++++----- prediction/print Metadata.py | 13 +++++++------ prediction/runAnalysis.py | 11 +++++++---- prediction/sandbox.py | 13 ++++++++++++- prediction/userTracker.py | 15 +++++++++++++++ 23 files changed, 167 insertions(+), 94 deletions(-) create mode 100644 prediction/userTracker.py diff --git a/figures/main/fig1_figManifold.py b/figures/main/fig1_figManifold.py index 2dd28c5..a3b35f8 100644 --- a/figures/main/fig1_figManifold.py +++ b/figures/main/fig1_figManifold.py @@ -8,10 +8,12 @@ import numpy as np import matplotlib as mpl from sklearn.feature_selection import mutual_info_classif +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -29,10 +31,11 @@ data = {} for typ in ['AML18', 'AML32']: for condition in ['immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) @@ -140,4 +143,4 @@ #ax.view_init(elev=-40, azim=90) -plt.show() \ No newline at end of file +plt.show() diff --git a/figures/main/fig1_figManifoldMoving.py b/figures/main/fig1_figManifoldMoving.py index 3f00fe1..e96bd02 100644 --- a/figures/main/fig1_figManifoldMoving.py +++ b/figures/main/fig1_figManifoldMoving.py @@ -12,6 +12,7 @@ import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... 
@@ -29,10 +30,11 @@ data = {} for typ in ['AML18', 'AML32']: for condition in ['moving']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/main/fig1v3.py b/figures/main/fig1v3.py index c30241b..87ee010 100644 --- a/figures/main/fig1v3.py +++ b/figures/main/fig1v3.py @@ -26,6 +26,7 @@ #import singlePanels as sp #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # suddenly this isn't imported from stylesheet anymore... mpl.rcParams["axes.labelsize"] = 14 @@ -42,10 +43,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig2_expVarslm.py b/figures/main/fig2_expVarslm.py index c42b83f..a323c87 100644 --- a/figures/main/fig2_expVarslm.py +++ b/figures/main/fig2_expVarslm.py @@ -7,6 +7,7 @@ """ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -14,6 +15,7 @@ # #import makePlots as mp from prediction import dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML175', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig2v3.py b/figures/main/fig2v3.py index e01ff3a..d921021 100755 --- a/figures/main/fig2v3.py +++ b/figures/main/fig2v3.py @@ -7,6 +7,7 @@ """ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -14,6 +15,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * from scipy.stats import pearsonr @@ -33,10 +35,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML175', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig3v3.py b/figures/main/fig3v3.py index 3b9a042..2c08550 100755 --- a/figures/main/fig3v3.py +++ b/figures/main/fig3v3.py @@ -16,7 +16,7 @@ #from matplotlib_venn import venn2 from scipy.cluster.hierarchy import dendrogram, linkage, fcluster, set_link_color_palette import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * from prediction.pycpd import deformable_registration, rigid_registration @@ -54,10 +54,11 @@ def registerWorms(R, X, dim=3, nr = True): data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig4v3.py b/figures/main/fig4v3.py index c23e6a5..5a8509d 100755 --- 
a/figures/main/fig4v3.py +++ b/figures/main/fig4v3.py @@ -22,7 +22,7 @@ # import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * # stats from sklearn.metrics import r2_score,mean_squared_error @@ -93,10 +93,11 @@ def main(): data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S1.py b/figures/supp/S1.py index c97a8e7..a5ea76c 100755 --- a/figures/supp/S1.py +++ b/figures/supp/S1.py @@ -17,6 +17,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -34,11 +35,12 @@ data = {} for typ in ['AML18', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) - + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) + try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S10.py b/figures/supp/S10.py index 76e077f..303db84 100755 --- a/figures/supp/S10.py +++ b/figures/supp/S10.py @@ -23,7 +23,7 @@ #import svgutils as svg # import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * from prediction.pycpd import deformable_registration, rigid_registration @@ -59,11 +59,12 @@ def registerWorms(Xref, X, dim=3): data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) - + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) + try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git 
a/figures/supp/S2.py b/figures/supp/S2.py index f020944..3d4f6a2 100755 --- a/figures/supp/S2.py +++ b/figures/supp/S2.py @@ -16,6 +16,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -33,10 +34,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S3.py b/figures/supp/S3.py index 5107074..71f0569 100755 --- a/figures/supp/S3.py +++ b/figures/supp/S3.py @@ -17,6 +17,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * @@ -35,10 +36,11 @@ data = {} for typ in ['AML32', 'Special']: for condition in ['immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S4.py b/figures/supp/S4.py index abfc9b2..f4cdac7 100755 --- a/figures/supp/S4.py +++ b/figures/supp/S4.py @@ -16,6 +16,7 @@ import matplotlib.ticker as mtick from scipy.stats import ttest_ind from prediction import dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * @@ -65,10 +66,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in [ 'chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) - dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S5.py b/figures/supp/S5.py index afbcd3f..e57fd7f 100755 --- a/figures/supp/S5.py +++ b/figures/supp/S5.py @@ -8,10 +8,12 @@ import numpy as np import matplotlib as mpl from sklearn.feature_selection import mutual_info_classif +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -29,10 +31,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S6.py b/figures/supp/S6.py index a82bf4d..520297f 100644 --- a/figures/supp/S6.py +++ b/figures/supp/S6.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S7_nStability.py b/figures/supp/S7_nStability.py index 437deb9..242b9e6 100644 --- a/figures/supp/S7_nStability.py +++ b/figures/supp/S7_nStability.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * @@ -76,10 +78,11 @@ def findBounds(x, y, ybound): data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S8_lagplots.py b/figures/supp/S8_lagplots.py index 9d107e5..d18f336 100644 --- a/figures/supp/S8_lagplots.py +++ b/figures/supp/S8_lagplots.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S9_fitTransition.py b/figures/supp/S9_fitTransition.py index ee76056..864d18c 100644 --- a/figures/supp/S9_fitTransition.py +++ b/figures/supp/S9_fitTransition.py @@ -17,6 +17,7 @@ import matplotlib.gridspec as gridspec from scipy.ndimage.filters import gaussian_filter1d import prediction.dataHandler as dh +from prediction import userTracker from sklearn import linear_model from sklearn import preprocessing @@ -35,10 +36,11 @@ data = {} for typ in ['Special']: for condition in ['transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/prediction/dataQualityCheck.py b/prediction/dataQualityCheck.py index 88762b5..e4626c7 100755 --- a/prediction/dataQualityCheck.py +++ b/prediction/dataQualityCheck.py @@ -4,10 +4,12 @@ import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr +from prediction import userTracker # mpl.rcParams['interactive'] = False ############################################### @@ -23,10 +25,11 @@ # load data into dictionary # ############################################## -folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) -dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) +path = userTracker.dataPath() +folder = os.path.join(path, '{}_{}/'.format(typ, condition)) +dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) +outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) +outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd diff --git a/prediction/plotAnalysis.py b/prediction/plotAnalysis.py 
index 721afed..ba6ed7b 100755 --- a/prediction/plotAnalysis.py +++ b/prediction/plotAnalysis.py @@ -3,11 +3,12 @@ import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr - +from prediction import userTracker ############################################### @@ -26,10 +27,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'immobilized']: - folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) - dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) # load multiple datasets diff --git a/prediction/print Metadata.py b/prediction/print Metadata.py index ef5d33c..c9dc415 100755 --- a/prediction/print Metadata.py +++ b/prediction/print Metadata.py @@ -7,8 +7,8 @@ import dataHandler as dh import numpy as np import pprint - - +import os +from prediction import userTracker ################################################ # # grab all the data we will need @@ -50,10 +50,11 @@ def dump_keys(d, lvl=0): for typ in ['Special', 'AML32', 'AML18', 'AML175', 'AML70']: for condition in ['transition','immobilized','moving', 'chip']: - folder = '{}_{}/'.format(typ, condition) - dataLog = '{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) #print strains[typ], conditions[condition] try: # load multiple datasets diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 6e5e7ea..bc92b08 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -4,10 +4,12 @@ import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr +from prediction import userTracker ############################################### # @@ -31,10 +33,11 @@ def actuallyRun(typ='AML32', condition = 'moving'): # load data into dictionary # ############################################## - folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) - dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = 
os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd diff --git a/prediction/sandbox.py b/prediction/sandbox.py index 3d10a07..9fbda2b 100755 --- a/prediction/sandbox.py +++ b/prediction/sandbox.py @@ -6,6 +6,17 @@ @author: mscholz """ +import os +from prediction import userTracker +path = userTracker.dataPath() +print path +folder = os.path.join(path, '{}_{}/'.format(typ, condition)) +dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) +outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) +outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) +print folder + + import numpy as np #import fourier_vec as fourier import matplotlib.pyplot as plt @@ -795,4 +806,4 @@ def loadAtlas(dim3=False): ## ## projection on behavioral axes ## -###################################### \ No newline at end of file +###################################### diff --git a/prediction/userTracker.py b/prediction/userTracker.py new file mode 100644 index 0000000..b7cb238 --- /dev/null +++ b/prediction/userTracker.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +"""detect which user is using the machine and change the path to the data accordingly.""" +import socket + +def dataPath(): + """hardcoded or automatic detection of the user's data path.""" + host = socket.gethostname() + if host== 'substitute for andys computer name': + path = '/Users/leifer/workspace/PredictionCode/' + elif host=='nif3004': + path = '/media/scholz_la/hd2/Data/Data' + else: + raise Exception('This is not a known host. Edit prediction/userTracker.py to add your hostname and data paths.') + return path \ No newline at end of file
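
For orientation, the following is a minimal, hypothetical sketch of the path-resolution pattern these patches introduce: each script asks prediction/userTracker.dataPath() for a per-machine base directory and assembles its file locations with os.path.join. The hostname, base directory, and the AML32/moving values below are placeholders, not values taken from the patches. One subtlety visible in the diffs: os.path.join discards all preceding components when a later component starts with '/', so the '/Analysis/...' form used for outLocData resolves to an absolute /Analysis path, while the 'Analysis/...' form used for outLoc stays under the base directory.

#!/usr/bin/env python
# Minimal sketch of userTracker-style path resolution; hostname and base
# directory below are placeholders, not values from the patch series.
import os
import socket

def dataPath():
    """Stand-in for prediction/userTracker.dataPath(): map hostname to a base directory."""
    known = {'your-hostname-here': '/path/to/PredictionCode/'}  # placeholder entry
    # Fall back to the current directory so the sketch runs on any machine.
    return known.get(socket.gethostname(), os.getcwd())

typ, condition = 'AML32', 'moving'  # example strain and condition
path = dataPath()

# Relative second component: the result stays under the base directory,
# e.g. '/path/to/PredictionCode/Analysis/AML32_moving_results.hdf5'.
outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition))

# Second component beginning with '/': os.path.join drops `path` entirely,
# yielding the absolute path '/Analysis/AML32_moving.hdf5' instead.
outLocData = os.path.join(path, '/Analysis/{}_{}.hdf5'.format(typ, condition))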