diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0fe2c40 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +build/ diff --git a/figures/main/fig1_figManifold.py b/figures/main/fig1_figManifold.py index 2dd28c5..a3b35f8 100644 --- a/figures/main/fig1_figManifold.py +++ b/figures/main/fig1_figManifold.py @@ -8,10 +8,12 @@ import numpy as np import matplotlib as mpl from sklearn.feature_selection import mutual_info_classif +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -29,10 +31,11 @@ data = {} for typ in ['AML18', 'AML32']: for condition in ['immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) @@ -140,4 +143,4 @@ #ax.view_init(elev=-40, azim=90) -plt.show() \ No newline at end of file +plt.show() diff --git a/figures/main/fig1_figManifoldMoving.py b/figures/main/fig1_figManifoldMoving.py index 3f00fe1..e96bd02 100644 --- a/figures/main/fig1_figManifoldMoving.py +++ b/figures/main/fig1_figManifoldMoving.py @@ -12,6 +12,7 @@ import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -29,10 +30,11 @@ data = {} for typ in ['AML18', 'AML32']: for condition in ['moving']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/main/fig1v3.py b/figures/main/fig1v3.py index c30241b..87ee010 100644 --- a/figures/main/fig1v3.py +++ b/figures/main/fig1v3.py @@ -26,6 +26,7 @@ #import singlePanels as sp #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # suddenly this isn't imported from stylesheet anymore... 
mpl.rcParams["axes.labelsize"] = 14 @@ -42,10 +43,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig2_expVarslm.py b/figures/main/fig2_expVarslm.py index c42b83f..a323c87 100644 --- a/figures/main/fig2_expVarslm.py +++ b/figures/main/fig2_expVarslm.py @@ -7,6 +7,7 @@ """ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -14,6 +15,7 @@ # #import makePlots as mp from prediction import dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML175', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig2v3.py b/figures/main/fig2v3.py index 06ca0cd..d921021 100755 --- a/figures/main/fig2v3.py +++ b/figures/main/fig2v3.py @@ -7,6 +7,7 @@ """ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -14,6 +15,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -33,10 +35,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML175', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets @@ -218,6 +221,7 @@ ax7.text(-140, 0, 'Velocity\n(rad/s)', color = R1, rotation=0, verticalalignment='center', fontsize=12, multialignment='center') ax8.text(-160, 0, 'Body \n curvature \n(a.u.)', color = B1, rotation=0, verticalalignment='center', fontsize=12, multialignment='center') + # move axis to the right ax7.yaxis.tick_right() ax8.yaxis.tick_right() @@ -493,6 +497,11 @@ plt.show() # get all the weights for the different samples + +import prediction.provenance as prov +prov.stamp(axV,.1,.3) + + #for typ, colors, ax in zip(['AML32', 'AML18'], [colorsExp, colorCtrl], [ax11, ax12]): # for condition in ['moving', 'immobilized']: # key = '{}_{}'.format(typ, condition) diff --git a/figures/main/fig3v3.py b/figures/main/fig3v3.py index 3b9a042..2c08550 100755 --- a/figures/main/fig3v3.py +++ b/figures/main/fig3v3.py @@ -16,7 +16,7 @@ #from matplotlib_venn import venn2 from scipy.cluster.hierarchy import dendrogram, linkage, fcluster, set_link_color_palette import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * from prediction.pycpd import deformable_registration, rigid_registration @@ -54,10 +54,11 @@ def registerWorms(R, X, dim=3, nr = True): data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig4v3.py b/figures/main/fig4v3.py index c23e6a5..5a8509d 100755 --- a/figures/main/fig4v3.py +++ b/figures/main/fig4v3.py @@ -22,7 +22,7 @@ # import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * # stats from sklearn.metrics import r2_score,mean_squared_error @@ -93,10 +93,11 @@ def main(): data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = 
"../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S1.py b/figures/supp/S1.py index c97a8e7..a5ea76c 100755 --- a/figures/supp/S1.py +++ b/figures/supp/S1.py @@ -17,6 +17,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -34,11 +35,12 @@ data = {} for typ in ['AML18', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) - + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) + try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S10.py b/figures/supp/S10.py index 76e077f..303db84 100755 --- a/figures/supp/S10.py +++ b/figures/supp/S10.py @@ -23,7 +23,7 @@ #import svgutils as svg # import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * from prediction.pycpd import deformable_registration, rigid_registration @@ -59,11 +59,12 @@ def registerWorms(Xref, X, dim=3): data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) - + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) + try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S2.py b/figures/supp/S2.py index f020944..3d4f6a2 100755 --- a/figures/supp/S2.py +++ b/figures/supp/S2.py @@ -16,6 +16,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... 
@@ -33,10 +34,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S3.py b/figures/supp/S3.py index 5107074..71f0569 100755 --- a/figures/supp/S3.py +++ b/figures/supp/S3.py @@ -17,6 +17,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * @@ -35,10 +36,11 @@ data = {} for typ in ['AML32', 'Special']: for condition in ['immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S4.py b/figures/supp/S4.py index 2ade581..f4cdac7 100755 --- a/figures/supp/S4.py +++ b/figures/supp/S4.py @@ -16,6 +16,7 @@ import matplotlib.ticker as mtick from scipy.stats import ttest_ind from prediction import dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * @@ -64,11 +65,12 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: - for condition in ['chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + for condition in [ 'chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S5.py b/figures/supp/S5.py index afbcd3f..e57fd7f 100755 --- a/figures/supp/S5.py +++ b/figures/supp/S5.py @@ -8,10 +8,12 @@ import numpy as np import matplotlib as mpl from sklearn.feature_selection import mutual_info_classif +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -29,10 +31,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S6.py b/figures/supp/S6.py index a82bf4d..520297f 100644 --- a/figures/supp/S6.py +++ b/figures/supp/S6.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S7_nStability.py b/figures/supp/S7_nStability.py index 437deb9..242b9e6 100644 --- a/figures/supp/S7_nStability.py +++ b/figures/supp/S7_nStability.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * @@ -76,10 +78,11 @@ def findBounds(x, y, ybound): data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S8_lagplots.py b/figures/supp/S8_lagplots.py index 9d107e5..d18f336 100644 --- a/figures/supp/S8_lagplots.py +++ b/figures/supp/S8_lagplots.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S9_fitTransition.py b/figures/supp/S9_fitTransition.py index ee76056..864d18c 100644 --- a/figures/supp/S9_fitTransition.py +++ b/figures/supp/S9_fitTransition.py @@ -17,6 +17,7 @@ import matplotlib.gridspec as gridspec from scipy.ndimage.filters import gaussian_filter1d import prediction.dataHandler as dh +from prediction import userTracker from sklearn import linear_model from sklearn import preprocessing @@ -35,10 +36,11 @@ data = {} for typ in ['Special']: for condition in ['transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/prediction/__init__.pyc b/prediction/__init__.pyc index 1d4d504..7e7069e 100644 Binary files a/prediction/__init__.pyc and b/prediction/__init__.pyc differ diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 911f46c..0991f45 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -4,6 +4,7 @@ load and compare multiple data sets for behavior prediction. 
@author: monika scholz """ +from __future__ import division # make division give you floats, as is standard for python3 import scipy.io import os import numpy as np @@ -257,8 +258,11 @@ def transformEigenworms(pcs, dataPars): for pcindex, pc in enumerate(pcs): # mask nans by linearly interpolating mask = np.isnan(pc1) + if np.any(mask): pc[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), pc1[~mask]) + import warnings + warnings.warn('Found ' + str(np.sum(mask)) + 'NaNs that are being interpolated over in the behavior PC1, PC2 or PC3.') pcs[pcindex] = pc theta = np.unwrap(np.arctan2(pcs[2], pcs[1])) @@ -318,7 +322,8 @@ def preprocessNeuralData(R, G, dataPars): # smooth with GCamp6 halftime = 1s RS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in R]) - GS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in G]) + GS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in G]) + print("windowGCaMP:", str(dataPars['windowGCamp'])) # YR = GS/RS ## meansubtract = False#False#True @@ -381,10 +386,55 @@ def loadData(folder, dataPars, ew=1): ethomask = np.isnan(etho) if np.any(ethomask): etho[ethomask] = 0 - + + rRaw=np.array(data['rRaw'])[:,:len(np.array(data['hasPointsTime']))] + gRaw=np.array(data['gRaw'])[:,:len(np.array(data['hasPointsTime']))] + + rphotocorr = np.array(data['rPhotoCorr'])[:, :len(np.array(data['hasPointsTime']))] + gphotocorr = np.array(data['gPhotoCorr'])[:, :len(np.array(data['hasPointsTime']))] + #load neural data - R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + debug = True + original = False + if original: + R = np.copy(rphotocorr) + G = np.copy(gphotocorr) + else: + R = rRaw.copy() + G = gRaw.copy() + vps = dataPars['volumeAcquisitionRate'] + + #remove outliers with a sliding window of 40 volumes (or 6 seconds) + R = nanOutliers(R, np.round(2*3.3*vps).astype(int))[0] + G = nanOutliers(G, np.round(2*3.3*vps).astype(int))[0] + + R = correctPhotobleaching(R, vps) + G = correctPhotobleaching(G, vps) + + if debug: + + nplots=2 + chosen = np.round(np.random.rand(nplots)*R.shape[0]).astype(int) + import matplotlib.pyplot as plt + plt.cla() + plt.subplot(nplots, 1, 1,) + plt.plot(G[chosen[0],:],'g', label='gRaw w/ python photocorrection') + plt.plot(gphotocorr[chosen[0],:],'b', label='gPhotoCorr via matlab') + plt.title('Comparsion of Green Channel photocorrection methods for neuron ' + str(chosen[0])) + plt.legend() + + plt.subplot(nplots,1,2) + plt.plot(R[chosen[0], :], 'r', label='rRaw w/ python photocorrection') + plt.plot(rphotocorr[chosen[0], :], 'm', label='rPhotoCorr via matlab') + plt.title('Comparsion of Red Channel photocorrection methods for neuron ' + str(chosen[0])) + plt.legend() + plt.show() + + + + + + # Ratio = np.array(data['Ratio2'])[:,:len(np.array(data['hasPointsTime']))] Y, dR, GS, RS, RM = preprocessNeuralData(R, G, dataPars) @@ -557,3 +607,224 @@ def loadDictFromHDF(filePath): f.close() return d +def correctPhotobleaching(raw, vps=6): + """ Apply photobleaching correction to raw signals, works on a whole heatmap of neurons + + Use Xiaowen's photobleaching correction method which fits an exponential and then divides it off + Note we are following along with the explicit noise model from Xiaowen's Phys Rev E paper Eq 1 page 3 + Chen, Randi, Leifer and Bialek, Phys Rev E 2019 + + takes raw neural signals N (rows) neruons x T (cols) time points + we require that each neuron is a row and 
that there are more time points than rows + + """ + + # Make sure the data we are getting makes sense. + assert raw.ndim <= 2, "raw fluorescent traces passed to correctPhotobleaching() have the wrong number of dimensions" + + if raw.ndim == 2: + assert raw.shape[1] > raw.shape[ + 0], "raw fluorescent traces passed to correctPhotobleaching() have the wrong size! Perhaps it is transposed?" + + window_s = 12.6 # time of median filter window in seconds + medfilt_window = np.int(np.ceil(window_s * vps / 2.) * 2 + 1) # make it an odd number of frames + + photoCorr = np.zeros_like(raw) # initialize photoCorr + + debug = False + + performMedfilt = True + + + + if raw.ndim == 2: + if performMedfilt: + # perform median filtering (Nan friendly) + from scipy.signal import medfilt + smoothed = medfilt(raw, [1, medfilt_window]) + else: + smoothed = raw.copy() + + N_neurons = raw.shape[0] + + # initialize the exponential fit parameter outputs + popt = np.zeros([N_neurons, 3]) + pcov = np.zeros([3, 3, N_neurons]) + + # Exponential Fit on each neuron + for row in np.arange(N_neurons): + popt[row, :], pcov[:, :, row], xVals = fitPhotobleaching(smoothed[row, :], vps) + photoCorr[row, :] = popt[row, 0] * raw[row, :] / expfunc(xVals, *popt[row, :]) + + showPlots = False + if debug: + if np.random.rand() > 0.85: + showPlots = True + + if showPlots: + import matplotlib.pyplot as plt + plt.plot(xVals, raw[row, :], 'b-', label=['raw, row: '+np.str(row)]) + plt.plot(xVals, photoCorr[row, :], "r-", + label=['photoCorr, row: '+np.str(row)]) + plt.xlabel('Time (s)') + plt.ylabel('ActivityTrace') + plt.title('correctPhotobleaching()') + plt.legend() + plt.show() + + elif raw.ndim == 1: + smoothed = medfilt(raw, medfilt_window) + popt, pcov, xVals = fitPhotobleaching(smoothed, vps) + photoCorr= popt[0] * raw / expfunc(xVals, *popt) + + else: + raise ValueError('The number dimensions of the data in correctPhotobleaching are not 1 or 2') + + return photoCorr + + + +def fitPhotobleaching(activityTrace, vps): + """" + Fit an exponential. + + Accepts a single neuron's activity trace. + + Use Xiaowen's photobleaching correction method which fits an exponential and then divides it off + Note we are following along with the explicit noise model from Xiaowen's Phys Rev E paper Eq 1 page 3 + Chen, Randi, Leifer and Bialek, Phys Rev E 2019 + + + """ + assert activityTrace.ndim == 1, "fitPhotobleaching only works on single neuron traces" + + # Now we will fit an exponential, following along with the tuotiral here: + # https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html + xVals = np.array(np.arange(activityTrace.shape[-1])) / np.float(vps) + + # Identify just the data that is not a NaN + nonNaNs = np.logical_not(np.isnan(activityTrace)) + + from scipy.optimize import curve_fit + + # set up some bounds on our exponential fitting parameter, y=a*exp(bx)+c + num_recordingLengths = 8 # the timescale of exponential decay should not be more than eight recording lengths long + # because otherwise we could have recorded for way longer!! + + #Scale the activity trace to somethign around one. 
+ scaleFactor=np.nanmean(activityTrace) + activityTrace=activityTrace/scaleFactor + + bounds = ([0, 1 / (num_recordingLengths * np.nanmax(xVals)), 0], # lower bounds + [np.nanmax(activityTrace[nonNaNs])*1.5, 0.5, 2 * np.nanmean(activityTrace)]) # upper bound + + # as a first guess, a is half the max, b is 1/(half the length of the recording), and c is the average + popt_guess = [np.nanmax(activityTrace) / 2, 2 / np.nanmax(xVals), np.nanmean(activityTrace)] + + popt, pcov = curve_fit(expfunc, xVals[nonNaNs], activityTrace[nonNaNs], p0=popt_guess, + bounds=bounds) + debug = False + if np.logical_and(np.random.rand() > 0.9, debug): + showPlots = True + else: + showPlots = False + + if showPlots: + import matplotlib.pyplot as plt + plt.cla() + plt.plot(xVals, activityTrace, 'b-', label='data') + plt.plot(xVals, expfunc(xVals, *popt), "r-", + label='fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) + plt.xlabel('Time (s)') + plt.ylabel('ActivityTrace') + plt.legend() + + ## Now we want to inspect our fit, find values that are clear outliers, and refit while excluding those + residual = activityTrace - expfunc(xVals, *popt) # its ok to have nan's here + + nSigmas = 3 # we want to exclude points that are three standard deviations away from the fit + + # Make a new mask that excludes the outliers + excOutliers = np.copy(nonNaNs) + excOutliers[(np.abs(residual) > (nSigmas * np.nanstd(residual)))] = False + + # Refit excluding the outliers, use the previous fit as initial guess + # note we relax the bounds here a bit + + try: + popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt,bounds=bounds) + except: + popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt) + + if showPlots: + import matplotlib.pyplot as plt + plt.plot(xVals, expfunc(xVals, *popt), 'y-', + label='excluding outliers fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) + plt.legend() + + #rescale the amplitude, a, back to full size + popt[0]=scaleFactor*popt[0] + + #rescale the c parameter + popt[2]=scaleFactor*popt[2] + return popt, pcov, xVals + + +def expfunc(x, a, b, c): + # type: (xVals, a, b, c) -> yVals + return a * np.exp(-b * x) + c + + + + + +from skimage.util import view_as_windows + +def nanOutliers(data, window_size, n_sigmas=3): + """using a Hampel filter to detect outliers and replace them with nans. + + This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. + + The algorithm assumes the underlying series should be Gaussian. This could be changed by changing the k parameter. + Data can contain nans, these will be ignored for median calculation. + data: (M, N) array of M timeseries with N samples each. + windowsize: integer. This is half of the typical implementation. + n_sigmas: float or integer + """ + # deal with N = 1 timeseries to conform to (M=1,N) shape + if len(data.shape)<2: + data = np.reshape(data, (1,-1)) + k = 1.4826 # scale factor for Gaussian distribution + M, N = data.shape # store data shape for quick use below + # pad array to achieve at least the same size as original + paddata = np.pad(data.copy(), pad_width= [(0,0),(window_size//2,window_size//2)], \ + constant_values=(np.median(data)), mode = 'constant') + # we use strides to create rolling windows. Not nice in memory, but good in performance. + # because its meant for images, it creates some empty axes of size 1. 
+ tmpdata = view_as_windows(paddata, (1,window_size)) + # crop data to center + tmpdata = tmpdata[:M, :N] + x0N = np.nanmedian(tmpdata, axis = (-1, -2)) + s0N =k * np.nanmedian(np.abs(tmpdata - x0N[:,:,np.newaxis, np.newaxis]), axis = (-1,-2)) + # hampel condition + hampel = (np.abs(data-x0N) - (n_sigmas*s0N)) + indices = np.where(hampel>0) + # cast to float to allow nans in output data + newData = np.array(data, dtype=float) + newData[indices] = np.nan + + Debug = False + if Debug: + chosen = np.squeeze( np.round(np.random.rand(1) * M-1).astype(int)) + import matplotlib.pyplot as plt + plt.cla() + plt.plot(data[chosen, :],'r') + plt.plot(newData[chosen, :],'b') + plt.title('nanOutlier(), neuron: ' +str(chosen)) + plt.show() + + return newData, indices + + + + diff --git a/prediction/dataHandler.pyc b/prediction/dataHandler.pyc index 79a9073..c453db3 100644 Binary files a/prediction/dataHandler.pyc and b/prediction/dataHandler.pyc differ diff --git a/prediction/dataQualityCheck.py b/prediction/dataQualityCheck.py index 314120e..e4626c7 100755 --- a/prediction/dataQualityCheck.py +++ b/prediction/dataQualityCheck.py @@ -4,10 +4,12 @@ import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr +from prediction import userTracker # mpl.rcParams['interactive'] = False ############################################### @@ -16,17 +18,18 @@ # ############################################### typ = 'AML32' # possible values AML32, AML18, AML70 -condition = 'immobilized'# # Moving, immobilized, chip +condition = 'moving'# # Moving, immobilized, chip first = True # if true, create new HDF5 file ############################################### # # load data into dictionary # ############################################## -folder = '../{}_{}/'.format(typ, condition) -dataLog = '../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "../Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "../Analysis/{}_{}.hdf5".format(typ, condition) +path = userTracker.dataPath() +folder = os.path.join(path, '{}_{}/'.format(typ, condition)) +dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) +outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) +outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd diff --git a/prediction/dimReduction.py b/prediction/dimReduction.py index b1b5a1c..f5da67f 100755 --- a/prediction/dimReduction.py +++ b/prediction/dimReduction.py @@ -1268,7 +1268,7 @@ def predictNeuralDynamicsfromBehavior(data, splits, pars): train, test = splits[label]['Train'], splits[label]['Test'] # create dimensionality-reduced behavior - pca with 10 eigenworms cl = data['CL'] - eigenworms = dh.loadEigenBasis(filename = 'utility/Eigenworms.dat', nComp=4, new=True) + eigenworms = dh.loadEigenBasis(filename = '../utility/Eigenworms.dat', nComp=4, new=True) pcsNew, meanAngle, lengths, refPoint = dh.calculateEigenwormsFromCL(cl, eigenworms) behavior = pcsNew.T # nevermind, use the behaviors we use for lasso instead diff --git a/prediction/dimReduction.pyc b/prediction/dimReduction.pyc index bd9ea36..063ef0d 100644 Binary files a/prediction/dimReduction.pyc and b/prediction/dimReduction.pyc differ diff --git a/prediction/makePlots.pyc b/prediction/makePlots.pyc index feed789..448d7a7 100644 Binary files 
a/prediction/makePlots.pyc and b/prediction/makePlots.pyc differ diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py new file mode 100644 index 0000000..9a3d07b --- /dev/null +++ b/prediction/metaRunAnalysis.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Created on Wed Jul 10 14:14:34 2019 +This is a meta script to run runAnalysis over and over again. +@author: leifer +""" + +#### + +import runAnalysis as analysis + +analysis.actuallyRun('AML32','moving') +if False: + analysis.actuallyRun('AML18','moving') + analysis.actuallyRun('AML70','moving') + analysis.actuallyRun('AML175','moving') + analysis.actuallyRun('AML32','immobilized') + analysis.actuallyRun('AML18','immobilized') + analysis.actuallyRun('AML70','immobilized') + + analysis.actuallyRun('AML32','chip') + analysis.actuallyRun('Special','transition') #Note these are identical.. + # I copied and pasted the input files.. + + analysis.actuallyRun('AML70','chip') + + execfile('../figures/main/fig2v3.py') + execfile('../figures/supp/S4.py') + execfile('../figures/supp/S2.py') + execfile('../figures/main/fig1v3.py') + execfile('../figures/main/fig2_expVarslm.py') diff --git a/prediction/noiseModel.py b/prediction/noiseModel.py new file mode 100644 index 0000000..0088a71 --- /dev/null +++ b/prediction/noiseModel.py @@ -0,0 +1,217 @@ +### noiseModel.py +# We suspect that the neural dynamics we observe in freely moving GCaMP worms have three components: +# 1) Neural Signals representing locomotion +# 2) Neural signals representing other biological processes (sensory processing, working memory, etc) +# 3) Noise, including from motion artifact +# +# We know for sure that (1) & (3) exist. We can decode locomotion so we know that (1) must be present. +# And we know that noise (3) is present because we see it in the GFP moving animals. +# +# To bolster our argument that neural signals representing other biological processes are present in our recordings, +# we will attempt to reject the following null hypothesis: +# +# Null hypothesis is that ONLY neural signals representing locomotion AND noise are present. E.g. only (1) and (3) and +# not (2). +# +# +# We will build up our null model by taking a real freely moving GFP worm that therefore has only noise (3) and we +# will synthetically generate pure locomotory signals (1) and adjust the relative proportion of noise to +# locomotory signals. +# +# We will study the predictive performance of our decoder on this model and also the percent variance explained by the +# locomotory signal to try to assess whether the null model fits our experimental observations. + + +## Find a good GFP recording to use. + +dataset = '/Users/leifer/workspace/PredictionCode/AML18_moving/BrainScanner20160506_160928_MS/heatData.mat' + +# Import the recording based on the output of Jeff's 3dBrain matlab analysis pipeline +import scipy.io as sio +mat_contents = sio.loadmat(dataset) +rPhotoCorr = mat_contents['rPhotoCorr'] +rRaw = mat_contents['rRaw'] +gPhotoCorr = mat_contents['gPhotoCorr'] +gRaw = mat_contents['gRaw'] +Ratio2 = mat_contents['Ratio2'] + +# Import Monika's ICA'd version (with my modification s.t.
there is no z-scoring) +# (Note this section is copy pasted from S2.py) +import prediction.dataHandler as dh +import numpy as np +data = {} +for typ in ['AML18','AML32']: + for condition in ['moving']: + folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) + dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) + outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) + outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + + try: + # load multiple datasets + dataSets = dh.loadDictFromHDF(outLocData) + keyList = np.sort(dataSets.keys()) + results = dh.loadDictFromHDF(outLoc) + # store in dictionary by typ and condition + key = '{}_{}'.format(typ, condition) + data[key] = {} + data[key]['dsets'] = keyList + data[key]['input'] = dataSets + data[key]['analysis'] = results + except IOError: + print typ, condition, 'not found.' + pass +print 'Done reading data.' + + + + +import matplotlib.pyplot as plt +from prediction import provenance as prov + + +ordIndx=data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ordering'] + +fig1=plt.figure() +plt.subplot(4, 1, 1) +plt.imshow(rPhotoCorr[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('rPhotoCorr') + +plt.subplot(4,1,2) +plt.imshow(gPhotoCorr[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('gPhotoCorr') + +plt.subplot(4,1,3) +plt.imshow(np.divide(gPhotoCorr,rPhotoCorr)[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('gPhotoCorr/rPhotoCorr') + + +ax=plt.subplot(4, 1, 4) +plt.imshow(data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ActivityFull'],aspect='auto') +plt.colorbar() +prov.stamp(ax,0,-.3) +plt.title('"ActivityFull" (ICAd, plus Andys modified normalization) \n' +dataset) +plt.show() + + + +fig2=plt.figure() +ax21=plt.subplot(5, 1, 1) +plt.imshow(rRaw[ordIndx, :], aspect='auto', vmin=np.nanmean(rRaw)-2*np.nanstd(rRaw), vmax=np.nanmean(rRaw)+2*np.nanstd(rRaw)) +plt.colorbar() +plt.title('rRaw') + + +ax22=plt.subplot(5,1,2) +plt.plot(10*np.sum(np.isnan(rRaw) ,axis=0)) #plot how many nans at each time point (x10 fo visibility) +plt.plot(np.nanmean(rRaw, axis=0)) # plot the average rRaw intesnity +plt.title('rRaw Average Intensity, and number of neurons with NaN') +ax22.set_ylim( np.nanmean(rRaw)-1*np.nanstd(rRaw), np.nanmean(rRaw)+2*np.nanstd(rRaw) ) +ax22.set_xlim(0,rRaw.shape[1]) +pos21 = ax21.get_position() +pos22 = ax22.get_position() +ax22.set_position([pos21.x0,pos22.y0,pos21.width,pos22.height]) + + + +plt.subplot(5,1,3) +plt.imshow(gRaw[ordIndx, :], aspect='auto', vmin=np.nanmean(gRaw)-2*np.nanstd(gRaw), vmax=np.nanmean(gRaw)+2*np.nanstd(gRaw)) +plt.colorbar() +plt.title('gRaw') + +plt.subplot(5,1,4) +plt.imshow(np.divide(gRaw, rRaw)[ordIndx, :], aspect='auto') +plt.colorbar() +plt.title('gRaw/rRaw') + + +ax=plt.subplot(5, 1, 5) +plt.imshow(data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ActivityFull'],aspect='auto') +plt.colorbar() +prov.stamp(ax,0,-.3) +plt.title('"ActivityFull" (ICAd, plus Andys modified normalization) \n' +dataset) +plt.xlabel('Volumes (6 vol /s )') +plt.show() + + + + + + +## Plot velocity and body curvature +vel=data['AML18_moving']['input']['BrainScanner20160506_160928']['Behavior']['AngleVelocity'] +bodycurv= data['AML18_moving']['input']['BrainScanner20160506_160928']['Behavior']['Eigenworm3'] + +if True: + plt.subplots(4,1,sharex=True) + ax1=plt.subplot(4, 1, 1) + 
plt.imshow(rPhotoCorr[ordIndx,:],aspect='auto') + plt.colorbar() + plt.title('rPhotoCorr') + + ax2=plt.subplot(4,1,2) + plt.imshow(gPhotoCorr[ordIndx,:],aspect='auto') + plt.colorbar() + plt.title('gPhotoCorr') + + + + ax3=plt.subplot(4,1,3) + plt.plot(vel) + plt.title('Velocity') + ax3.set_xlim(ax1.get_xlim()) + + ax4=plt.subplot(4, 1, 4) + plt.plot(bodycurv) + prov.stamp(ax4,0,-.3) + plt.title('Body Curvature') + ax4.set_xlim(ax1.get_xlim()) + + # align the axes + pos1 = ax1.get_position() + pos2 = ax2.get_position() + pos3 = ax3.get_position() + pos4 = ax4.get_position() + ax3.set_position([pos1.x0,pos3.y0,pos1.width,pos3.height]) + ax4.set_position([pos1.x0,pos4.y0,pos1.width,pos4.height]) + + plt.show() + + + +if False: #Just checking the difference between TimeFull and Time + plt.figure() + plt.plot(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['TimeFull'], label='TimeFull') + plt.plot(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['Time'],label='Time') + plt.legend() + plt.show() + + plt.figure() + plt.plot(np.diff(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['TimeFull']), + label='TimeFull') + plt.plot(np.diff(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['Time']), + label='Time') + plt.legend() + plt.show() + +## Extract neural weights learned by the SLM from a GCaMP Recording + + + +## Shuffle the neural Weights + +## Generate synthetic pure locomotory signals based on the GFP recording + +## Combine the GFP and synthetic locomotory signals for a given relative strength (lambda) + +## Figure out how normalization should occur. [Has somethign to do with lambda.] + +## Measure Decoder Performance (R^2) + +## Measure variance explained + +#Get the heatmap values + diff --git a/prediction/plotAnalysis.py b/prediction/plotAnalysis.py index 2067d72..ba6ed7b 100755 --- a/prediction/plotAnalysis.py +++ b/prediction/plotAnalysis.py @@ -3,11 +3,12 @@ import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr - +from prediction import userTracker ############################################### @@ -17,6 +18,7 @@ ############################################### typ = 'AML70' # possible values AML32, AML18, AML70 condition = 'moving' # Moving, immobilized, chip +overview = True ############################################### # # load data into dictionary @@ -25,10 +27,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'immobilized']: - folder = '{}_{}/'.format(typ, condition) - dataLog = '{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) # load multiple datasets diff --git a/prediction/print Metadata.py b/prediction/print Metadata.py index ef5d33c..c9dc415 100755 --- a/prediction/print Metadata.py +++ b/prediction/print Metadata.py @@ -7,8 +7,8 @@ import dataHandler as dh import numpy as np import pprint - - +import os +from prediction import userTracker ################################################ # 
# grab all the data we will need @@ -50,10 +50,11 @@ def dump_keys(d, lvl=0): for typ in ['Special', 'AML32', 'AML18', 'AML175', 'AML70']: for condition in ['transition','immobilized','moving', 'chip']: - folder = '{}_{}/'.format(typ, condition) - dataLog = '{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) #print strains[typ], conditions[condition] try: # load multiple datasets diff --git a/prediction/provenance.py b/prediction/provenance.py new file mode 100644 index 0000000..858163e --- /dev/null +++ b/prediction/provenance.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +""" +Module to stamp plots with current git hash and creation date. +This helps keep track of what the state of the code was in when it produced a plot. + +19 July 2019 +Andrew Leifer +leifer@princeton.edu +""" + +def getStampString(): + """ + + :return: string with date, time, hash, url and path of current code + """ + ## Stamp with code version and date info + import git + repo = git.Repo(search_parent_directories=True) + hash = repo.head.object.hexsha + gitpath = repoPath = repo._working_tree_dir + giturl = repo.remotes.origin.url + gitbranch = repo.active_branch + from datetime import datetime + timestamp = datetime.today().strftime('%Y-%m-%d %H:%M') + + return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath + '\nBranch: ' + str(gitbranch)) + +def stamp(ax, x,y,notes=''): + """ + :param ax: matplotlib axes h + :param x: fractional location in the plot + :param y: fractional location in the plot + :return: + """ + + # these are matplotlib.patch.Patch properties + props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) + + # place a text box in upper left in axes coords + ax.text(x, y, getStampString()+'\n'+notes, transform=ax.transAxes, fontsize=8, + verticalalignment='top', bbox=props) + + diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 0ae88b6..bc92b08 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -1,332 +1,359 @@ +from __future__ import division #give me floating point when I divide (standard in python3) # standard modules import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr +from prediction import userTracker ############################################### # # run parameters # ############################################### -typ = 'AML32' # possible values AML32, AML18, AML70 -condition = 'moving' # Moving, immobilized, chip -first = True # if 0true, create new HDF5 file -transient = 0 -save = False -############################################### -# -# load data into dictionary -# -############################################## -folder = '{}_{}/'.format(typ, condition) -dataLog = '{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "Analysis/{}_{}.hdf5".format(typ, condition) - -# data parameters -dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd - 
'gaussWindow':100, # sgauss window for angle velocity derivative. must be odd - 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix + +def actuallyRun(typ='AML32', condition = 'moving'): +# typ possible values AML32, AML18, AML70, AML175 +# condition possible values moving, immobilized, chip + + + #typ = 'AML175' # possible values AML32, AML18, AML70, AML175 + #condition = 'moving' # Moving, immobilized, chip + + first = True # if 0true, create new HDF5 file + transient = 0 + save = True + ############################################### + # + # load data into dictionary + # + ############################################## + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) + + # data parameters + dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd + 'gaussWindow': 75, # gaussianfilter1D is uesed to calculate theta dot from theta in transformEigenworms + 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix + 'windowGCamp': 5, # gauss window for red and green channel + 'interpolateNans': 1,#interpolate gaps smaller than this of nan values in calcium data + 'volumeAcquisitionRate': 6., #rate at which volumes are acquired + } + + + #original data parameters + dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd + 'gaussWindow':100, # gauss window for angle velocity derivative. Acts on full (50Hz) data + 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix 'windowGCamp': 6, # gauss window for red and green channel - 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data - } - -dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) -keyList = np.sort(dataSets.keys()) -if save: - dh.saveDictToHDF(outLocData, dataSets) - -## results dictionary -resultDict = {} -for kindex, key in enumerate(keyList): - resultDict[key] = {} - resultDict[key]['pars'] = dataPars -# analysis parameters - -pars ={'nCompPCA':20, # no of PCA components - 'PCAtimewarp':False, #timewarp so behaviors are equally represented - 'trainingCut': 0.6, # what fraction of data to use for training - 'trainingType': 'middle', # simple, random or middle.select random or consecutive data for training. Middle is a testset in the middle - 'linReg': 'simple', # ordinary or ransac least squares - 'trainingSample': 1, # take only samples that are at least n apart to have independence. 4sec = gcamp_=->24 apart - 'useRank': 0, # use the rank transformed version of neural data for all analyses - 'useDeconv': 0, # use the deconvolved transformed version of neural data for all analyses - 'useRaw': 0, # use the deconvolved transformed version of neural data for all analyses - 'nCluster': 10, # use the deconvolved transformed version of neural data for all analyses - 'useClust':False,# use clusters in the fitting procedure. 
- 'periods': np.arange(0, 300) # relevant periods in seconds for timescale estimate - } - -behaviors = ['AngleVelocity', 'Eigenworm3'] + 'interpolateNans': 6, #interpolate gaps smaller than this of nan values in calcium data -############################################### -# -# check which calculations to perform -# -############################################## -createIndicesTest = 1#True -periodogram = 1 -half_period = 1 -hierclust = 0 -bta = 0 -pca = 1#False -kato_pca = 1#False -half_pca = 1 -corr = 1 -predNeur = 0 -predPCA = 0 -svm = 0 -lasso = 0 -elasticnet = 0 -lagregression = 0 -# this requires moving animals -if condition != 'immobilized': - predNeur = 1 + 'volumeAcquisitionRate': 6., # rate at which volumes are acquired + + } + + + dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) + keyList = np.sort(dataSets.keys()) + if save: + dh.saveDictToHDF(outLocData, dataSets) + + ## results dictionary + resultDict = {} + for kindex, key in enumerate(keyList): + resultDict[key] = {} + resultDict[key]['pars'] = dataPars + # analysis parameters + + pars ={'nCompPCA':10, # no of PCA components + 'PCAtimewarp':False, #timewarp so behaviors are equally represented + 'trainingCut': 0.6, # what fraction of data to use for training + 'trainingType': 'middle', # simple, random or middle.select random or consecutive data for training. Middle is a testset in the middle + 'linReg': 'simple', # ordinary or ransac least squares + 'trainingSample': 1, # take only samples that are at least n apart to have independence. 4sec = gcamp_=->24 apart + 'useRank': 0, # use the rank transformed version of neural data for all analyses + 'useDeconv': 0, # use the deconvolved transformed version of neural data for all analyses + 'useRaw': 0, # use the deconvolved transformed version of neural data for all analyses + 'nCluster': 10, # use the deconvolved transformed version of neural data for all analyses + 'useClust':False,# use clusters in the fitting procedure. 
+ 'periods': np.arange(0, 300) # relevant periods in seconds for timescale estimate + } + + + + behaviors = ['AngleVelocity', 'Eigenworm3'] + + ############################################### + # + # check which calculations to perform + # + ############################################## + createIndicesTest = 1#True + periodogram = 0 + half_period = 0 + hierclust = 0 + bta = 0 + pca = 1#False + kato_pca = 0#False + half_pca = 0 + corr = 1 + predNeur = 0 + predPCA = 0 svm = 0 - lasso = 1 - elasticnet = 1#True - predPCA = 1 + lasso = 0 + elasticnet = 0 lagregression = 0 + # this requires moving animals + if condition != 'immobilized': + predNeur = 1 + svm = 0 + lasso = 0 + elasticnet = 1#True + predPCA = 1 + lagregression = 0 -############################################### -# -# create training and test set indices -# -############################################## -if createIndicesTest: - for kindex, key in enumerate(keyList): - resultDict[key] = {'Training':{}} - for label in behaviors: - train, test = dr.createTrainingTestIndices(dataSets[key], pars, label=label) - if transient: - train = np.where(dataSets[key]['Neurons']['Time']<4*60)[0] - # after 4:30 min - test = np.where((dataSets[key]['Neurons']['Time']>7*60)*(dataSets[key]['Neurons']['Time']<14*60))[0] - resultDict[key]['Training']['Half'] ={'Train':train} - resultDict[key]['Training']['Half']['Test'] = test - else: - # add half split - midpoint = np.mean(dataSets[key]['Neurons']['Time']) - trainhalf = np.where(dataSets[key]['Neurons']['Time']midpoint)[0] - resultDict[key]['Training']['Half'] ={'Train':trainhalf} - resultDict[key]['Training']['Half']['Test'] = testhalf - resultDict[key]['Training'][label] = {'Train':train } - resultDict[key]['Training'][label]['Test']=test - - - print "Done generating trainingsets" + ############################################### + # + # create training and test set indices + # + ############################################## + if createIndicesTest: + for kindex, key in enumerate(keyList): + resultDict[key] = {'Training':{}} + for label in behaviors: + train, test = dr.createTrainingTestIndices(dataSets[key], pars, label=label) + if transient: + train = np.where(dataSets[key]['Neurons']['Time']<4*60)[0] + # after 4:30 min + test = np.where((dataSets[key]['Neurons']['Time']>7*60)*(dataSets[key]['Neurons']['Time']<14*60))[0] + resultDict[key]['Training']['Half'] ={'Train':train} + resultDict[key]['Training']['Half']['Test'] = test + else: + # add half split + midpoint = np.mean(dataSets[key]['Neurons']['Time']) + trainhalf = np.where(dataSets[key]['Neurons']['Time']midpoint)[0] + resultDict[key]['Training']['Half'] ={'Train':trainhalf} + resultDict[key]['Training']['Half']['Test'] = testhalf + resultDict[key]['Training'][label] = {'Train':train } + resultDict[key]['Training'][label]['Test']=test -############################################### -# -# calculate the periodogram of the neural signals -# -############################################## -if periodogram: - print 'running periodogram(s)' - for kindex, key in enumerate(keyList): - resultDict[key]['Period'] = dr.runPeriodogram(dataSets[key], pars, testset = None) -# for half the sample each -if half_period: - print 'running half periodogram(s)' - for kindex, key in enumerate(keyList): - splits = resultDict[key]['Training'] - resultDict[key]['Period1Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Train']) - resultDict[key]['Period2Half'] = dr.runPeriodogram(dataSets[key], pars, testset = 
splits[behaviors[0]]['Test']) -############################################### -# -# correlation neurons and behavior -# -############################################## -if corr: - print 'running Correlation.' - for kindex, key in enumerate(keyList): - resultDict[key]['Correlation'] = dr.behaviorCorrelations(dataSets[key], behaviors) - #half1 = resultDict[key]['Training'][behaviors[0]]['Train'] - #resultDict[key]['CorrelationHalf'] = dr.behaviorCorrelations(dataSets[key], behaviors, subset = half1) - -############################################### -# -# run svm to predict discrete behaviors -# -############################################## -if svm: - for kindex, key in enumerate(keyList): - print 'running SVM.' - splits = resultDict[key]['Training'] - resultDict[key]['SVM'] = dr.discreteBehaviorPrediction(dataSets[key], pars, splits ) + print "Done generating trainingsets" -############################################### -# -# run PCA and store results -# -############################################## -#%% -if pca: - print 'running PCA' - for kindex, key in enumerate(keyList): - resultDict[key]['PCA'] = dr.runPCANormal(dataSets[key], pars) - resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) - - - #correlate behavior and PCA - #resultDict[key]['PCACorrelation']=dr.PCACorrelations(dataSets[key],resultDict[key], behaviors, flag = 'PCA', subset = None) -############################################### -# -# run Kato PCA -# -############################################## -#%% -if kato_pca: - print 'running Kato et. al PCA' - for kindex, key in enumerate(keyList): - resultDict[key]['katoPCA'] = dr.runPCANormal(dataSets[key], pars, deriv = True) - splits = resultDict[key]['Training'] - resultDict[key]['katoPCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train'], deriv=True) - - resultDict[key]['katoPCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Test'], deriv=True) - -############################################### -# -# run split first-second half PCA -# -############################################## -#%% -if half_pca: - print 'half-split PCA' - for kindex, key in enumerate(keyList): - # run PCA on each half - splits = resultDict[key]['Training'] - resultDict[key]['PCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train']) - resultDict[key]['PCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset =splits['Half']['Test']) - resultDict[key]['PCArankCorr'] = dr.rankCorrPCA(resultDict[key]) -#%% -############################################### -# -# predict neural dynamics from behavior -# -############################################## -if predPCA: - for kindex, key in enumerate(keyList): - print 'predicting behavior PCA' - splits = resultDict[key]['Training'] - resultDict[key]['PCAPred'] = dr.predictBehaviorFromPCA(dataSets[key], \ - splits, pars, behaviors) -#%% -############################################### -# -# predict neural dynamics from behavior -# -############################################## -if predNeur: - for kindex, key in enumerate(keyList): - print 'predicting neural dynamics from behavior' - splits = resultDict[key]['Training'] - resultDict[key]['RevPred'] = dr.predictNeuralDynamicsfromBehavior(dataSets[key], splits, pars) - plt.show() -#%% -############################################### -# -# use agglomerative clustering to connect similar neurons -# -############################################## -if hierclust: - for 
kindex, key in enumerate(keyList): - print 'running clustering' - resultDict[key]['clust'] = dr.runHierarchicalClustering(dataSets[key], pars) -#%% -############################################### -# -# use behavior triggered averaging to create non-othogonal axes -# -############################################## -if bta: - for kindex, key in enumerate(keyList): - print 'running BTA' - resultDict[key]['BTA'] =dr.runBehaviorTriggeredAverage(dataSets[key], pars) -#%% -############################################### -# -# linear regression using LASSO -# -############################################## -if lasso: - print "Performing LASSO.", - for kindex, key in enumerate(keyList): - - splits = resultDict[key]['Training'] - resultDict[key]['LASSO'] = dr.runLasso(dataSets[key], pars, splits, plot=0, behaviors = behaviors) - # calculate how much more neurons contribute - tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key],splits, pars, fitmethod = 'LASSO', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['LASSO'][tmpKey].update(tmpDict[tmpKey]) - - tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'LASSO', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['LASSO'][tmpKey]=tmpDict[tmpKey] - - # do converse calculation -- give it only the neurons non-zero in previous case - subset = {} - subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['LASSO']['Eigenworm3']['weights'])>0)[0] - subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['LASSO']['AngleVelocity']['weights'])>0)[0] - resultDict[key]['ConversePredictionLASSO'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'LASSO', subset = subset) - - -#%% -############################################### -# -# linear regression using elastic Net -# -############################################## -if elasticnet: - for kindex, key in enumerate(keyList): - print 'Running Elastic Net', key - splits = resultDict[key]['Training'] - resultDict[key]['ElasticNet'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors) - # calculate how much more neurons contribute - tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key], splits,pars, fitmethod = 'ElasticNet', behaviors = behaviors, ) - for tmpKey in tmpDict.keys(): - resultDict[key]['ElasticNet'][tmpKey].update(tmpDict[tmpKey]) - - tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'ElasticNet', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['ElasticNet'][tmpKey]=tmpDict[tmpKey] - # do converse calculation -- give it only the neurons non-zero in previous case - subset = {} - subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['ElasticNet']['Eigenworm3']['weights'])>0)[0] - subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['ElasticNet']['AngleVelocity']['weights'])>0)[0] - resultDict[key]['ConversePredictionEN'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'ElasticNet', subset = subset) - - # run scrambled control - #print 'Running Elastic Net scrambled' - #resultDict[key]['ElasticNetRandomized'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors, scramble=True) - - -#%% -############################################### -# -# lag-time fits of neural activity -# 
-############################################## -if lagregression: - for kindex, key in enumerate(keyList): - print 'Running lag calculation', key - splits = resultDict[key]['Training'] - #resultDict[key]['LagLASSO'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3)) - resultDict[key]['LagEN'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3), flag='ElasticNet') + ############################################### + # + # calculate the periodogram of the neural signals + # + ############################################## + if periodogram: + print 'running periodogram(s)' + for kindex, key in enumerate(keyList): + resultDict[key]['Period'] = dr.runPeriodogram(dataSets[key], pars, testset = None) + # for half the sample each + if half_period: + print 'running half periodogram(s)' + for kindex, key in enumerate(keyList): + splits = resultDict[key]['Training'] + resultDict[key]['Period1Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Train']) + resultDict[key]['Period2Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Test']) -#%% -############################################### -# -# save data as HDF5 file -# -############################################## -if save: - dh.saveDictToHDF(outLoc, resultDict) + ############################################### + # + # correlation neurons and behavior + # + ############################################## + if corr: + print 'running Correlation.' + for kindex, key in enumerate(keyList): + resultDict[key]['Correlation'] = dr.behaviorCorrelations(dataSets[key], behaviors) + #half1 = resultDict[key]['Training'][behaviors[0]]['Train'] + #resultDict[key]['CorrelationHalf'] = dr.behaviorCorrelations(dataSets[key], behaviors, subset = half1) + + ############################################### + # + # run svm to predict discrete behaviors + # + ############################################## + if svm: + for kindex, key in enumerate(keyList): + print 'running SVM.' + splits = resultDict[key]['Training'] + resultDict[key]['SVM'] = dr.discreteBehaviorPrediction(dataSets[key], pars, splits ) + + ############################################### + # + # run PCA and store results + # + ############################################## + #%% + if pca: + print 'running PCA' + for kindex, key in enumerate(keyList): + resultDict[key]['PCA'] = dr.runPCANormal(dataSets[key], pars) + # resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) + + + #correlate behavior and PCA + #resultDict[key]['PCACorrelation']=dr.PCACorrelations(dataSets[key],resultDict[key], behaviors, flag = 'PCA', subset = None) + ############################################### + # + # run Kato PCA + # + ############################################## + #%% + if kato_pca: + print 'running Kato et. 
al PCA' + for kindex, key in enumerate(keyList): + resultDict[key]['katoPCA'] = dr.runPCANormal(dataSets[key], pars, deriv = True) + splits = resultDict[key]['Training'] + resultDict[key]['katoPCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train'], deriv=True) + + resultDict[key]['katoPCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Test'], deriv=True) + + ############################################### + # + # run split first-second half PCA + # + ############################################## + #%% + if half_pca: + print 'half-split PCA' + for kindex, key in enumerate(keyList): + # run PCA on each half + splits = resultDict[key]['Training'] + resultDict[key]['PCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train']) + resultDict[key]['PCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset =splits['Half']['Test']) + resultDict[key]['PCArankCorr'] = dr.rankCorrPCA(resultDict[key]) + #%% + ############################################### + # + # predict neural dynamics from behavior + # + ############################################## + if predPCA: + for kindex, key in enumerate(keyList): + print 'predicting behavior PCA' + splits = resultDict[key]['Training'] + resultDict[key]['PCAPred'] = dr.predictBehaviorFromPCA(dataSets[key], \ + splits, pars, behaviors) + #%% + ############################################### + # + # predict neural dynamics from behavior + # + ############################################## + if predNeur: + for kindex, key in enumerate(keyList): + print 'predicting neural dynamics from behavior' + splits = resultDict[key]['Training'] + resultDict[key]['RevPred'] = dr.predictNeuralDynamicsfromBehavior(dataSets[key], splits, pars) + plt.show() + #%% + ############################################### + # + # use agglomerative clustering to connect similar neurons + # + ############################################## + if hierclust: + for kindex, key in enumerate(keyList): + print 'running clustering' + resultDict[key]['clust'] = dr.runHierarchicalClustering(dataSets[key], pars) + #%% + ############################################### + # + # use behavior triggered averaging to create non-othogonal axes + # + ############################################## + if bta: + for kindex, key in enumerate(keyList): + print 'running BTA' + resultDict[key]['BTA'] =dr.runBehaviorTriggeredAverage(dataSets[key], pars) + #%% + ############################################### + # + # linear regression using LASSO + # + ############################################## + if lasso: + print "Performing LASSO.", + for kindex, key in enumerate(keyList): + + splits = resultDict[key]['Training'] + resultDict[key]['LASSO'] = dr.runLasso(dataSets[key], pars, splits, plot=0, behaviors = behaviors) + # calculate how much more neurons contribute + tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key],splits, pars, fitmethod = 'LASSO', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['LASSO'][tmpKey].update(tmpDict[tmpKey]) + + tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'LASSO', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['LASSO'][tmpKey]=tmpDict[tmpKey] + + # do converse calculation -- give it only the neurons non-zero in previous case + subset = {} + subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['LASSO']['Eigenworm3']['weights'])>0)[0] + subset['Eigenworm3'] = 
np.where(np.abs(resultDict[key]['LASSO']['AngleVelocity']['weights'])>0)[0] + resultDict[key]['ConversePredictionLASSO'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'LASSO', subset = subset) + + + #%% + ############################################### + # + # linear regression using elastic Net + # + ############################################## + if elasticnet: + for kindex, key in enumerate(keyList): + print 'Running Elastic Net', key + splits = resultDict[key]['Training'] + resultDict[key]['ElasticNet'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors) + # calculate how much more neurons contribute + tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key], splits,pars, fitmethod = 'ElasticNet', behaviors = behaviors, ) + for tmpKey in tmpDict.keys(): + resultDict[key]['ElasticNet'][tmpKey].update(tmpDict[tmpKey]) + + tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'ElasticNet', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['ElasticNet'][tmpKey]=tmpDict[tmpKey] + # do converse calculation -- give it only the neurons non-zero in previous case + subset = {} + subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['ElasticNet']['Eigenworm3']['weights'])>0)[0] + subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['ElasticNet']['AngleVelocity']['weights'])>0)[0] + resultDict[key]['ConversePredictionEN'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'ElasticNet', subset = subset) + + # run scrambled control + #print 'Running Elastic Net scrambled' + #resultDict[key]['ElasticNetRandomized'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors, scramble=True) + + + #%% + ############################################### + # + # lag-time fits of neural activity + # + ############################################## + if lagregression: + for kindex, key in enumerate(keyList): + print 'Running lag calculation', key + splits = resultDict[key]['Training'] + #resultDict[key]['LagLASSO'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3)) + resultDict[key]['LagEN'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3), flag='ElasticNet') + + #%% + ############################################### + # + # save data as HDF5 file + # + ############################################## + if save: + dh.saveDictToHDF(outLoc, resultDict) diff --git a/prediction/sandbox.py b/prediction/sandbox.py index 3d10a07..9fbda2b 100755 --- a/prediction/sandbox.py +++ b/prediction/sandbox.py @@ -6,6 +6,17 @@ @author: mscholz """ +import os +from prediction import userTracker +path = userTracker.dataPath() +print path +folder = os.path.join(path, '{}_{}/'.format(typ, condition)) +dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) +outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) +outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) +print folder + + import numpy as np #import fourier_vec as fourier import matplotlib.pyplot as plt @@ -795,4 +806,4 @@ def loadAtlas(dim3=False): ## ## projection on behavioral axes ## -###################################### \ No 
newline at end of file +###################################### diff --git a/prediction/stylesheet.pyc b/prediction/stylesheet.pyc index 8ff3935..08589e9 100644 Binary files a/prediction/stylesheet.pyc and b/prediction/stylesheet.pyc differ diff --git a/prediction/userTracker.py b/prediction/userTracker.py new file mode 100644 index 0000000..b7cb238 --- /dev/null +++ b/prediction/userTracker.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +"""Detect which user is using the machine and change the path to the data accordingly.""" +import socket + +def dataPath(): + """Hardcoded or automatic detection of the user's data path.""" + host = socket.gethostname() + if host== 'substitute for andys computer name': + path = '/Users/leifer/workspace/PredictionCode/' + elif host=='nif3004': + path = '/media/scholz_la/hd2/Data/Data' + else: + raise Exception('This is not a known host. Edit prediction/userTracker.py to add your hostname and data paths.') + return path \ No newline at end of file
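For orientation, the pattern this change introduces is: each converted script asks userTracker.dataPath() for a per-machine data root and joins subfolders onto it with os.path.join, and dataPath() raises an explicit error until the current hostname has been registered. Below is a minimal, self-contained sketch of that pattern, not the repository's actual code; the hostname 'my-laptop', the data root '/home/me/data/PredictionCode' and the example file name are placeholders.

    # Sketch of the per-host data-path pattern; hostname and paths are placeholders.
    import os
    import socket

    def dataPath():
        # map the current machine to its local copy of the data
        host = socket.gethostname()
        if host == 'my-laptop':                      # placeholder hostname
            return '/home/me/data/PredictionCode'    # placeholder data root
        raise Exception('Unknown host %s: add it to prediction/userTracker.py' % host)

    # scripts then build all file locations relative to that root
    root = dataPath()
    out_loc = os.path.join(root, 'Analysis/AML32_moving_results.hdf5')

One detail worth double-checking in the converted scripts: several of the new outLocData lines call os.path.join(path, '/Analysis/...') with a leading slash. Because the second argument is absolute, os.path.join discards path there and returns just '/Analysis/...', unlike the sibling outLoc lines that omit the slash.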