From dece99d7a06daa561b0ec67a68745e0e92bbde84 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Wed, 10 Jul 2019 11:54:20 -0400 Subject: [PATCH 01/25] adding a gitignore to ignore binary stuff --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0fe2c40 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +build/ From 70c277801a5d65716cd678208c3e599eeabdee4e Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Wed, 10 Jul 2019 12:05:00 -0400 Subject: [PATCH 02/25] fixed paths in runAnalysis for my mac --- prediction/runAnalysis.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 0ae88b6..cfbd95c 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -17,16 +17,16 @@ condition = 'moving' # Moving, immobilized, chip first = True # if 0true, create new HDF5 file transient = 0 -save = False +save = True ############################################### # # load data into dictionary # ############################################## -folder = '{}_{}/'.format(typ, condition) -dataLog = '{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "Analysis/{}_{}.hdf5".format(typ, condition) +folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) +dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) +outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) +outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd From ea6fc5c7a007ed18f0dc43de7d3daf5157f3a02b Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Wed, 10 Jul 2019 13:55:56 -0400 Subject: [PATCH 03/25] updated paths for S4.py --- figures/supp/S4.py | 10 +++++----- prediction/dataHandler.pyc | Bin 19334 -> 19334 bytes prediction/dimReduction.py | 2 +- prediction/dimReduction.pyc | Bin 34403 -> 34406 bytes prediction/makePlots.pyc | Bin 49872 -> 49872 bytes prediction/plotAnalysis.py | 9 +++++---- prediction/runAnalysis.py | 2 +- prediction/stylesheet.pyc | Bin 11948 -> 11948 bytes 8 files changed, 12 insertions(+), 11 deletions(-) diff --git a/figures/supp/S4.py b/figures/supp/S4.py index 2ade581..abfc9b2 100755 --- a/figures/supp/S4.py +++ b/figures/supp/S4.py @@ -64,11 +64,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: - for condition in ['chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + for condition in [ 'chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: + folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) + dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) + outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) + outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) try: # load multiple datasets diff --git 
a/prediction/dataHandler.pyc b/prediction/dataHandler.pyc index 79a907304cd584301d9e2e4b88e3506e16107d05..619e87f96a1c199b42f07d4075a8efb515c3f784 100644 GIT binary patch delta 19 acmZph&e%4ck%Rd&FP99XTI@!SI&T0ulLd4D delta 19 acmZph&e%4ck%Rd&FV{`OJ24wM>bwC(3I`Sd diff --git a/prediction/dimReduction.py b/prediction/dimReduction.py index b1b5a1c..f5da67f 100755 --- a/prediction/dimReduction.py +++ b/prediction/dimReduction.py @@ -1268,7 +1268,7 @@ def predictNeuralDynamicsfromBehavior(data, splits, pars): train, test = splits[label]['Train'], splits[label]['Test'] # create dimensionality-reduced behavior - pca with 10 eigenworms cl = data['CL'] - eigenworms = dh.loadEigenBasis(filename = 'utility/Eigenworms.dat', nComp=4, new=True) + eigenworms = dh.loadEigenBasis(filename = '../utility/Eigenworms.dat', nComp=4, new=True) pcsNew, meanAngle, lengths, refPoint = dh.calculateEigenwormsFromCL(cl, eigenworms) behavior = pcsNew.T # nevermind, use the behaviors we use for lasso instead diff --git a/prediction/dimReduction.pyc b/prediction/dimReduction.pyc index bd9ea36f1f30cd63c5e9c7ef1704baa30f0d16bb..740d5a54af933412795271bbaf6c87ac0d9023b9 100644 GIT binary patch delta 333 zcmaFd!}P3&iG%qwFV`(*wb+duM|~JeHlOq16J|`?d^f(8kuhU(a^gIJ0!9Xgpb!v| z1tQWm3nqoLaA$+qoeV&td~$JG9%IGiM`>z6{haBmSr{uPZ_Tz504s?GDT#onW6Tkj zssib&1`%u^q6S1L1En>DCtnPcWxEVgSvxr}Cy1vGBvk?;>KP|bP*>c1A*Y>H1ninL zplJHXjK>{Ye>|}Z%AcHFRL|J8`E}6^4t7bP0eX7+n~zs3FfmTw{G#?fGvmC;HyTf} tpypiZu0UbS;p;~k2alQWL!0QQp;c4WRSQchyXjd9K_NFIimr@Y6B4+AYvMbm<1yC TgNOq_LW5y)zq-=ocdexWpdw#Z delta 330 zcmaFX!}Pd^iG%qwFW1rE4lx@!j`}bbZ$9V4C(M|-`EGnGBV+pH+ls7rMsGhNN^XsA;94ul$12&(kR$yYBw)u7KduGPDlW#YkWLpC= XYJ%wO|p#JHlJuZ!^pUD^3;~Uwn-pyM-TyaZ5fEA4RS+0h}8-r+Cjur5HS-(>;n<| QfrJLbiX$sb!w0ndA1_y7O^ diff --git a/prediction/makePlots.pyc b/prediction/makePlots.pyc index feed789a0bd6871aaba9ff5a956b35ffdf378893..448d7a74e02441c7f88ff7a13252dadba5386d32 100644 GIT binary patch delta 19 bcmcc6%6y@fnS=Q=FW2`K%CQ?cP8W(up8#$Kf0RTWo2Mz!L From 48b39c5c93eba1d352cf38c2714242301229ca15 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 31 Oct 2019 14:36:22 -0400 Subject: [PATCH 04/25] Minimum set of changes that allow me to regenerate figures I'm a little disappointed to see that in the newly regenerated Fig 2 I get an R^2 of .44 instead of the expected .49. Not sure what's going on here because as far as I can tell this should be the code exactly as Monika had left it, with only superficial changes such as wrapping things in a function and pointing to the correct filenames. But it's pretty close. I plan to use this as a jumping off point to implement the minimum changes needed to change how we do normalization, interpolation and smoothing. --- prediction/metaRunAnalysis.py | 33 ++ prediction/noiseModel.py | 217 ++++++++++++ prediction/provenance.py | 43 +++ prediction/runAnalysis.py | 623 +++++++++++++++++----------------- 4 files changed, 608 insertions(+), 308 deletions(-) create mode 100644 prediction/metaRunAnalysis.py create mode 100644 prediction/noiseModel.py create mode 100644 prediction/provenance.py diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py new file mode 100644 index 0000000..0882f6d --- /dev/null +++ b/prediction/metaRunAnalysis.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Created on Wed Jul 10 14:14:34 2019 This is a meta script to run runAnalysis over and over again.
+@author: leifer +""" + +#### + +import runAnalysis as analysis + +analysis.actuallyRun('AML32','moving') +analysis.actuallyRun('AML18','moving') +analysis.actuallyRun('AML70','moving') +analysis.actuallyRun('AML175','moving') + + +analysis.actuallyRun('AML32','immobilized') +analysis.actuallyRun('AML18','immobilized') +analysis.actuallyRun('AML70','immobilized') + +analysis.actuallyRun('AML32','chip') +analysis.actuallyRun('Special','transition') #Note these are identical.. + # I copied and pasted the input files.. + +analysis.actuallyRun('AML70','chip') + +execfile('../figures/main/fig2v3.py') +execfile('../figures/supp/S4.py') +execfile('../figures/supp/S2.py') +execfile('../figures/main/fig1v3.py') +execfile('../figures/main/fig2_expVarslm.py') diff --git a/prediction/noiseModel.py b/prediction/noiseModel.py new file mode 100644 index 0000000..0088a71 --- /dev/null +++ b/prediction/noiseModel.py @@ -0,0 +1,217 @@ +### noiseModel.py +# We suspect that the neural dynamics we observe in freely moving GCaMP worms have three components: +# 1) Neural Signals representing locomotion +# 2) Neural signals representing other biological processes (sensory processing, working memory, etc) +# 3) Noise, including from motion artifact +# +# We know for sure that (1) & (3) exist. We can decode locomotion so we know that (1) must be present. +# And we know that noise (3) is present because we see it in the GFP moving animals. +# +# To bolster our argument that neural signals representing other biological processes are present in our recordings, +# we will attempt to reject the following null hypothesis: +# +# Null hypothesis is that ONLY neural signals representing locomotion AND noise are present. I.e. only (1) and (3) and +# not (2). +# +# +# We will build up our null model by taking a real freely moving GFP worm that therefore has only noise (3) and we +# will synthetically generate pure locomotory signals (1) and adjust the relative proportion of noise to +# locomotory signals. +# +# We will study the predictive performance of our decoder on this model and also the percent variance explained by the +# locomotory signal to try to assess whether the null model fits our experimental observations. + + +## Find a good GFP recording to use. + +dataset = '/Users/leifer/workspace/PredictionCode/AML18_moving/BrainScanner20160506_160928_MS/heatData.mat' + +# Import the recording based on the output of Jeff's 3dBrain matlab analysis pipeline +import scipy.io as sio +mat_contents = sio.loadmat(dataset) +rPhotoCorr = mat_contents['rPhotoCorr'] +rRaw = mat_contents['rRaw'] +gPhotoCorr = mat_contents['gPhotoCorr'] +gRaw = mat_contents['gRaw'] +Ratio2 = mat_contents['Ratio2'] + +# Import Monika's ICA'd version (with my modification s.t.
there is no z-scoring) +# (Note this section is copy pasted from S2.py) +import prediction.dataHandler as dh +import numpy as np +data = {} +for typ in ['AML18','AML32']: + for condition in ['moving']: + folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) + dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) + outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) + outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + + try: + # load multiple datasets + dataSets = dh.loadDictFromHDF(outLocData) + keyList = np.sort(dataSets.keys()) + results = dh.loadDictFromHDF(outLoc) + # store in dictionary by typ and condition + key = '{}_{}'.format(typ, condition) + data[key] = {} + data[key]['dsets'] = keyList + data[key]['input'] = dataSets + data[key]['analysis'] = results + except IOError: + print typ, condition, 'not found.' + pass +print 'Done reading data.' + + + + +import matplotlib.pyplot as plt +from prediction import provenance as prov + + +ordIndx=data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ordering'] + +fig1=plt.figure() +plt.subplot(4, 1, 1) +plt.imshow(rPhotoCorr[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('rPhotoCorr') + +plt.subplot(4,1,2) +plt.imshow(gPhotoCorr[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('gPhotoCorr') + +plt.subplot(4,1,3) +plt.imshow(np.divide(gPhotoCorr,rPhotoCorr)[ordIndx,:],aspect='auto') +plt.colorbar() +plt.title('gPhotoCorr/rPhotoCorr') + + +ax=plt.subplot(4, 1, 4) +plt.imshow(data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ActivityFull'],aspect='auto') +plt.colorbar() +prov.stamp(ax,0,-.3) +plt.title('"ActivityFull" (ICAd, plus Andys modified normalization) \n' +dataset) +plt.show() + + + +fig2=plt.figure() +ax21=plt.subplot(5, 1, 1) +plt.imshow(rRaw[ordIndx, :], aspect='auto', vmin=np.nanmean(rRaw)-2*np.nanstd(rRaw), vmax=np.nanmean(rRaw)+2*np.nanstd(rRaw)) +plt.colorbar() +plt.title('rRaw') + + +ax22=plt.subplot(5,1,2) +plt.plot(10*np.sum(np.isnan(rRaw) ,axis=0)) #plot how many nans at each time point (x10 fo visibility) +plt.plot(np.nanmean(rRaw, axis=0)) # plot the average rRaw intesnity +plt.title('rRaw Average Intensity, and number of neurons with NaN') +ax22.set_ylim( np.nanmean(rRaw)-1*np.nanstd(rRaw), np.nanmean(rRaw)+2*np.nanstd(rRaw) ) +ax22.set_xlim(0,rRaw.shape[1]) +pos21 = ax21.get_position() +pos22 = ax22.get_position() +ax22.set_position([pos21.x0,pos22.y0,pos21.width,pos22.height]) + + + +plt.subplot(5,1,3) +plt.imshow(gRaw[ordIndx, :], aspect='auto', vmin=np.nanmean(gRaw)-2*np.nanstd(gRaw), vmax=np.nanmean(gRaw)+2*np.nanstd(gRaw)) +plt.colorbar() +plt.title('gRaw') + +plt.subplot(5,1,4) +plt.imshow(np.divide(gRaw, rRaw)[ordIndx, :], aspect='auto') +plt.colorbar() +plt.title('gRaw/rRaw') + + +ax=plt.subplot(5, 1, 5) +plt.imshow(data['AML18_moving']['input']['BrainScanner20160506_160928']['Neurons']['ActivityFull'],aspect='auto') +plt.colorbar() +prov.stamp(ax,0,-.3) +plt.title('"ActivityFull" (ICAd, plus Andys modified normalization) \n' +dataset) +plt.xlabel('Volumes (6 vol /s )') +plt.show() + + + + + + +## Plot velocity and body curvature +vel=data['AML18_moving']['input']['BrainScanner20160506_160928']['Behavior']['AngleVelocity'] +bodycurv= data['AML18_moving']['input']['BrainScanner20160506_160928']['Behavior']['Eigenworm3'] + +if True: + plt.subplots(4,1,sharex=True) + ax1=plt.subplot(4, 1, 1) + 
plt.imshow(rPhotoCorr[ordIndx,:],aspect='auto') + plt.colorbar() + plt.title('rPhotoCorr') + + ax2=plt.subplot(4,1,2) + plt.imshow(gPhotoCorr[ordIndx,:],aspect='auto') + plt.colorbar() + plt.title('gPhotoCorr') + + + + ax3=plt.subplot(4,1,3) + plt.plot(vel) + plt.title('Velocity') + ax3.set_xlim(ax1.get_xlim()) + + ax4=plt.subplot(4, 1, 4) + plt.plot(bodycurv) + prov.stamp(ax4,0,-.3) + plt.title('Body Curvature') + ax4.set_xlim(ax1.get_xlim()) + + # align the axes + pos1 = ax1.get_position() + pos2 = ax2.get_position() + pos3 = ax3.get_position() + pos4 = ax4.get_position() + ax3.set_position([pos1.x0,pos3.y0,pos1.width,pos3.height]) + ax4.set_position([pos1.x0,pos4.y0,pos1.width,pos4.height]) + + plt.show() + + + +if False: #Just checking the difference between TimeFull and Time + plt.figure() + plt.plot(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['TimeFull'], label='TimeFull') + plt.plot(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['Time'],label='Time') + plt.legend() + plt.show() + + plt.figure() + plt.plot(np.diff(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['TimeFull']), + label='TimeFull') + plt.plot(np.diff(data['AML32_moving']['input']['BrainScanner20170610_105634']['Neurons']['Time']), + label='Time') + plt.legend() + plt.show() + +## Extract neural weights learned by the SLM from a GCaMP Recording + + + +## Shuffle the neural Weights + +## Generate synthetic pure locomotory signals based on the GFP recording + +## Combine the GFP and synthetic locomotory signals for a given relative strength (lambda) + +## Figure out how normalization should occur. [Has somethign to do with lambda.] + +## Measure Decoder Performance (R^2) + +## Measure variance explained + +#Get the heatmap values + diff --git a/prediction/provenance.py b/prediction/provenance.py new file mode 100644 index 0000000..769b8d7 --- /dev/null +++ b/prediction/provenance.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +""" +Module to stamp plots with current git hash and creation date. +This helps keep track of what the state of the code was in when it produced a plot. 
+ +19 July 2019 +Andrew Leifer +leifer@princeton.edu +""" + +def getStampString(): + """ + + :return: string with date, time, hash, url and path of current code + """ + ## Stamp with code version and date info + import git + repo = git.Repo(search_parent_directories=True) + hash = repo.head.object.hexsha + gitpath = repoPath = repo._working_tree_dir + giturl = repo.remotes.origin.url + + from datetime import datetime + timestamp = datetime.today().strftime('%Y-%m-%d %H:%M') + + return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath) + +def stamp(ax, x,y,notes): + """ + :param ax: matplotlib axes h + :param x: fractional location in the plot + :param y: fractional location in the plot + :return: + """ + + # these are matplotlib.patch.Patch properties + props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) + + # place a text box in upper left in axes coords + ax.text(x, y, getStampString()+'\n'+notes, transform=ax.transAxes, fontsize=8, + verticalalignment='top', bbox=props) + + diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index c47b4b8..8058402 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -13,320 +13,327 @@ # run parameters # ############################################### -typ = 'AML175' # possible values AML32, AML18, AML70, AML175 -condition = 'moving' # Moving, immobilized, chip -first = True # if 0true, create new HDF5 file -transient = 0 -save = True -############################################### -# -# load data into dictionary -# -############################################## -folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) -dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) - -# data parameters -dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd - 'gaussWindow':100, # sgauss window for angle velocity derivative. must be odd - 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix - 'windowGCamp': 6, # gauss window for red and green channel - 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data - } - -dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) -keyList = np.sort(dataSets.keys()) -if save: - dh.saveDictToHDF(outLocData, dataSets) - -## results dictionary -resultDict = {} -for kindex, key in enumerate(keyList): - resultDict[key] = {} - resultDict[key]['pars'] = dataPars -# analysis parameters - -pars ={'nCompPCA':20, # no of PCA components - 'PCAtimewarp':False, #timewarp so behaviors are equally represented - 'trainingCut': 0.6, # what fraction of data to use for training - 'trainingType': 'middle', # simple, random or middle.select random or consecutive data for training. Middle is a testset in the middle - 'linReg': 'simple', # ordinary or ransac least squares - 'trainingSample': 1, # take only samples that are at least n apart to have independence. 
4sec = gcamp_=->24 apart - 'useRank': 0, # use the rank transformed version of neural data for all analyses - 'useDeconv': 0, # use the deconvolved transformed version of neural data for all analyses - 'useRaw': 0, # use the deconvolved transformed version of neural data for all analyses - 'nCluster': 10, # use the deconvolved transformed version of neural data for all analyses - 'useClust':False,# use clusters in the fitting procedure. - 'periods': np.arange(0, 300) # relevant periods in seconds for timescale estimate - } - -behaviors = ['AngleVelocity', 'Eigenworm3'] -############################################### -# -# check which calculations to perform -# -############################################## -createIndicesTest = 1#True -periodogram = 1 -half_period = 1 -hierclust = 0 -bta = 0 -pca = 1#False -kato_pca = 1#False -half_pca = 1 -corr = 1 -predNeur = 0 -predPCA = 0 -svm = 0 -lasso = 0 -elasticnet = 0 -lagregression = 0 -# this requires moving animals -if condition != 'immobilized': - predNeur = 1 +def actuallyRun(typ='AML32', condition = 'moving'): +# typ possible values AML32, AML18, AML70, AML175 +# condition possible values moving, immobilized, chip + + + #typ = 'AML175' # possible values AML32, AML18, AML70, AML175 + #condition = 'moving' # Moving, immobilized, chip + + first = True # if 0true, create new HDF5 file + transient = 0 + save = True + ############################################### + # + # load data into dictionary + # + ############################################## + folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) + dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) + outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) + outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + + # data parameters + dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd + 'gaussWindow':100, # sgauss window for angle velocity derivative. must be odd + 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix + 'windowGCamp': 6, # gauss window for red and green channel + 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data + } + + dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) + keyList = np.sort(dataSets.keys()) + if save: + dh.saveDictToHDF(outLocData, dataSets) + + ## results dictionary + resultDict = {} + for kindex, key in enumerate(keyList): + resultDict[key] = {} + resultDict[key]['pars'] = dataPars + # analysis parameters + + pars ={'nCompPCA':20, # no of PCA components + 'PCAtimewarp':False, #timewarp so behaviors are equally represented + 'trainingCut': 0.6, # what fraction of data to use for training + 'trainingType': 'middle', # simple, random or middle.select random or consecutive data for training. Middle is a testset in the middle + 'linReg': 'simple', # ordinary or ransac least squares + 'trainingSample': 1, # take only samples that are at least n apart to have independence. 
4sec = gcamp_=->24 apart + 'useRank': 0, # use the rank transformed version of neural data for all analyses + 'useDeconv': 0, # use the deconvolved transformed version of neural data for all analyses + 'useRaw': 0, # use the deconvolved transformed version of neural data for all analyses + 'nCluster': 10, # use the deconvolved transformed version of neural data for all analyses + 'useClust':False,# use clusters in the fitting procedure. + 'periods': np.arange(0, 300) # relevant periods in seconds for timescale estimate + } + + behaviors = ['AngleVelocity', 'Eigenworm3'] + + ############################################### + # + # check which calculations to perform + # + ############################################## + createIndicesTest = 1#True + periodogram = 1 + half_period = 1 + hierclust = 0 + bta = 0 + pca = 1#False + kato_pca = 1#False + half_pca = 1 + corr = 1 + predNeur = 0 + predPCA = 0 svm = 0 - lasso = 1 - elasticnet = 1#True - predPCA = 1 + lasso = 0 + elasticnet = 0 lagregression = 0 + # this requires moving animals + if condition != 'immobilized': + predNeur = 1 + svm = 0 + lasso = 1 + elasticnet = 1#True + predPCA = 1 + lagregression = 0 -############################################### -# -# create training and test set indices -# -############################################## -if createIndicesTest: - for kindex, key in enumerate(keyList): - resultDict[key] = {'Training':{}} - for label in behaviors: - train, test = dr.createTrainingTestIndices(dataSets[key], pars, label=label) - if transient: - train = np.where(dataSets[key]['Neurons']['Time']<4*60)[0] - # after 4:30 min - test = np.where((dataSets[key]['Neurons']['Time']>7*60)*(dataSets[key]['Neurons']['Time']<14*60))[0] - resultDict[key]['Training']['Half'] ={'Train':train} - resultDict[key]['Training']['Half']['Test'] = test - else: - # add half split - midpoint = np.mean(dataSets[key]['Neurons']['Time']) - trainhalf = np.where(dataSets[key]['Neurons']['Time']midpoint)[0] - resultDict[key]['Training']['Half'] ={'Train':trainhalf} - resultDict[key]['Training']['Half']['Test'] = testhalf - resultDict[key]['Training'][label] = {'Train':train } - resultDict[key]['Training'][label]['Test']=test - - - print "Done generating trainingsets" + ############################################### + # + # create training and test set indices + # + ############################################## + if createIndicesTest: + for kindex, key in enumerate(keyList): + resultDict[key] = {'Training':{}} + for label in behaviors: + train, test = dr.createTrainingTestIndices(dataSets[key], pars, label=label) + if transient: + train = np.where(dataSets[key]['Neurons']['Time']<4*60)[0] + # after 4:30 min + test = np.where((dataSets[key]['Neurons']['Time']>7*60)*(dataSets[key]['Neurons']['Time']<14*60))[0] + resultDict[key]['Training']['Half'] ={'Train':train} + resultDict[key]['Training']['Half']['Test'] = test + else: + # add half split + midpoint = np.mean(dataSets[key]['Neurons']['Time']) + trainhalf = np.where(dataSets[key]['Neurons']['Time']midpoint)[0] + resultDict[key]['Training']['Half'] ={'Train':trainhalf} + resultDict[key]['Training']['Half']['Test'] = testhalf + resultDict[key]['Training'][label] = {'Train':train } + resultDict[key]['Training'][label]['Test']=test -############################################### -# -# calculate the periodogram of the neural signals -# -############################################## -if periodogram: - print 'running periodogram(s)' - for kindex, key in enumerate(keyList): - resultDict[key]['Period'] = 
dr.runPeriodogram(dataSets[key], pars, testset = None) -# for half the sample each -if half_period: - print 'running half periodogram(s)' - for kindex, key in enumerate(keyList): - splits = resultDict[key]['Training'] - resultDict[key]['Period1Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Train']) - resultDict[key]['Period2Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Test']) -############################################### -# -# correlation neurons and behavior -# -############################################## -if corr: - print 'running Correlation.' - for kindex, key in enumerate(keyList): - resultDict[key]['Correlation'] = dr.behaviorCorrelations(dataSets[key], behaviors) - #half1 = resultDict[key]['Training'][behaviors[0]]['Train'] - #resultDict[key]['CorrelationHalf'] = dr.behaviorCorrelations(dataSets[key], behaviors, subset = half1) - -############################################### -# -# run svm to predict discrete behaviors -# -############################################## -if svm: - for kindex, key in enumerate(keyList): - print 'running SVM.' - splits = resultDict[key]['Training'] - resultDict[key]['SVM'] = dr.discreteBehaviorPrediction(dataSets[key], pars, splits ) + print "Done generating trainingsets" -############################################### -# -# run PCA and store results -# -############################################## -#%% -if pca: - print 'running PCA' - for kindex, key in enumerate(keyList): - resultDict[key]['PCA'] = dr.runPCANormal(dataSets[key], pars) - resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) - - - #correlate behavior and PCA - #resultDict[key]['PCACorrelation']=dr.PCACorrelations(dataSets[key],resultDict[key], behaviors, flag = 'PCA', subset = None) -############################################### -# -# run Kato PCA -# -############################################## -#%% -if kato_pca: - print 'running Kato et. 
al PCA' - for kindex, key in enumerate(keyList): - resultDict[key]['katoPCA'] = dr.runPCANormal(dataSets[key], pars, deriv = True) - splits = resultDict[key]['Training'] - resultDict[key]['katoPCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train'], deriv=True) - - resultDict[key]['katoPCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Test'], deriv=True) - -############################################### -# -# run split first-second half PCA -# -############################################## -#%% -if half_pca: - print 'half-split PCA' - for kindex, key in enumerate(keyList): - # run PCA on each half - splits = resultDict[key]['Training'] - resultDict[key]['PCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train']) - resultDict[key]['PCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset =splits['Half']['Test']) - resultDict[key]['PCArankCorr'] = dr.rankCorrPCA(resultDict[key]) -#%% -############################################### -# -# predict neural dynamics from behavior -# -############################################## -if predPCA: - for kindex, key in enumerate(keyList): - print 'predicting behavior PCA' - splits = resultDict[key]['Training'] - resultDict[key]['PCAPred'] = dr.predictBehaviorFromPCA(dataSets[key], \ - splits, pars, behaviors) -#%% -############################################### -# -# predict neural dynamics from behavior -# -############################################## -if predNeur: - for kindex, key in enumerate(keyList): - print 'predicting neural dynamics from behavior' - splits = resultDict[key]['Training'] - resultDict[key]['RevPred'] = dr.predictNeuralDynamicsfromBehavior(dataSets[key], splits, pars) - plt.show() -#%% -############################################### -# -# use agglomerative clustering to connect similar neurons -# -############################################## -if hierclust: - for kindex, key in enumerate(keyList): - print 'running clustering' - resultDict[key]['clust'] = dr.runHierarchicalClustering(dataSets[key], pars) -#%% -############################################### -# -# use behavior triggered averaging to create non-othogonal axes -# -############################################## -if bta: - for kindex, key in enumerate(keyList): - print 'running BTA' - resultDict[key]['BTA'] =dr.runBehaviorTriggeredAverage(dataSets[key], pars) -#%% -############################################### -# -# linear regression using LASSO -# -############################################## -if lasso: - print "Performing LASSO.", - for kindex, key in enumerate(keyList): - - splits = resultDict[key]['Training'] - resultDict[key]['LASSO'] = dr.runLasso(dataSets[key], pars, splits, plot=0, behaviors = behaviors) - # calculate how much more neurons contribute - tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key],splits, pars, fitmethod = 'LASSO', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['LASSO'][tmpKey].update(tmpDict[tmpKey]) - - tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'LASSO', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['LASSO'][tmpKey]=tmpDict[tmpKey] - - # do converse calculation -- give it only the neurons non-zero in previous case - subset = {} - subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['LASSO']['Eigenworm3']['weights'])>0)[0] - subset['Eigenworm3'] = 
np.where(np.abs(resultDict[key]['LASSO']['AngleVelocity']['weights'])>0)[0] - resultDict[key]['ConversePredictionLASSO'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'LASSO', subset = subset) - - -#%% -############################################### -# -# linear regression using elastic Net -# -############################################## -if elasticnet: - for kindex, key in enumerate(keyList): - print 'Running Elastic Net', key - splits = resultDict[key]['Training'] - resultDict[key]['ElasticNet'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors) - # calculate how much more neurons contribute - tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key], splits,pars, fitmethod = 'ElasticNet', behaviors = behaviors, ) - for tmpKey in tmpDict.keys(): - resultDict[key]['ElasticNet'][tmpKey].update(tmpDict[tmpKey]) - - tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'ElasticNet', behaviors = behaviors) - for tmpKey in tmpDict.keys(): - resultDict[key]['ElasticNet'][tmpKey]=tmpDict[tmpKey] - # do converse calculation -- give it only the neurons non-zero in previous case - subset = {} - subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['ElasticNet']['Eigenworm3']['weights'])>0)[0] - subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['ElasticNet']['AngleVelocity']['weights'])>0)[0] - resultDict[key]['ConversePredictionEN'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'ElasticNet', subset = subset) - - # run scrambled control - #print 'Running Elastic Net scrambled' - #resultDict[key]['ElasticNetRandomized'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors, scramble=True) - - -#%% -############################################### -# -# lag-time fits of neural activity -# -############################################## -if lagregression: - for kindex, key in enumerate(keyList): - print 'Running lag calculation', key - splits = resultDict[key]['Training'] - #resultDict[key]['LagLASSO'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3)) - resultDict[key]['LagEN'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3), flag='ElasticNet') + ############################################### + # + # calculate the periodogram of the neural signals + # + ############################################## + if periodogram: + print 'running periodogram(s)' + for kindex, key in enumerate(keyList): + resultDict[key]['Period'] = dr.runPeriodogram(dataSets[key], pars, testset = None) + # for half the sample each + if half_period: + print 'running half periodogram(s)' + for kindex, key in enumerate(keyList): + splits = resultDict[key]['Training'] + resultDict[key]['Period1Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Train']) + resultDict[key]['Period2Half'] = dr.runPeriodogram(dataSets[key], pars, testset = splits[behaviors[0]]['Test']) -#%% -############################################### -# -# save data as HDF5 file -# -############################################## -if save: - dh.saveDictToHDF(outLoc, resultDict) + ############################################### + # + # correlation neurons and behavior + # + 
############################################## + if corr: + print 'running Correlation.' + for kindex, key in enumerate(keyList): + resultDict[key]['Correlation'] = dr.behaviorCorrelations(dataSets[key], behaviors) + #half1 = resultDict[key]['Training'][behaviors[0]]['Train'] + #resultDict[key]['CorrelationHalf'] = dr.behaviorCorrelations(dataSets[key], behaviors, subset = half1) + + ############################################### + # + # run svm to predict discrete behaviors + # + ############################################## + if svm: + for kindex, key in enumerate(keyList): + print 'running SVM.' + splits = resultDict[key]['Training'] + resultDict[key]['SVM'] = dr.discreteBehaviorPrediction(dataSets[key], pars, splits ) + + ############################################### + # + # run PCA and store results + # + ############################################## + #%% + if pca: + print 'running PCA' + for kindex, key in enumerate(keyList): + resultDict[key]['PCA'] = dr.runPCANormal(dataSets[key], pars) + resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) + + + #correlate behavior and PCA + #resultDict[key]['PCACorrelation']=dr.PCACorrelations(dataSets[key],resultDict[key], behaviors, flag = 'PCA', subset = None) + ############################################### + # + # run Kato PCA + # + ############################################## + #%% + if kato_pca: + print 'running Kato et. al PCA' + for kindex, key in enumerate(keyList): + resultDict[key]['katoPCA'] = dr.runPCANormal(dataSets[key], pars, deriv = True) + splits = resultDict[key]['Training'] + resultDict[key]['katoPCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train'], deriv=True) + + resultDict[key]['katoPCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Test'], deriv=True) + + ############################################### + # + # run split first-second half PCA + # + ############################################## + #%% + if half_pca: + print 'half-split PCA' + for kindex, key in enumerate(keyList): + # run PCA on each half + splits = resultDict[key]['Training'] + resultDict[key]['PCAHalf1'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset = splits['Half']['Train']) + resultDict[key]['PCAHalf2'] = dr.runPCANormal(dataSets[key], pars, whichPC=0, testset =splits['Half']['Test']) + resultDict[key]['PCArankCorr'] = dr.rankCorrPCA(resultDict[key]) + #%% + ############################################### + # + # predict neural dynamics from behavior + # + ############################################## + if predPCA: + for kindex, key in enumerate(keyList): + print 'predicting behavior PCA' + splits = resultDict[key]['Training'] + resultDict[key]['PCAPred'] = dr.predictBehaviorFromPCA(dataSets[key], \ + splits, pars, behaviors) + #%% + ############################################### + # + # predict neural dynamics from behavior + # + ############################################## + if predNeur: + for kindex, key in enumerate(keyList): + print 'predicting neural dynamics from behavior' + splits = resultDict[key]['Training'] + resultDict[key]['RevPred'] = dr.predictNeuralDynamicsfromBehavior(dataSets[key], splits, pars) + plt.show() + #%% + ############################################### + # + # use agglomerative clustering to connect similar neurons + # + ############################################## + if hierclust: + for kindex, key in enumerate(keyList): + print 'running clustering' + resultDict[key]['clust'] = 
dr.runHierarchicalClustering(dataSets[key], pars) + #%% + ############################################### + # + # use behavior triggered averaging to create non-othogonal axes + # + ############################################## + if bta: + for kindex, key in enumerate(keyList): + print 'running BTA' + resultDict[key]['BTA'] =dr.runBehaviorTriggeredAverage(dataSets[key], pars) + #%% + ############################################### + # + # linear regression using LASSO + # + ############################################## + if lasso: + print "Performing LASSO.", + for kindex, key in enumerate(keyList): + + splits = resultDict[key]['Training'] + resultDict[key]['LASSO'] = dr.runLasso(dataSets[key], pars, splits, plot=0, behaviors = behaviors) + # calculate how much more neurons contribute + tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key],splits, pars, fitmethod = 'LASSO', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['LASSO'][tmpKey].update(tmpDict[tmpKey]) + + tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'LASSO', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['LASSO'][tmpKey]=tmpDict[tmpKey] + + # do converse calculation -- give it only the neurons non-zero in previous case + subset = {} + subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['LASSO']['Eigenworm3']['weights'])>0)[0] + subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['LASSO']['AngleVelocity']['weights'])>0)[0] + resultDict[key]['ConversePredictionLASSO'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'LASSO', subset = subset) + + + #%% + ############################################### + # + # linear regression using elastic Net + # + ############################################## + if elasticnet: + for kindex, key in enumerate(keyList): + print 'Running Elastic Net', key + splits = resultDict[key]['Training'] + resultDict[key]['ElasticNet'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors) + # calculate how much more neurons contribute + tmpDict = dr.scoreModelProgression(dataSets[key], resultDict[key], splits,pars, fitmethod = 'ElasticNet', behaviors = behaviors, ) + for tmpKey in tmpDict.keys(): + resultDict[key]['ElasticNet'][tmpKey].update(tmpDict[tmpKey]) + + tmpDict = dr.reorganizeLinModel(dataSets[key], resultDict[key], splits, pars, fitmethod = 'ElasticNet', behaviors = behaviors) + for tmpKey in tmpDict.keys(): + resultDict[key]['ElasticNet'][tmpKey]=tmpDict[tmpKey] + # do converse calculation -- give it only the neurons non-zero in previous case + subset = {} + subset['AngleVelocity'] = np.where(np.abs(resultDict[key]['ElasticNet']['Eigenworm3']['weights'])>0)[0] + subset['Eigenworm3'] = np.where(np.abs(resultDict[key]['ElasticNet']['AngleVelocity']['weights'])>0)[0] + resultDict[key]['ConversePredictionEN'] = dr.runLinearModel(dataSets[key], resultDict[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], fitmethod = 'ElasticNet', subset = subset) + + # run scrambled control + #print 'Running Elastic Net scrambled' + #resultDict[key]['ElasticNetRandomized'] = dr.runElasticNet(dataSets[key], pars,splits, plot=0, behaviors = behaviors, scramble=True) + + + #%% + ############################################### + # + # lag-time fits of neural activity + # + ############################################## + if lagregression: + for kindex, key in 
enumerate(keyList): + print 'Running lag calculation', key + splits = resultDict[key]['Training'] + #resultDict[key]['LagLASSO'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3)) + resultDict[key]['LagEN'] = dr.timelagRegression(dataSets[key], pars, splits, plot = False, behaviors = ['AngleVelocity', 'Eigenworm3'], lags = np.arange(-18,19, 3), flag='ElasticNet') + + #%% + ############################################### + # + # save data as HDF5 file + # + ############################################## + if save: + dh.saveDictToHDF(outLoc, resultDict) From 66baa05d9e41385a42f3d400df6cc627dd6a0d1e Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 31 Oct 2019 15:24:43 -0400 Subject: [PATCH 05/25] Changed GCAMP smoothing from 6 to 5. --- prediction/dataHandler.py | 3 ++- prediction/runAnalysis.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 911f46c..e60ddc6 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -318,7 +318,8 @@ def preprocessNeuralData(R, G, dataPars): # smooth with GCamp6 halftime = 1s RS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in R]) - GS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in G]) + GS =np.array([gaussian_filter1d(line,dataPars['windowGCamp']) for line in G]) + print("windowGCaMP:", str(dataPars['windowGCamp'])) # YR = GS/RS ## meansubtract = False#False#True diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 8058402..0f20af1 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -39,8 +39,8 @@ def actuallyRun(typ='AML32', condition = 'moving'): dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd 'gaussWindow':100, # sgauss window for angle velocity derivative. must be odd 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix - 'windowGCamp': 6, # gauss window for red and green channel - 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data + 'windowGCamp': 5, # gauss window for red and green channel + 'interpolateNans': 1,#interpolate gaps smaller than this of nan values in calcium data } dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) From ea64fc8045d96dbde79521b116c777f3adcb0294 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 31 Oct 2019 15:33:37 -0400 Subject: [PATCH 06/25] Turned off periodogram and some of the other analyses that I don't think are particularly relevant.. 
--- prediction/runAnalysis.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 0f20af1..f334419 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -77,13 +77,13 @@ def actuallyRun(typ='AML32', condition = 'moving'): # ############################################## createIndicesTest = 1#True - periodogram = 1 - half_period = 1 + periodogram = 0 + half_period = 0 hierclust = 0 bta = 0 pca = 1#False - kato_pca = 1#False - half_pca = 1 + kato_pca = 0#False + half_pca = 0 corr = 1 predNeur = 0 predPCA = 0 @@ -95,7 +95,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): if condition != 'immobilized': predNeur = 1 svm = 0 - lasso = 1 + lasso = 0 elasticnet = 1#True predPCA = 1 lagregression = 0 From 73091b2afbd46474d377a4a607dca57b789f83bd Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 7 Nov 2019 11:20:04 -0500 Subject: [PATCH 07/25] photobleaching correction by dividing off exponential fit I've added the photobleaching correction code. I am correcting photobleaching following the logic outlined in Xiaowen Chen's 2019 Phys Rev E paper. This is slightly different than what Jeff did in the 3dBrain MATLAB code. In that code he fit the exponential and then subtracted it off. Here I divide it off, but rescale the trace to match the original. Note: in its current state, the pipeline is not utilizing the photocorrected traces, but now at least I have the machinery to create them. --- prediction/dataHandler.py | 148 +++++++++++++++++++++++++++++++++++++- prediction/runAnalysis.py | 2 + 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index e60ddc6..ca2d881 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -4,6 +4,7 @@ load and compare multiple data sets for behavior prediction.
@author: monika scholz """ +from __future__ import division # make division give you floats, as is standard for python3 import scipy.io import os import numpy as np @@ -257,8 +258,11 @@ def transformEigenworms(pcs, dataPars): for pcindex, pc in enumerate(pcs): # mask nans by linearly interpolating mask = np.isnan(pc1) + if np.any(mask): pc[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), pc1[~mask]) + import warnings + warnings.warn('Found ' + str(np.sum(mask)) + 'NaNs that are being interpolated over in the behavior PC1, PC2 or PC3.') pcs[pcindex] = pc theta = np.unwrap(np.arctan2(pcs[2], pcs[1])) @@ -382,7 +386,12 @@ def loadData(folder, dataPars, ew=1): ethomask = np.isnan(etho) if np.any(ethomask): etho[ethomask] = 0 - + + rRaw=np.array(data['rRaw'])[:,:len(np.array(data['hasPointsTime']))] + gRaw=np.array(data['gRaw'])[:,:len(np.array(data['hasPointsTime']))] + rPhotoCorr=correctPhotobleaching(rRaw,dataPars['volumeAcquisitionRate']) + gPhotoCorr = correctPhotobleaching(gRaw, dataPars['volumeAcquisitionRate']) + #load neural data R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] @@ -558,3 +567,140 @@ def loadDictFromHDF(filePath): f.close() return d +def correctPhotobleaching(raw, vps=6): + """ Apply photobleaching correction to raw signals, works on a whole heatmap of neurons + + Use Xiaowen's photobleaching correction method which fits an exponential and then divides it off + Note we are following along with the explicit noise model from Xiaowen's Phys Rev E paper Eq 1 page 3 + Chen, Randi, Leifer and Bialek, Phys Rev E 2019 + + takes raw neural signals N (rows) neruons x T (cols) time points + we require that each neuron is a row and that there are more time points than rows + + """ + + # Make sure the data we are getting makes sense. + assert raw.ndim <= 2, "raw fluorescent traces passed to correctPhotobleaching() have the wrong number of dimensions" + + if raw.ndim == 2: + assert raw.shape[1] > raw.shape[ + 0], "raw fluorescent traces passed to correctPhotobleaching() have the wrong size! Perhaps it is transposed?" + + window_s = 12.6 # time of median filter window in seconds + medfilt_window = np.int(np.ceil(window_s * vps / 2.) 
* 2 + 1) # make it an odd number of frames + + photoCorr = np.zeros_like(raw) # initialize photoCorr + + showPlots = True + + + if raw.ndim == 2: + + # perform median filtering (Nan friendly) + from scipy.signal import medfilt + smoothed = medfilt(raw, [1, medfilt_window]) + + N_neurons = smoothed.shape[0] + + # initialize the exponential fit parameter outputs + popt = np.zeros([N_neurons, 3]) + pcov = np.zeros([3, 3, N_neurons]) + + # Exponential Fit on each neuron + for row in np.arange(N_neurons): + popt[row, :], pcov[:, :, row], xVals = fitPhotobleaching(smoothed[row, :], vps) + photoCorr[row, :] = popt[row, 0] * raw[row, :] / expfunc(xVals, *popt[row, :]) + + if showPlots: + import matplotlib.pyplot as plt + plt.plot(xVals, raw[row, :], 'b-', label=['raw, row: '+np.str(row)]) + plt.plot(xVals, photoCorr[row, :], "r-", + label=['photoCorr, row: '+np.str(row)]) + plt.xlabel('Time (s)') + plt.ylabel('ActivityTrace') + plt.legend() + plt.show() + + elif raw.ndim == 1: + smoothed = medfilt(raw, medfilt_window) + popt, pcov, xVals = fitPhotobleaching(smoothed, vps) + photoCorr= popt[0] * raw / expfunc(xVals, *popt) + + else: + raise ValueError('The number dimensions of the data in correctPhotobleaching are not 1 or 2') + + return photoCorr + + +def fitPhotobleaching(activityTrace, vps): + """" + Fit an exponential. + + Accepts a single neuron's activity trace. + + Use Xiaowen's photobleaching correction method which fits an exponential and then divides it off + Note we are following along with the explicit noise model from Xiaowen's Phys Rev E paper Eq 1 page 3 + Chen, Randi, Leifer and Bialek, Phys Rev E 2019 + + + """ + assert activityTrace.ndim == 1, "fitPhotobleaching only works on single neuron traces" + + # Now we will fit an exponential, following along with the tuotiral here: + # https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html + xVals = np.array(np.arange(activityTrace.shape[-1])) / np.float(vps) + + # Identify just the data that is not a NaN + nonNaNs = np.logical_not(np.isnan(activityTrace)) + + from scipy.optimize import curve_fit + + # set up some bounds on our exponential fitting parameter, y=a*exp(bx)+c + num_recordingLengths = 6 # the timescale of exponential decay should not be more than six recording lengths long + # because otherwise we could have recorded for way longer!! 
+ + bounds = ([0, 1 / (num_recordingLengths * max(xVals)), 0], # lower bounds + [max(activityTrace[nonNaNs]), 0.5, 2 * np.nanmean(activityTrace)]) # upper bound + + # as a first guess, a is half the max, b is 1/(half the length of the recording), and c is the average + popt_guess = [max(activityTrace) / 2, 2 / max(xVals), np.nanmean(activityTrace)] + + popt, pcov = curve_fit(expfunc, xVals[nonNaNs], activityTrace[nonNaNs], p0=popt_guess, + bounds=bounds) + + showPlots = False + if showPlots: + import matplotlib.pyplot as plt + plt.plot(xVals, activityTrace, 'b-', label='data') + plt.plot(xVals, expfunc(xVals, *popt), "r-", + label='fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) + plt.xlabel('Time (s)') + plt.ylabel('ActivityTrace') + plt.legend() + plt.show() + + ## Now we want to inspect our fit, find values that are clear outliers, and refit while excluding those + residual = xVals - expfunc(xVals, *popt) # its ok to have nan's here + + nSigmas = 3 # we want to exclude points that are three standard deviations away from the fit + + # Make a new mask that excludes the outliers + excOutliers = np.copy(nonNaNs) + excOutliers[np.nonzero(np.abs(residual) > (nSigmas * np.nanstd(residual)))] = False + + # Refit excluding the outliers, use the previous fit as initial guess + popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt, + bounds=bounds) + if showPlots: + plt.plot(xVals, expfunc(xVals, *popt), 'y-', + label='excluding outliers fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) + plt.legend() + + return popt, pcov, xVals + + +def expfunc(x, a, b, c): + # type: (xVals, a, b, c) -> yVals + return a * np.exp(-b * x) + c + + diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index f334419..ea31017 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -1,3 +1,4 @@ +from __future__ import division #give me floating point when I divide (standard in python3) # standard modules import numpy as np @@ -41,6 +42,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix 'windowGCamp': 5, # gauss window for red and green channel 'interpolateNans': 1,#interpolate gaps smaller than this of nan values in calcium data + 'volumeAcquisitionRate': 6., #rate at which volumes are acquired } dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) From d26ead8743e8c708fd67e6b088c3e8637fa1adad Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 7 Nov 2019 11:20:49 -0500 Subject: [PATCH 08/25] Adjusted the gaussian window used to calculate theta dot from 100 to 75 --- prediction/runAnalysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index ea31017..8b3dc82 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -38,7 +38,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd - 'gaussWindow':100, # sgauss window for angle velocity derivative. 
must be odd + 'gaussWindow': 75, # gaussianfilter1D is uesed to calculate theta dot from theta in transformEigenworms 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix 'windowGCamp': 5, # gauss window for red and green channel 'interpolateNans': 1,#interpolate gaps smaller than this of nan values in calcium data From 44aeca5fe7f49f3ad0551f331582ba850aea7237 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 7 Nov 2019 16:13:37 -0500 Subject: [PATCH 09/25] Improved robustness and fixed errors in photobleaching correction Now I can run the analysis all the way through and generate fig 2. Performance however is extremely low, probably because by using the raw traces I am skipping over some outlier removal that Jeff must have been taking care of. Also turned of the PCARaw analysis which seems to be unused and was throwing an error. --- prediction/dataHandler.py | 41 +++++++++++++++++++++++++---------- prediction/metaRunAnalysis.py | 6 ++++- prediction/runAnalysis.py | 2 +- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index ca2d881..67f1eec 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -389,12 +389,20 @@ def loadData(folder, dataPars, ew=1): rRaw=np.array(data['rRaw'])[:,:len(np.array(data['hasPointsTime']))] gRaw=np.array(data['gRaw'])[:,:len(np.array(data['hasPointsTime']))] - rPhotoCorr=correctPhotobleaching(rRaw,dataPars['volumeAcquisitionRate']) - gPhotoCorr = correctPhotobleaching(gRaw, dataPars['volumeAcquisitionRate']) + #load neural data - R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] +# R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] +# G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + + R = correctPhotobleaching(rRaw, dataPars['volumeAcquisitionRate']) + G = correctPhotobleaching(gRaw, dataPars['volumeAcquisitionRate']) + + if np.any(np.isnan(R)): + print("New rPHotoCorr has some NaNs in it ") + if np.any(np.isnan(G)): + print("New gPHotoCorr has some NaNs in it ") + # Ratio = np.array(data['Ratio2'])[:,:len(np.array(data['hasPointsTime']))] Y, dR, GS, RS, RM = preprocessNeuralData(R, G, dataPars) @@ -591,7 +599,7 @@ def correctPhotobleaching(raw, vps=6): photoCorr = np.zeros_like(raw) # initialize photoCorr - showPlots = True + showPlots = False if raw.ndim == 2: @@ -613,7 +621,7 @@ def correctPhotobleaching(raw, vps=6): if showPlots: import matplotlib.pyplot as plt - plt.plot(xVals, raw[row, :], 'b-', label=['raw, row: '+np.str(row)]) + plt.plot(xVals, smoothed[row, :], 'b-', label=['raw, row: '+np.str(row)]) plt.plot(xVals, photoCorr[row, :], "r-", label=['photoCorr, row: '+np.str(row)]) plt.xlabel('Time (s)') @@ -659,8 +667,12 @@ def fitPhotobleaching(activityTrace, vps): num_recordingLengths = 6 # the timescale of exponential decay should not be more than six recording lengths long # because otherwise we could have recorded for way longer!! + #Scale the activity trace to somethign around one. 
+ scaleFactor=np.nanmean(activityTrace) + activityTrace=activityTrace/scaleFactor + bounds = ([0, 1 / (num_recordingLengths * max(xVals)), 0], # lower bounds - [max(activityTrace[nonNaNs]), 0.5, 2 * np.nanmean(activityTrace)]) # upper bound + [max(activityTrace[nonNaNs])*1.5, 0.5, 2 * np.nanmean(activityTrace)]) # upper bound # as a first guess, a is half the max, b is 1/(half the length of the recording), and c is the average popt_guess = [max(activityTrace) / 2, 2 / max(xVals), np.nanmean(activityTrace)] @@ -680,22 +692,29 @@ def fitPhotobleaching(activityTrace, vps): plt.show() ## Now we want to inspect our fit, find values that are clear outliers, and refit while excluding those - residual = xVals - expfunc(xVals, *popt) # its ok to have nan's here + residual = activityTrace - expfunc(xVals, *popt) # its ok to have nan's here nSigmas = 3 # we want to exclude points that are three standard deviations away from the fit # Make a new mask that excludes the outliers excOutliers = np.copy(nonNaNs) - excOutliers[np.nonzero(np.abs(residual) > (nSigmas * np.nanstd(residual)))] = False + excOutliers[(np.abs(residual) > (nSigmas * np.nanstd(residual)))] = False # Refit excluding the outliers, use the previous fit as initial guess - popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt, - bounds=bounds) + # note we relax the bounds here a bit + + try: + popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt,bounds=bounds) + except: + popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt) + if showPlots: plt.plot(xVals, expfunc(xVals, *popt), 'y-', label='excluding outliers fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) plt.legend() + #rescale back to full size + popt[0]=scaleFactor*popt[0] return popt, pcov, xVals diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 0882f6d..f4bafb5 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -10,16 +10,20 @@ import runAnalysis as analysis + + +print("Doing the Moving Worms") analysis.actuallyRun('AML32','moving') analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') - +print("Doing the Immobilized Worms") analysis.actuallyRun('AML32','immobilized') analysis.actuallyRun('AML18','immobilized') analysis.actuallyRun('AML70','immobilized') +print("Doing worms in Chips") analysis.actuallyRun('AML32','chip') analysis.actuallyRun('Special','transition') #Note these are identicail.. # I copied and pasted the input files.. diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 8b3dc82..5de3119 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -182,7 +182,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): print 'running PCA' for kindex, key in enumerate(keyList): resultDict[key]['PCA'] = dr.runPCANormal(dataSets[key], pars) - resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) + # resultDict[key]['PCARaw'] = dr.runPCANormal(dataSets[key], pars, useRaw=True) #correlate behavior and PCA From 5242d487a5cc2ec70290faf987ba88ee1d5fa864 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 8 Nov 2019 20:34:46 -0500 Subject: [PATCH 10/25] Place holder for outlier detection and NaNing Plan is to use a Hampel filter but replacing the outliers with NaNs. 
See: https://towardsdatascience.com/outlier-detection-with-hampel-filter-85ddf523c73d --- prediction/dataHandler.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 67f1eec..df7cb25 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -395,13 +395,13 @@ def loadData(folder, dataPars, ew=1): # R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] # G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - R = correctPhotobleaching(rRaw, dataPars['volumeAcquisitionRate']) - G = correctPhotobleaching(gRaw, dataPars['volumeAcquisitionRate']) + vps=dataPars['volumeAcquisitionRate'] + R = correctPhotobleaching(rRaw, vps) + G = correctPhotobleaching(gRaw, vps) + + R = nanOutliers(rRaw,np.round(3.3*vps)) + G = nanOutliers(gRaw,np.round(3.3*vps)) - if np.any(np.isnan(R)): - print("New rPHotoCorr has some NaNs in it ") - if np.any(np.isnan(G)): - print("New gPHotoCorr has some NaNs in it ") # Ratio = np.array(data['Ratio2'])[:,:len(np.array(data['hasPointsTime']))] @@ -640,6 +640,7 @@ def correctPhotobleaching(raw, vps=6): return photoCorr + def fitPhotobleaching(activityTrace, vps): """" Fit an exponential. @@ -722,4 +723,18 @@ def expfunc(x, a, b, c): # type: (xVals, a, b, c) -> yVals return a * np.exp(-b * x) + c +def nanOutliers(trace, windowSize): + """ + Use a Hamdel like filter to find outliers, but instead of replacing them with the median, replace them with the mean. + + This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. + See: https://towardsdatascience.com/outlier-detection-with-hampel-filter-85ddf523c73d + + + :param trace: + :param windowSize: + :return: + """ + print("Need to replace outliers with NaNs") + return trace From f5570a0a54ac1ddaef21e5e4201ed8c4bb880e43 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 8 Nov 2019 20:38:58 -0500 Subject: [PATCH 11/25] typo --- prediction/dataHandler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index df7cb25..5b4d569 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -725,7 +725,7 @@ def expfunc(x, a, b, c): def nanOutliers(trace, windowSize): """ - Use a Hamdel like filter to find outliers, but instead of replacing them with the median, replace them with the mean. + Use a Hamdel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. From 28adc7cb03ef00481d1eedcd63a51038f0c2b8e4 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 14 Nov 2019 14:50:46 -0500 Subject: [PATCH 12/25] nanOutliers function. no errors, but not fully tested. added a nanoutliers funciotn that uses a modified hampel filter to replace 3sigma outliers in a specificed framewindow size (here 3.3s, or 20 volumes) with NaNs. The functions run without error. but i haven't fully tested the results. Note that the function is super slow and seems to seriously slow down the analysis. 
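As a side note on the speed issue: the same outliers-to-NaN idea can be sketched with a pandas rolling median instead of the per-sample Python loop. This is only a sketch of the approach, not code from this patch, and pandas is an assumed extra dependency that the pipeline does not currently import; the 20-volume window is the ~3.3 s value mentioned above.

import numpy as np
import pandas as pd

def hampel_to_nan_sketch(trace, window=20, n_sigmas=3):
    # rolling median and rolling MAD, both NaN-tolerant in pandas
    k = 1.4826  # MAD-to-sigma scale factor for Gaussian-distributed data
    s = pd.Series(np.asarray(trace, dtype=float))
    med = s.rolling(window, center=True, min_periods=1).median()
    mad = (s - med).abs().rolling(window, center=True, min_periods=1).median()
    out = s.copy()
    # replace outliers with NaN rather than with the local median
    out[(s - med).abs() > n_sigmas * k * mad] = np.nan
    return out.values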
--- prediction/dataHandler.py | 62 ++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 5b4d569..c868f8a 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -399,8 +399,8 @@ def loadData(folder, dataPars, ew=1): R = correctPhotobleaching(rRaw, vps) G = correctPhotobleaching(gRaw, vps) - R = nanOutliers(rRaw,np.round(3.3*vps)) - G = nanOutliers(gRaw,np.round(3.3*vps)) + R = nanOutliers_allNeurons(rRaw,np.round(3.3*vps)) + G = nanOutliers_allNeurons(gRaw,np.round(3.3*vps)) # @@ -723,18 +723,66 @@ def expfunc(x, a, b, c): # type: (xVals, a, b, c) -> yVals return a * np.exp(-b * x) + c -def nanOutliers(trace, windowSize): + + + +def nanOutliers_allNeurons(multiNeurons, window_size, n_sigmas=3): + """ + Run nanOutliers on many neurons + + Use a Hampel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. + + This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. + :return: """ - Use a Hamdel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. + assert multiNeurons.ndim == 2, '2D input data is expected' + assert multiNeurons.shape[1]>multiNeurons.shape[0], 'we expect more time points than neurons' + + new_multiNeurons=np.copy(multiNeurons) + + for i in np.arange(multiNeurons.shape[0]): + new_multiNeurons[i,:]=nanOutliers(multiNeurons[i,:],window_size,n_sigmas)[0] + return new_multiNeurons + + + + +def nanOutliers(input_series, window_size, n_sigmas=3): + """ + Use a Hampel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. See: https://towardsdatascience.com/outlier-detection-with-hampel-filter-85ddf523c73d - :param trace: + + + + :param input_series: :param windowSize: + :param n_sigmas=3: :return: """ - print("Need to replace outliers with NaNs") - return trace + assert input_series.ndim == 1, 'input dimension greater than 1! nanOutliers can only deal with 1D data' + + window_size = np.int(np.round(window_size)) + + #The following is identical to here: https://gist.githubusercontent.com/erykml/d15525855f2ef455bd7969240f6f4073/raw/c17741cdef7f69cebd3ddae191ecdd7db95d4051/hampel_filter_forloop.py + #Except for using nanmedian instead of median, and replacing with nans instead of with teh median + n = len(input_series) + new_series = input_series.copy() + k = 1.4826 # scale factor for Gaussian distribution + + indices = [] + thresh=0.0 + + # possibly use np.nanmedian + for i in range((window_size), (n - window_size)): + x0 = np.nanmedian(input_series[(i - window_size):(i + window_size)]) + S0 = k * np.nanmedian(np.abs(input_series[(i - window_size):(i + window_size)] - x0)) + if np.any(np.abs(input_series[i] - x0) > n_sigmas * S0): + new_series[i] = np.nan #this line is different + indices.append(i) + + return new_series, indices From d42483dfe25581bf0f74778eeda7448fdbb12503 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 14 Nov 2019 17:03:23 -0500 Subject: [PATCH 13/25] Now pipeline uses raw, does photobleaching, and outlier removal Needs to test. 
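For reference, the photobleaching step amounts to fitting a single exponential to each neuron's trace and dividing it out. A minimal self-contained sketch of just that idea (scipy only; it leaves out the bounds, the outlier refit, and the rescaling details that dataHandler.py handles):

import numpy as np
from scipy.optimize import curve_fit

def expfunc(x, a, b, c):
    # single-exponential bleaching model
    return a * np.exp(-b * x) + c

def photobleach_correct_sketch(trace, vps=6.0):
    trace = np.asarray(trace, dtype=float)
    x = np.arange(trace.size) / float(vps)    # time axis in seconds
    good = np.isfinite(trace)                 # ignore NaNs when fitting
    p0 = [np.nanmax(trace) / 2.0, 2.0 / x[-1], np.nanmean(trace)]
    popt, pcov = curve_fit(expfunc, x[good], trace[good], p0=p0)
    # divide out the fitted decay; multiply by a so the trace keeps roughly its original scale
    return popt[0] * trace / expfunc(x, *popt)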
--- prediction/dataHandler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index c868f8a..d847faa 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -399,8 +399,8 @@ def loadData(folder, dataPars, ew=1): R = correctPhotobleaching(rRaw, vps) G = correctPhotobleaching(gRaw, vps) - R = nanOutliers_allNeurons(rRaw,np.round(3.3*vps)) - G = nanOutliers_allNeurons(gRaw,np.round(3.3*vps)) + R = nanOutliers_allNeurons(R,np.round(3.3*vps)) + G = nanOutliers_allNeurons(G,np.round(3.3*vps)) # From 467a92b9d05c7b4ae5995faeec0b51059b83f675 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:17:56 -0500 Subject: [PATCH 14/25] Made an if statement to turn off new photobleaching and outlier detection Code should be functionally the same as the state Monika left it for the latest manuscript. Note fig 2v3 still generates 0.44 R2 instaed of 0.49 as expected (but still consistent with how I recieved the code in early june). --- prediction/dataHandler.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index d847faa..bdff394 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -392,15 +392,17 @@ def loadData(folder, dataPars, ew=1): #load neural data -# R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] -# G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - - vps=dataPars['volumeAcquisitionRate'] - R = correctPhotobleaching(rRaw, vps) - G = correctPhotobleaching(gRaw, vps) + original=True + if original: + R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + else: + vps = dataPars['volumeAcquisitionRate'] + R = correctPhotobleaching(rRaw, vps) + G = correctPhotobleaching(gRaw, vps) - R = nanOutliers_allNeurons(R,np.round(3.3*vps)) - G = nanOutliers_allNeurons(G,np.round(3.3*vps)) + R = nanOutliers_allNeurons(R,np.round(3.3*vps)) + G = nanOutliers_allNeurons(G,np.round(3.3*vps)) # From 3830991dab494d0f24a9ba68c8398dbf992e41e7 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:18:39 -0500 Subject: [PATCH 15/25] Changed folder directories of data quality check to make it run. 
--- prediction/dataQualityCheck.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/prediction/dataQualityCheck.py b/prediction/dataQualityCheck.py index 314120e..500e85a 100755 --- a/prediction/dataQualityCheck.py +++ b/prediction/dataQualityCheck.py @@ -16,17 +16,22 @@ # ############################################### typ = 'AML32' # possible values AML32, AML18, AML70 -condition = 'immobilized'# # Moving, immobilized, chip +condition = 'moving'# # Moving, immobilized, chip first = True # if true, create new HDF5 file ############################################### # # load data into dictionary # ############################################## -folder = '../{}_{}/'.format(typ, condition) -dataLog = '../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "../Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "../Analysis/{}_{}.hdf5".format(typ, condition) +#folder = '../{}_{}/'.format(typ, condition) +#dataLog = '../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) +#outLoc = "../Analysis/{}_{}_results.hdf5".format(typ, condition) +#outLocData = "../Analysis/{}_{}.hdf5".format(typ, condition) + +folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) +dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) +outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) +outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd @@ -88,7 +93,7 @@ periodogram = 0 nestedvalidation = 0 lasso = 0 -elasticnet = 0#True +elasticnet = 1#True lagregression = 0 positionweights = 0#True resultsPredictionOverview = 0 From 1846ecc7632377c2da7ad0ee8a442a8420ef647e Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:19:24 -0500 Subject: [PATCH 16/25] Adjusted the parameters back to the original --- prediction/runAnalysis.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 5de3119..467a212 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -45,6 +45,16 @@ def actuallyRun(typ='AML32', condition = 'moving'): 'volumeAcquisitionRate': 6., #rate at which volumes are acquired } + + #original data parameters + dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd + 'gaussWindow':100, # gauss window for angle velocity derivative. Acts on full (50Hz) data + 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix + 'windowGCamp': 6, # gauss window for red and green channel + 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data + } + + dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) keyList = np.sort(dataSets.keys()) if save: @@ -57,7 +67,7 @@ def actuallyRun(typ='AML32', condition = 'moving'): resultDict[key]['pars'] = dataPars # analysis parameters - pars ={'nCompPCA':20, # no of PCA components + pars ={'nCompPCA':10, # no of PCA components 'PCAtimewarp':False, #timewarp so behaviors are equally represented 'trainingCut': 0.6, # what fraction of data to use for training 'trainingType': 'middle', # simple, random or middle.select random or consecutive data for training. 
Middle is a testset in the middle @@ -71,6 +81,8 @@ def actuallyRun(typ='AML32', condition = 'moving'): 'periods': np.arange(0, 300) # relevant periods in seconds for timescale estimate } + + behaviors = ['AngleVelocity', 'Eigenworm3'] ############################################### From a3e24f93bbabc947ffe91b7e7e7d54ea68b836c3 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:19:47 -0500 Subject: [PATCH 17/25] Set meta run analysis to only deal with moving worms for now --- prediction/metaRunAnalysis.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index f4bafb5..4f162c5 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -14,24 +14,25 @@ print("Doing the Moving Worms") analysis.actuallyRun('AML32','moving') -analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') +analysis.actuallyRun('AML18','moving') -print("Doing the Immobilized Worms") -analysis.actuallyRun('AML32','immobilized') -analysis.actuallyRun('AML18','immobilized') -analysis.actuallyRun('AML70','immobilized') +if True: + print("Doing the Immobilized Worms") + analysis.actuallyRun('AML32','immobilized') + analysis.actuallyRun('AML18','immobilized') + analysis.actuallyRun('AML70','immobilized') -print("Doing worms in Chips") -analysis.actuallyRun('AML32','chip') -analysis.actuallyRun('Special','transition') #Note these are identicail.. - # I copied and pasted the input files.. + print("Doing worms in Chips") + analysis.actuallyRun('AML32','chip') + analysis.actuallyRun('Special','transition') #Note these are identicail.. + # I copied and pasted the input files.. -analysis.actuallyRun('AML70','chip') + analysis.actuallyRun('AML70','chip') -execfile('../figures/main/fig2v3.py') -execfile('../figures/supp/S4.py') -execfile('../figures/supp/S2.py') -execfile('../figures/main/fig1v3.py') -execfile('../figures/main/fig2_expVarslm.py') +#execfile('../figures/main/fig2v3.py') +#execfile('../figures/supp/S4.py') +#execfile('../figures/supp/S2.py') +#execfile('../figures/main/fig1v3.py') +#execfile('../figures/main/fig2_expVarslm.py') From 3f0e900b5f733d4e0688153ed266db0c359fc0f0 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 11:27:10 -0500 Subject: [PATCH 18/25] Doing some tests to find out why fig2 only generates R2=0.44, no 0.49 Made minimum changes necessary to run metaRunAnalysis and regenerate the analysis for AML32 moving. And also to run dataQualityCheck. Code should otherwise be as Monika left it in June. 
--- prediction/dataQualityCheck.py | 10 +++++----- prediction/metaRunAnalysis.py | 32 +++++++++++++++++--------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/prediction/dataQualityCheck.py b/prediction/dataQualityCheck.py index 314120e..88762b5 100755 --- a/prediction/dataQualityCheck.py +++ b/prediction/dataQualityCheck.py @@ -16,17 +16,17 @@ # ############################################### typ = 'AML32' # possible values AML32, AML18, AML70 -condition = 'immobilized'# # Moving, immobilized, chip +condition = 'moving'# # Moving, immobilized, chip first = True # if true, create new HDF5 file ############################################### # # load data into dictionary # ############################################## -folder = '../{}_{}/'.format(typ, condition) -dataLog = '../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "../Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "../Analysis/{}_{}.hdf5".format(typ, condition) +folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) +dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) +outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) +outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 0882f6d..a0c51b9 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,23 +11,25 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') -analysis.actuallyRun('AML18','moving') -analysis.actuallyRun('AML70','moving') -analysis.actuallyRun('AML175','moving') +if False: + analysis.actuallyRun('AML18','moving') + analysis.actuallyRun('AML70','moving') + analysis.actuallyRun('AML175','moving') -analysis.actuallyRun('AML32','immobilized') -analysis.actuallyRun('AML18','immobilized') -analysis.actuallyRun('AML70','immobilized') -analysis.actuallyRun('AML32','chip') -analysis.actuallyRun('Special','transition') #Note these are identicail.. - # I copied and pasted the input files.. + analysis.actuallyRun('AML32','immobilized') + analysis.actuallyRun('AML18','immobilized') + analysis.actuallyRun('AML70','immobilized') -analysis.actuallyRun('AML70','chip') + analysis.actuallyRun('AML32','chip') + analysis.actuallyRun('Special','transition') #Note these are identicail.. + # I copied and pasted the input files.. 
-execfile('../figures/main/fig2v3.py') -execfile('../figures/supp/S4.py') -execfile('../figures/supp/S2.py') -execfile('../figures/main/fig1v3.py') -execfile('../figures/main/fig2_expVarslm.py') + analysis.actuallyRun('AML70','chip') + + execfile('../figures/main/fig2v3.py') + execfile('../figures/supp/S4.py') + execfile('../figures/supp/S2.py') + execfile('../figures/main/fig1v3.py') + execfile('../figures/main/fig2_expVarslm.py') From 8745a9335b2ead2a278dbbb63e963f0d3b3a8bf6 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 15 Nov 2019 14:05:48 -0500 Subject: [PATCH 19/25] updated stamp on figure w/ provenance --- figures/main/fig2v3.py | 6 ++++++ prediction/metaRunAnalysis.py | 5 +---- prediction/provenance.py | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/figures/main/fig2v3.py b/figures/main/fig2v3.py index 06ca0cd..dcd64c6 100755 --- a/figures/main/fig2v3.py +++ b/figures/main/fig2v3.py @@ -218,6 +218,7 @@ ax7.text(-140, 0, 'Velocity\n(rad/s)', color = R1, rotation=0, verticalalignment='center', fontsize=12, multialignment='center') ax8.text(-160, 0, 'Body \n curvature \n(a.u.)', color = B1, rotation=0, verticalalignment='center', fontsize=12, multialignment='center') + # move axis to the right ax7.yaxis.tick_right() ax8.yaxis.tick_right() @@ -493,6 +494,11 @@ plt.show() # get all the weights for the different samples + +import prediction.provenance as prov +prov.stamp(axV,.1,.3,'branch: minimumLocal') + + #for typ, colors, ax in zip(['AML32', 'AML18'], [colorsExp, colorCtrl], [ax11, ax12]): # for condition in ['moving', 'immobilized']: # key = '{}_{}'.format(typ, condition) diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index a0c51b9..54436b3 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,13 +11,10 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') - -if False: +if True: analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') - - analysis.actuallyRun('AML32','immobilized') analysis.actuallyRun('AML18','immobilized') analysis.actuallyRun('AML70','immobilized') diff --git a/prediction/provenance.py b/prediction/provenance.py index 769b8d7..ba67b98 100644 --- a/prediction/provenance.py +++ b/prediction/provenance.py @@ -25,7 +25,7 @@ def getStampString(): return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath) -def stamp(ax, x,y,notes): +def stamp(ax, x,y,notes=''): """ :param ax: matplotlib axes h :param x: fractional location in the plot From bed516f98d0961e380e3789bef203a917470bea4 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 21 Nov 2019 11:04:35 -0500 Subject: [PATCH 20/25] Now updated the provenance module to include branch name --- figures/main/fig2v3.py | 2 +- prediction/provenance.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/figures/main/fig2v3.py b/figures/main/fig2v3.py index dcd64c6..e01ff3a 100755 --- a/figures/main/fig2v3.py +++ b/figures/main/fig2v3.py @@ -496,7 +496,7 @@ import prediction.provenance as prov -prov.stamp(axV,.1,.3,'branch: minimumLocal') +prov.stamp(axV,.1,.3) #for typ, colors, ax in zip(['AML32', 'AML18'], [colorsExp, colorCtrl], [ax11, ax12]): diff --git a/prediction/provenance.py b/prediction/provenance.py index ba67b98..858163e 100644 --- a/prediction/provenance.py +++ b/prediction/provenance.py @@ -19,11 +19,11 @@ def getStampString(): hash = repo.head.object.hexsha gitpath = repoPath = repo._working_tree_dir giturl = 
repo.remotes.origin.url - + gitbranch = repo.active_branch from datetime import datetime timestamp = datetime.today().strftime('%Y-%m-%d %H:%M') - return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath) + return str(timestamp + '\n' + str(hash) + '\n' + giturl + '\n' + gitpath + '\nBranch: ' + str(gitbranch)) def stamp(ax, x,y,notes=''): """ From 4c6b01c81c197ea003a2ed00ef295e2c311dd357 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Thu, 21 Nov 2019 15:24:23 -0500 Subject: [PATCH 21/25] testing hampel filter with debug output --- prediction/dataHandler.py | 10 +++++++++- prediction/dataHandler.pyc | Bin 20534 -> 27715 bytes prediction/metaRunAnalysis.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index bdff394..3d36ecd 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -392,7 +392,7 @@ def loadData(folder, dataPars, ew=1): #load neural data - original=True + original=False if original: R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] @@ -787,4 +787,12 @@ def nanOutliers(input_series, window_size, n_sigmas=3): new_series[i] = np.nan #this line is different indices.append(i) + Debug = True + if Debug: + import matplotlib.pyplot as plt + plt.plot(input_series,'r') + plt.plot(new_series,'ob') + plt.show() + + return new_series, indices diff --git a/prediction/dataHandler.pyc b/prediction/dataHandler.pyc index c3e868c1c279434ffc57e225ae43d34416a8af68..c453db316763fbeff1ace772180051e5d7224371 100644 GIT binary patch delta 11894 zcmcIqdvIJ=c|T`YT3atmwpP#8M>H0S0b51lmG6?Er-q=%g)#6etuzfetAY3MIo(CUNloSB|NhVC3_#nXr1(Go1xtra~oKRH{|Yw5n7@ z%|y@)Q&y>mw3=yCcDu5wCEcN{0cF)nx>H$o>LIMpP5+Iw~2acy`%2(;;lR~h^U=zj!{=*Uw; zWuMUa`(gRF1G>li^7dzRi}$9A`Es6?Deu24_MzoirP&s_tQEelQcEGVUe0tlkCaxq zTfKKwo(dSs`)TFffiC4uSB(c}^;xg3YN*=Xrk3i|Wrf}A_C8VdIOaH8{Y;vpN5LMt zW5jiQ1FCtwOmUX5tJP9~g;nc8zhqh#G!^AJHp@tAL0o3-wQRJUCF3mNx(^FYxoqdg zR>sU~?}3_oz|S{o9t{n!X^ofMk&G_9NKwy|^QyE4|9v z_NrDEw2`!Xduyi$dXRKOAc>T-m|2b1G0*;Yn^Hv98>+aX4w_ev#)2!=sy{^$Ufl#p7dsp3lZ9Mh$x`=M5XTw`axN5V7(eVFw!Ml(|_-nZ(1SHI3nhYhrPWB6;j)>~@$ zf_~haZ0teFy^U|}66tP3>trToxofVOX3q>smPC84O;2?HA(Bodhzf*sW1uzApyS*U zN`YtZsPWX7np%ST2JgkzJ$k#>7&%yy|J{*6-RnIP>Bi)L8rfebf{mb0NxCmaI>Y2y z#N}jit~tlr9y7Wngo%xlC8PQq`40BsOy=7`WJJe} z#g~PPc*f_(Nu*5*)bIDlY%MF_I_te%kLs^^-|OlK8Q78i+IxF<^HaUupV8irds>l2F_S!# zNZIBaal_1z`5=nE?R~koXQT=#-ixD#FeF#}Tu|i$Di2zw*4|mX_qgxJ&OhPp+1;n# z<4x_}J6g0$A(h{*)?sO2b%Io7R0nVSV}l@j4M~rCcHed38jR-hfZFMml$}FooYW8WHJm}!_pD&+C5J5 z{~+n`4yr(Hpas8LJ*KPm9`7T2T5y-XvgZk{cY4|WI|jJx=Ico0(8#f(G%m`?^-r^? z<6iH({lj<47&8PV%s~dthkb43eTXop>^q+barl+N@?Nb>~G{LBdL#qu%f!Sexi z;-tIHd+o@+LapYJ-X?Y!7esEy0uDAn8*FkwONYD*qvHjKo4A@tTbb1pM`L%Ko=Akb zaM6I=+UO2ud1%ZJk?>TTA0`nY@`#!rA^9qZOVg6Ib8&1(B5BK>$+01(v25QCQm^`& zGagHNHDlWvHs${@j`T^Ab6)@0@7-_%Sa3R!S3#a{{DLETL^ssc>XBfF?hH2QZru|U z8@3OvIKL>cZ*6<(=VLGEs(O`=sPzbRXu$jC_(V;ca(AeuoeD6;s`oxHK2m5`>3c=d zgjGwvL%CtK-lEn!urn%;*2uP&vhB)kQ0x8>jl`j77RGKAn<9@E82;)yzto?!QO*a^ zXN`Io=f(TjxKZd-ZW9Maf2h36L3OTN)iR9bxoLu$E9 z<(>;}_CtlJXdLD}!V3i3r`Gq#^7|C<#@Y1F4Oae*tZ?E5sN_yqj2l!wD&2hfK_(q) z9S|L(6mT2bIIRW@2S-Ry^Rp4^TTT0|H3oEJS_)M zatVhUEeu#{EQ2R+1lw(FZ1>m8vEA5e6vItA0Xux+BzGSmEuSK=* zuZqUFs`fh6_H0aa06BP~r6YV*+v`z_Ggn-}1ac!Axf_tfDHv4)vb;%T{cYl655mEP zN-1`{5p|=0SBJcx9Y};8fXX%R_w4Jt3)pp)5Q~sUY6{1JpIdGcpp!FWtLFQVm)rt! 
zVl8r~7c*|=XeO7F?80WY0d0X+0jdtiFRZ{NcHtD8z*h_N@~ON@Obbd9nZ3le-WRUF z4W3K;#8>ng?>{GgtOvXod)f;G(*k{OIdyX`m&y4^;z5*}y(9$^=m}*$OR`L|Y44t9 zR!kkyli01}v7}?e62Qv<+i)y|=eeXEi!TD#xs9kv$I_=(++@PejX1NhWOB-0$z{@x z^v@B7nm^!hg!i}Emwdj&@ZRDrO}6SsyoV?E+%9-&7EMgLfzmkP%qP+bU`+7>Qmp$R z35}}x9grx^py`s(n2Iw5kH_+UF}be}_f45^AbBInZ+nLi))zb`KT7f`k`a=>CHXkX zlO#(ZGQI6CW>PU{+591ke^e^YW!-K56*CmZT+E`V9N7P74-V=_y`LQXqmWG$@HgJuZX9R|OzO$9kPhideJD@~ zKv0YDNL!irnHwJi-n{G3;Xtd%LpJa}&hrrxo&k9De|fl1f6ROS@IdGs3tad>)80Kt zn)Q#o)guO+zbB5oFEq}oectUyqvO!?TYYF5R6O!OK}A%c>iulucVX&g?_EbP>tA`- z9P7q#Q^!76CrAoaN&+J0oY#NzYdW@L;o_30;4ZyO$GLrF_~8@-zj)8x{K?^4kd$K! z#{k&L+4Mq1w%M3v_E5HLXWr?3{P=;YUou(DAHHMSd$Os_`>hi@0(Ajz`9wSTdF_e2 zC%FhY6}ee|06BK`rP*?}vlz?Te%$6RPQX#6!&=sRp!c-Vg~BHCPlgpxnQ+g|tH2Kt zgg2ag6qej;ymdl%dw1UY9fYF}+_n$Xzu{!lQxDxXs~ZQ6AImbfaX*ePzJOt)E$s$R@JAB8y9`W9NN3@XOP6!t}knp(xf8PL;P)Rtv7-d;2 zCqi!pP#wnaLA|9apmNia1s)4R@uCk>WvT@b$eET-5TX*A>g+Xpnx7Qo#j`~eSd%sE`q1qRSLL1q%`q5CJNH#E`VPc zIT#awf1fS73m6r+9{nm+flV+MyaXv_&{gUdf>K_o*1n`!RdOkgoxfM-tHj5~uvHAv z;#)*dSP14jhq>W@Us*Q^-+H6%Mq<5CMR$8L@4M{iQs&z)NdL^5m+Oca?oJLSqtr8wTg()e!o-1qW7W7TtV9^x`JU5N4uJO71iVI0n8 zlb4JvZR&cDBjIAUd6{J@610!weK8E1@WJd;F`i-4Pv z%^<$y$kbS1&IY7Go03#Z^^p-Z&iK%|RYVHbc`77WC+<{%iH1`G41 z`CP);jN3R1oT~8uG?$aO&+al#+c_8@ITerv_)a_7446K&FfO0|n>Z;yHzgR_i)sQm zv6Ly>nolHMfuK_kZR*^RNy8)|Z-``&lASQ5DmUaXNGn%`@Uv9M_Kc7kQKuj33J)jZ zE}|Fir86SJTUib9lq3VO6pC`8lqfkX=EfL0d|468My4|^4kLnyd!tPlPY^NAVG2$q z=ED}{h;@BbDE3^C$LmW?<2F`Vi5x?U&TJu>ShmepTz}{jEH<9VUK;jqxCtLjnPTAy z7+oSt+{vo|E6pt~)%;32E-g|qH=E1=z0M71FEJ%R+fSP9XzYV3gDC`O3d-zcvIj)i zx+|7kv4wky9zmhXfi#E6BIWDHn`RwZZ*kXV{Xz+)ZfuI=_?+f6xNu}OGJuoHWFTrw z>PMz#i>gWlnayNfKM~LPfk|w-0Pi(0=9sOFd>70Jm^nrma}yX+H4y|*>vRhqi$oDj z4Fzk1b-E!?g8)-=uuMGCO+`dl{$s=2mmm8_+yr$ww5ql|H|4zcgB*J9F36~$>P6bvP-I1m8LHKRTfD`+gWEgJ3u z%r$z3iN9Ww>P>6ot)%Cy>p$-v(Z@ zWW#`xla~+)Dar$wu~rN`|0s|OVwkJsaW*}q5LPMmd1)6cYD0L8wor8M4Q$A2E9;0> zQY4KU`aKR!@FV2uw{sH~i6T%2yPW5;2GuQcw%z8yLo zkK0*Ve%QzK0#Jp2KLH$KMP*v>EmE=luK^dY5Vk1%rr_cg!WM-MbmD^xk*63|XCH+8 z5*-bQ23)s+)~*n1(YP0_A$p5ejIZP_+Orr)yrTq)Rnc(%;?M$gHb4J*NDu?gEezqP zX!$t?C!8y+r)iQ6xkAI0F6Lyy0(f_{(CRGvgHRUIiC3^oVqf}yfnT^VTAL&3p7I|4Bb z%hc)?9nm|BCbA}2PgAP<%7c1T2lW8jNG+lsafS~{KQWo)3`Wm|HH7{Q;9U~IJkvfUOOXBB;0&lv!c(Anq%iDX{K?_Fe;Qf?Qv*}+l?jT z(T_NO@V>9CM z#2riGfj6B2#1!$wyLk6-hgV$SSI4zuD4mC|f`_SbUQa--)TSQ>;FgHNhw|&iYbe&@ zc>=zqZ=+;RBK?T3#5m#?^ZUTe$)6lXDDE(3RkTF&?D z-~wIkkoj)iFnv>5-$c%KS90kIKj(WwEmU)Rk6|%cEXl{^4xLd1ZNWT*=)X4P@H)UN z^K1SOKdXo_r~K&Drn=b#=n}6LoLz**VW?aK!bje4Wh%NzU0-WM4OQI>{v2u*y z=L?7rK^mHf33G&qi>Ku!CFk+d#e5~NTY>lSW$)W7e%%D$6^v=yo-h_&H|tD{jJO%R z#9}$TUNE6{I&Kf+b%~LpJ{huX7uOB`><}Y3LyMw;hJ58SbYQ<_&F|kk9=~qfs+12e z^aS3(i>3T)I}2|(VLKE4C6|fr-~neU{mG*vN)|u zW#;@B5;v8_h9hsbuC|)MTY;)w=hZm*GA=UdQ=e3F?()hf%w6D zu;z5Zd<%O0kUA)emTG4t&|FrHH;_Wa0l`+?f%h~9fCIusTu=x1x+b{!;^qV=*#OaKCXW|{ZlsSaK4{mWEGff6vuk-48lfZi-q zr%0AbdP&wuXj#m+l0XwE^F1UFk~~Yomto9rk^F#Uf|C=eOd$$t9z)K&h2##Bvm{+w z_2D@ilPMA!c$2ux2c2z9QQ%P}&#az5wR0OK6w-Cxk4|-tR@YTjR#((kRd-Z`D=KSS kD;g_yRWwu#R6ka|qoSf>d&RbjGSoCwAE}tC*jC-}zk;9^f&c&j delta 5139 zcmZ`-d2pN65#J}-vSrJ*e8#fmOLiRjPVA)QFp!YM&S{)4G$zVngg+&bEK82`<#3P? 
zIVt4|JeUHdKw(;@E$u+V040=Zxq3jMFrA^LCxsR$@JFH3KU$dfw=3I8%aBMv?c063 z@9puux1XQ=wR-G6m9>9dQgp*FU*0uNGym-PU5Rh%`-;dU3IY|GHi{_<+Gc4x1?{tR z9tHDfX$M6MC|Ecm#6mKgg;+$UUx+@our<)a zDVio-XS+r}WV=m~US;>GYQ5Y3N8m+ym#FG9NAsRh_*vt4+ot@w-Fa5k>c2Smo(P_A8?(;j>bynZDZS2mZGSQ(nQ5+I|*lN0M$jq0j@SfpcItF zRQT{zfVXJ$F^8wE5}dU1m=QX#$SBrtuGy*^ON-Su-C26Aqn)R_^b@7Ant%;-gta30 zQlJ80lTx`r^s#dS8x?WwUU#e=t ztPiW+P^J2()vqe8@2Y75=joa|no-mOzW^^OtNO2A;CcU@Vq(9h+ldrLgHzq+zC6bmAVyRze zeZJxplbPK3o4#G02WKZvji)wC)IqFGCMDnz;{p~-4J4ELPt7Cx>iRnMpuV$yC3g5m z{Q-p?jx>6(m1tx4NGs1~2jk&Gp~I1sXqpMjI0asykfmG<<1yKR%rql=_G}A}TU=j< z#)rd)jMJcuTNrND?=|rfvY_7Lw>@nfD!lxNl8P8TT^hA?9#!4klukMJB_>O?r#DHUB6$Z|VN| zExkpcxa?Qso65&_@ObXDQ^rOaU>+Sl$Oi(Lc!1aJ+NV#>Z&OF~L-YGKp@h;n9f0{X zu-`|c$Q}+3#sX&JeE0B=jXcL~tez<(tB}(8;N-WH$IN5lahu9Gb+FCb4uPCDX1-|5 zT7H#AnRK$|9y`uFhrXw6RBu_Zq|=E_TMXg&fyr<(WQ8RulGjKo9*RpaLh)(iVg1O0 z%1+}h%r(Bsz_|>t^D+hHhpl0=AuBY(Px(Gw5m=~t^twQ*=Tit$H2|`e**vx~Ri+lJ zQdWuNw)j-Qxg87PP&CI5>f3wD+W7&IgiMA*@iF5BhQ{510Ef)@Im3$#FX>JN7t3JDVmsg^IgBBoHAsL+g)y^6Y6jM-qa&YVS*WKA3jg)Dk={mAXDOygr>CH+!&vGhcnrSrNLU~pdO%ze`giR^M)2b=$qiBt!u$khD zEYTS1vtQL7d7kOgO3xy?&wQC78VS)s+ljzI8<#w;7{w$c7P zduINu`}5f;^q%;fM9Wt)<0;fU81KR1h`Hzx5vQn^yQc{UC1118cvpA6cdywwn01u( z46S27eUygktm$)}Lutlym><~xnjOBfw4T@Ic(ln7>N%o}?SMZ3F+$`)D$|(D6n>g= zR1;YZGy2)g0!laM*v(?9QP3@j`8l5v8wbaR{YN4)0TDw_Qq2KiiI=J~?dK$O?xX_} z92CW8W@e+vBO>7z3z-f|=RWAPv5sdcIMb*o%<$m`f*ZJxGMz|w(_Fg$nTK>E)nKFf z9>V=Hw2reM0jrDBHH76oX>{fdh)RH%YNP>3nJR1ovkCR8hnDqayXo*ovRb%7M{90^ zK$RdbcoV=o0QS^Qdz~EKHM{(@6UR&sWqK*yP3ayiF|$k)$X?{?n*7l;Dh!K&V|L6f z*@+d~h2IH&7e;Vdbn)iFtnM5OiahGUs7(^YrkDfcRx3s4bM!wG;{x6uU)nR17eXEH zNVAoNg}6> z3IOXbMVFgM=5ExZENWr6n&BA3GYnCNbGO42Jd}$;x`_8Ml#6j=csggTk{U5%9rL-n zFvGF16&5lXo(hL7yg0j$Wj8Y1#E@aQnc*};uA64+NroRY^e{Zd@IwY^QSnseHWRk? zB_>0uiSuqR7dDb>2j1cNZ!^5b@GihByD_&Lk76M1Tp2Ql!w1m5?a()_>{6%nlPkaO zxB@m(-`1g39r^YFHDJ^Cuet#ZQtu_JZ1v)-M0%QSa51E;q~y!!)?Z%Jrq1d|)^s>_ zKwzZsx;deKuQ#vdx2^cv?>YLI-LL(Ff&NJ*;|vJ~u_^Cp4`3MJv=~Piaw<$M)0x5J z>V2(-{4m!#^rBn(1Bgi8ij9!|&(N1_|4{n6bzaQ)#=7TvB*$Dx*d-c()VUXtTpJrQ zjam*F&okEQudH8M`Vo`4F(mRj)xDv@_KDJ~Hq^uH*oN)%c()SFl9ej-D;ugE z8`$1Q`fnS8O*J6xXiidj5EGgQX?+VpJMl&9Yi_(&sd4?mrsZm$UaL%*6QEm{dY)JTf)i6G`NrNq!QgMZ#;s)HMDM@b^fI^62X5LVluh+Np_H zIFyVVN{>eS9q^Kj5q&yZqwM;@XoJerFGL%%(jrST7tfj;48>AmLmsPPR+5KTp6Ww9 zlt=pxrj9Xui{U84GYs718Gm4SkD(jx$qV!_+G=Addwe0oDu#`UD#+j*7`qulfB?y# zKz=t-zF`ms9LgpC%aqIJ#-AWuxi9D%yK9*%;C4HUT+Xs$SA(<4SyWc%taMg5tDWtx RjH}e?bQU@boO#Zw{{nP~)N=p; diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 54436b3..9a3d07b 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,7 +11,7 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') -if True: +if False: analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') From d2d11d10845e55d6da264454059bbe739a458612 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 22 Nov 2019 11:16:21 -0500 Subject: [PATCH 22/25] Monika's speedy hampel filter. Visually inspected the output on real data. Looks good. I added some visualization stuff. 
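A minimal usage sketch of the vectorized filter on an (M, N) array of traces, with synthetic data for illustration; it assumes the nanOutliers signature added in the diff below:

import numpy as np
from prediction.dataHandler import nanOutliers  # post-patch dataHandler

# 10 fake traces, 3000 volumes each, with one injected spike artifact
traces = 100.0 + np.random.randn(10, 3000)
traces[3, 1500] += 50.0
window = int(np.round(2 * 3.3 * 6))   # ~40 volumes at 6 volumes/s, as used in loadData()
cleaned, idx = nanOutliers(traces, window, n_sigmas=3)
print(np.isnan(cleaned).sum())        # how many samples got replaced with NaN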
--- prediction/dataHandler.py | 102 ++++++++++++++++---------------------- prediction/runAnalysis.py | 13 +++-- 2 files changed, 52 insertions(+), 63 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 3d36ecd..d939dec 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -398,11 +398,14 @@ def loadData(folder, dataPars, ew=1): G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] else: vps = dataPars['volumeAcquisitionRate'] + + #remove outliers with a sliding window of 40 volumes (or 6 seconds) + R = nanOutliers(rRaw, np.round(2*3.3*vps).astype(int)) + G = nanOutliers(gRaw, np.round(2*3.3*vps).astype(int)) + R = correctPhotobleaching(rRaw, vps) G = correctPhotobleaching(gRaw, vps) - R = nanOutliers_allNeurons(R,np.round(3.3*vps)) - G = nanOutliers_allNeurons(G,np.round(3.3*vps)) # @@ -728,71 +731,54 @@ def expfunc(x, a, b, c): -def nanOutliers_allNeurons(multiNeurons, window_size, n_sigmas=3): - """ - Run nanOutliers on many neurons - - Use a Hampel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. - - This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. - :return: - """ - assert multiNeurons.ndim == 2, '2D input data is expected' - assert multiNeurons.shape[1]>multiNeurons.shape[0], 'we expect more time points than neurons' - - new_multiNeurons=np.copy(multiNeurons) - - for i in np.arange(multiNeurons.shape[0]): - new_multiNeurons[i,:]=nanOutliers(multiNeurons[i,:],window_size,n_sigmas)[0] - return new_multiNeurons +from skimage.util import view_as_windows +def nanOutliers(data, window_size, n_sigmas=3): + """using a Hampel filter to detect outliers and replace them with nans. + This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. -def nanOutliers(input_series, window_size, n_sigmas=3): + The algorithm assumes the underlying series should be Gaussian. This could be changed by changing the k parameter. + Data can contain nans, these will be ignored for median calculation. + data: (M, N) array of M timeseries with N samples each. + windowsize: integer. This is half of the typical implementation. + n_sigmas: float or integer """ - Use a Hampel like filter to find outliers, but instead of replacing them with the median, replace them with NaN. - - This is based on Xiaowen Chen's MATLAB routine for preprocessing from her Phys Rev E 2019 paper. - - See: https://towardsdatascience.com/outlier-detection-with-hampel-filter-85ddf523c73d - - - - - - :param input_series: - :param windowSize: - :param n_sigmas=3: - :return: - """ - assert input_series.ndim == 1, 'input dimension greater than 1! 
nanOutliers can only deal with 1D data' - - window_size = np.int(np.round(window_size)) - - #The following is identical to here: https://gist.githubusercontent.com/erykml/d15525855f2ef455bd7969240f6f4073/raw/c17741cdef7f69cebd3ddae191ecdd7db95d4051/hampel_filter_forloop.py - #Except for using nanmedian instead of median, and replacing with nans instead of with teh median - n = len(input_series) - new_series = input_series.copy() - k = 1.4826 # scale factor for Gaussian distribution - - indices = [] - thresh=0.0 - - # possibly use np.nanmedian - for i in range((window_size), (n - window_size)): - x0 = np.nanmedian(input_series[(i - window_size):(i + window_size)]) - S0 = k * np.nanmedian(np.abs(input_series[(i - window_size):(i + window_size)] - x0)) - if np.any(np.abs(input_series[i] - x0) > n_sigmas * S0): - new_series[i] = np.nan #this line is different - indices.append(i) + # deal with N = 1 timeseries to conform to (M=1,N) shape + if len(data.shape)<2: + data = np.reshape(data, (1,-1)) + k = 1.4826 # scale factor for Gaussian distribution + M, N = data.shape # store data shape for quick use below + # pad array to achieve at least the same size as original + paddata = np.pad(data.copy(), pad_width= [(0,0),(window_size//2,window_size//2)], \ + constant_values=(np.median(data)), mode = 'constant') + # we use strides to create rolling windows. Not nice in memory, but good in performance. + # because its meant for images, it creates some empty axes of size 1. + tmpdata = view_as_windows(paddata, (1,window_size)) + # crop data to center + tmpdata = tmpdata[:M, :N] + x0N = np.nanmedian(tmpdata, axis = (-1, -2)) + s0N =k * np.nanmedian(np.abs(tmpdata - x0N[:,:,np.newaxis, np.newaxis]), axis = (-1,-2)) + # hampel condition + hampel = (np.abs(data-x0N) - (n_sigmas*s0N)) + indices = np.where(hampel>0) + # cast to float to allow nans in output data + newData = np.array(data, dtype=float) + newData[indices] = np.nan Debug = True if Debug: + chosen = np.squeeze( np.round(np.random.rand(1) * M-1).astype(int)) import matplotlib.pyplot as plt - plt.plot(input_series,'r') - plt.plot(new_series,'ob') + plt.cla() + plt.plot(data[chosen, :],'r') + plt.plot(newData[chosen, :],'b') + plt.title('nanOutlier(), neuron: ' +str(chosen)) plt.show() + return newData, indices + + + - return new_series, indices diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 467a212..6e5e7ea 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -47,12 +47,15 @@ def actuallyRun(typ='AML32', condition = 'moving'): #original data parameters - dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd - 'gaussWindow':100, # gauss window for angle velocity derivative. Acts on full (50Hz) data - 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix + dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd + 'gaussWindow':100, # gauss window for angle velocity derivative. 
Acts on full (50Hz) data + 'rotate':False, # rotate Eigenworms using previously calculated rotation matrix 'windowGCamp': 6, # gauss window for red and green channel - 'interpolateNans': 6,#interpolate gaps smaller than this of nan values in calcium data - } + 'interpolateNans': 6, #interpolate gaps smaller than this of nan values in calcium data + + 'volumeAcquisitionRate': 6., # rate at which volumes are acquired + + } dataSets = dh.loadMultipleDatasets(dataLog, pathTemplate=folder, dataPars = dataPars) From c49bf909669a682a85480d3922c71788d50b0f44 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 22 Nov 2019 13:47:25 -0500 Subject: [PATCH 23/25] Fixed algebra error in photobleaching correction Previously the photobleaching correction was failing by producing traces that blew up towards the latter half of the trace. The bug was an algebra error. To fit the exponent, at Francesco's suggestion, I rescaled the origional traces so that it was on the range of zero to one. This dramatically improves fitting performance and efficiency. So then I had to rescale the fitting parameters back. Previously I had been only scaling the a scaler in y=a exp(-bx)+ c. But of course c had been scaled to. So now I also rescale c. And that fixes everything. ADDITONAL: fixed a bug about using max instead of nanmax in the fitting routines. --- prediction/dataHandler.py | 57 +++++++++++++++++++++++------------ prediction/metaRunAnalysis.py | 2 +- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index d939dec..001e8f0 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -400,11 +400,11 @@ def loadData(folder, dataPars, ew=1): vps = dataPars['volumeAcquisitionRate'] #remove outliers with a sliding window of 40 volumes (or 6 seconds) - R = nanOutliers(rRaw, np.round(2*3.3*vps).astype(int)) - G = nanOutliers(gRaw, np.round(2*3.3*vps).astype(int)) + R = nanOutliers(rRaw, np.round(2*3.3*vps).astype(int))[0] + G = nanOutliers(gRaw, np.round(2*3.3*vps).astype(int))[0] - R = correctPhotobleaching(rRaw, vps) - G = correctPhotobleaching(gRaw, vps) + R = correctPhotobleaching(R, vps) + G = correctPhotobleaching(G, vps) @@ -604,16 +604,21 @@ def correctPhotobleaching(raw, vps=6): photoCorr = np.zeros_like(raw) # initialize photoCorr - showPlots = False + debug = False + + performMedfilt = True - if raw.ndim == 2: - # perform median filtering (Nan friendly) - from scipy.signal import medfilt - smoothed = medfilt(raw, [1, medfilt_window]) + if raw.ndim == 2: + if performMedfilt: + # perform median filtering (Nan friendly) + from scipy.signal import medfilt + smoothed = medfilt(raw, [1, medfilt_window]) + else: + smoothed = raw.copy() - N_neurons = smoothed.shape[0] + N_neurons = raw.shape[0] # initialize the exponential fit parameter outputs popt = np.zeros([N_neurons, 3]) @@ -624,13 +629,19 @@ def correctPhotobleaching(raw, vps=6): popt[row, :], pcov[:, :, row], xVals = fitPhotobleaching(smoothed[row, :], vps) photoCorr[row, :] = popt[row, 0] * raw[row, :] / expfunc(xVals, *popt[row, :]) + showPlots = False + if debug: + if np.random.rand() > 0.85: + showPlots = True + if showPlots: import matplotlib.pyplot as plt - plt.plot(xVals, smoothed[row, :], 'b-', label=['raw, row: '+np.str(row)]) + plt.plot(xVals, raw[row, :], 'b-', label=['raw, row: '+np.str(row)]) plt.plot(xVals, photoCorr[row, :], "r-", label=['photoCorr, row: '+np.str(row)]) plt.xlabel('Time (s)') plt.ylabel('ActivityTrace') + plt.title('correctPhotobleaching()') 
plt.legend() plt.show() @@ -670,32 +681,36 @@ def fitPhotobleaching(activityTrace, vps): from scipy.optimize import curve_fit # set up some bounds on our exponential fitting parameter, y=a*exp(bx)+c - num_recordingLengths = 6 # the timescale of exponential decay should not be more than six recording lengths long + num_recordingLengths = 8 # the timescale of exponential decay should not be more than eight recording lengths long # because otherwise we could have recorded for way longer!! #Scale the activity trace to somethign around one. scaleFactor=np.nanmean(activityTrace) activityTrace=activityTrace/scaleFactor - bounds = ([0, 1 / (num_recordingLengths * max(xVals)), 0], # lower bounds - [max(activityTrace[nonNaNs])*1.5, 0.5, 2 * np.nanmean(activityTrace)]) # upper bound + bounds = ([0, 1 / (num_recordingLengths * np.nanmax(xVals)), 0], # lower bounds + [np.nanmax(activityTrace[nonNaNs])*1.5, 0.5, 2 * np.nanmean(activityTrace)]) # upper bound # as a first guess, a is half the max, b is 1/(half the length of the recording), and c is the average - popt_guess = [max(activityTrace) / 2, 2 / max(xVals), np.nanmean(activityTrace)] + popt_guess = [np.nanmax(activityTrace) / 2, 2 / np.nanmax(xVals), np.nanmean(activityTrace)] popt, pcov = curve_fit(expfunc, xVals[nonNaNs], activityTrace[nonNaNs], p0=popt_guess, bounds=bounds) - showPlots = False + if np.random.rand()>0.9: + showPlots = True + else: + showPlots = False + if showPlots: import matplotlib.pyplot as plt + plt.cla() plt.plot(xVals, activityTrace, 'b-', label='data') plt.plot(xVals, expfunc(xVals, *popt), "r-", label='fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) plt.xlabel('Time (s)') plt.ylabel('ActivityTrace') plt.legend() - plt.show() ## Now we want to inspect our fit, find values that are clear outliers, and refit while excluding those residual = activityTrace - expfunc(xVals, *popt) # its ok to have nan's here @@ -715,12 +730,16 @@ def fitPhotobleaching(activityTrace, vps): popt, pcov = curve_fit(expfunc, xVals[excOutliers], activityTrace[excOutliers], p0=popt) if showPlots: + import matplotlib.pyplot as plt plt.plot(xVals, expfunc(xVals, *popt), 'y-', label='excluding outliers fit a*exp(-b*x)+c: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)) plt.legend() - #rescale back to full size + #rescale the amplitude, a, back to full size popt[0]=scaleFactor*popt[0] + + #rescale the c parameter + popt[2]=scaleFactor*popt[2] return popt, pcov, xVals @@ -767,7 +786,7 @@ def nanOutliers(data, window_size, n_sigmas=3): newData = np.array(data, dtype=float) newData[indices] = np.nan - Debug = True + Debug = False if Debug: chosen = np.squeeze( np.round(np.random.rand(1) * M-1).astype(int)) import matplotlib.pyplot as plt diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 9a3d07b..54436b3 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,7 +11,7 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') -if False: +if True: analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') From 353b98aa9cc10e56ec8a25153408a7a939c8f525 Mon Sep 17 00:00:00 2001 From: Andrew Leifer Date: Fri, 22 Nov 2019 15:21:26 -0500 Subject: [PATCH 24/25] Debugging: python photobleaching and nan-ing aversely affects R^2 moving photobleaching correiton and outlier detectiong and naning from Jeff's matlab code to python appears to decrease velocity decoding performance of our marquee dataset in fig 2v3. 
(but strangely decoding performance of turning is preserved). In previous commits, photobleaching correciton had been done according to Xiaowen's method in the Phys Rev E paper (division not subtraction). Visual inspection shows no obvious difference between the old matlab based photoblecahign correciotn and the new division approach in python. If anything, the new approach does a better job preserving variance across time. Remains unresolved why performance suffers. One hypothesis: maybe I am currently being less agressive about nan'ing entire columns then Jeff's matlab pipeline? Need to check. --- prediction/dataHandler.py | 41 +++++++++++++++++++++++++++++------ prediction/metaRunAnalysis.py | 2 +- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/prediction/dataHandler.py b/prediction/dataHandler.py index 001e8f0..0991f45 100755 --- a/prediction/dataHandler.py +++ b/prediction/dataHandler.py @@ -390,22 +390,49 @@ def loadData(folder, dataPars, ew=1): rRaw=np.array(data['rRaw'])[:,:len(np.array(data['hasPointsTime']))] gRaw=np.array(data['gRaw'])[:,:len(np.array(data['hasPointsTime']))] + rphotocorr = np.array(data['rPhotoCorr'])[:, :len(np.array(data['hasPointsTime']))] + gphotocorr = np.array(data['gPhotoCorr'])[:, :len(np.array(data['hasPointsTime']))] #load neural data - original=False + debug = True + original = False if original: - R = np.array(data['rPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] - G = np.array(data['gPhotoCorr'])[:,:len(np.array(data['hasPointsTime']))] + R = np.copy(rphotocorr) + G = np.copy(gphotocorr) else: + R = rRaw.copy() + G = gRaw.copy() vps = dataPars['volumeAcquisitionRate'] #remove outliers with a sliding window of 40 volumes (or 6 seconds) - R = nanOutliers(rRaw, np.round(2*3.3*vps).astype(int))[0] - G = nanOutliers(gRaw, np.round(2*3.3*vps).astype(int))[0] + R = nanOutliers(R, np.round(2*3.3*vps).astype(int))[0] + G = nanOutliers(G, np.round(2*3.3*vps).astype(int))[0] R = correctPhotobleaching(R, vps) G = correctPhotobleaching(G, vps) + if debug: + + nplots=2 + chosen = np.round(np.random.rand(nplots)*R.shape[0]).astype(int) + import matplotlib.pyplot as plt + plt.cla() + plt.subplot(nplots, 1, 1,) + plt.plot(G[chosen[0],:],'g', label='gRaw w/ python photocorrection') + plt.plot(gphotocorr[chosen[0],:],'b', label='gPhotoCorr via matlab') + plt.title('Comparsion of Green Channel photocorrection methods for neuron ' + str(chosen[0])) + plt.legend() + + plt.subplot(nplots,1,2) + plt.plot(R[chosen[0], :], 'r', label='rRaw w/ python photocorrection') + plt.plot(rphotocorr[chosen[0], :], 'm', label='rPhotoCorr via matlab') + plt.title('Comparsion of Red Channel photocorrection methods for neuron ' + str(chosen[0])) + plt.legend() + plt.show() + + + + # @@ -696,8 +723,8 @@ def fitPhotobleaching(activityTrace, vps): popt, pcov = curve_fit(expfunc, xVals[nonNaNs], activityTrace[nonNaNs], p0=popt_guess, bounds=bounds) - - if np.random.rand()>0.9: + debug = False + if np.logical_and(np.random.rand() > 0.9, debug): showPlots = True else: showPlots = False diff --git a/prediction/metaRunAnalysis.py b/prediction/metaRunAnalysis.py index 54436b3..9a3d07b 100644 --- a/prediction/metaRunAnalysis.py +++ b/prediction/metaRunAnalysis.py @@ -11,7 +11,7 @@ import runAnalysis as analysis analysis.actuallyRun('AML32','moving') -if True: +if False: analysis.actuallyRun('AML18','moving') analysis.actuallyRun('AML70','moving') analysis.actuallyRun('AML175','moving') From eea8b8277d1dcaad247468c3a468a4d406e1aaf1 Mon Sep 17 00:00:00 2001 From: Monika Scholz 
Date: Fri, 13 Dec 2019 09:30:49 +0100 Subject: [PATCH 25/25] add automatic detection of user and modify all imports to conform --- figures/main/fig1_figManifold.py | 13 ++++++++----- figures/main/fig1_figManifoldMoving.py | 10 ++++++---- figures/main/fig1v3.py | 10 ++++++---- figures/main/fig2_expVarslm.py | 11 +++++++---- figures/main/fig2v3.py | 11 +++++++---- figures/main/fig3v3.py | 11 ++++++----- figures/main/fig4v3.py | 11 ++++++----- figures/supp/S1.py | 12 +++++++----- figures/supp/S10.py | 13 +++++++------ figures/supp/S2.py | 10 ++++++---- figures/supp/S3.py | 10 ++++++---- figures/supp/S4.py | 10 ++++++---- figures/supp/S5.py | 11 +++++++---- figures/supp/S6.py | 11 +++++++---- figures/supp/S7_nStability.py | 11 +++++++---- figures/supp/S8_lagplots.py | 11 +++++++---- figures/supp/S9_fitTransition.py | 10 ++++++---- prediction/dataQualityCheck.py | 11 +++++++---- prediction/plotAnalysis.py | 12 +++++++----- prediction/print Metadata.py | 13 +++++++------ prediction/runAnalysis.py | 11 +++++++---- prediction/sandbox.py | 13 ++++++++++++- prediction/userTracker.py | 15 +++++++++++++++ 23 files changed, 167 insertions(+), 94 deletions(-) create mode 100644 prediction/userTracker.py diff --git a/figures/main/fig1_figManifold.py b/figures/main/fig1_figManifold.py index 2dd28c5..a3b35f8 100644 --- a/figures/main/fig1_figManifold.py +++ b/figures/main/fig1_figManifold.py @@ -8,10 +8,12 @@ import numpy as np import matplotlib as mpl from sklearn.feature_selection import mutual_info_classif +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -29,10 +31,11 @@ data = {} for typ in ['AML18', 'AML32']: for condition in ['immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) @@ -140,4 +143,4 @@ #ax.view_init(elev=-40, azim=90) -plt.show() \ No newline at end of file +plt.show() diff --git a/figures/main/fig1_figManifoldMoving.py b/figures/main/fig1_figManifoldMoving.py index 3f00fe1..e96bd02 100644 --- a/figures/main/fig1_figManifoldMoving.py +++ b/figures/main/fig1_figManifoldMoving.py @@ -12,6 +12,7 @@ import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... 
@@ -29,10 +30,11 @@ data = {} for typ in ['AML18', 'AML32']: for condition in ['moving']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/main/fig1v3.py b/figures/main/fig1v3.py index c30241b..87ee010 100644 --- a/figures/main/fig1v3.py +++ b/figures/main/fig1v3.py @@ -26,6 +26,7 @@ #import singlePanels as sp #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # suddenly this isn't imported from stylesheet anymore... mpl.rcParams["axes.labelsize"] = 14 @@ -42,10 +43,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig2_expVarslm.py b/figures/main/fig2_expVarslm.py index c42b83f..a323c87 100644 --- a/figures/main/fig2_expVarslm.py +++ b/figures/main/fig2_expVarslm.py @@ -7,6 +7,7 @@ """ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -14,6 +15,7 @@ # #import makePlots as mp from prediction import dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML175', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig2v3.py b/figures/main/fig2v3.py index e01ff3a..d921021 100755 --- a/figures/main/fig2v3.py +++ b/figures/main/fig2v3.py @@ -7,6 +7,7 @@ """ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -14,6 +15,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * from scipy.stats import pearsonr @@ -33,10 +35,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML175', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig3v3.py b/figures/main/fig3v3.py index 3b9a042..2c08550 100755 --- a/figures/main/fig3v3.py +++ b/figures/main/fig3v3.py @@ -16,7 +16,7 @@ #from matplotlib_venn import venn2 from scipy.cluster.hierarchy import dendrogram, linkage, fcluster, set_link_color_palette import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * from prediction.pycpd import deformable_registration, rigid_registration @@ -54,10 +54,11 @@ def registerWorms(R, X, dim=3, nr = True): data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/main/fig4v3.py b/figures/main/fig4v3.py index c23e6a5..5a8509d 100755 --- 
a/figures/main/fig4v3.py +++ b/figures/main/fig4v3.py @@ -22,7 +22,7 @@ # import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * # stats from sklearn.metrics import r2_score,mean_squared_error @@ -93,10 +93,11 @@ def main(): data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S1.py b/figures/supp/S1.py index c97a8e7..a5ea76c 100755 --- a/figures/supp/S1.py +++ b/figures/supp/S1.py @@ -17,6 +17,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -34,11 +35,12 @@ data = {} for typ in ['AML18', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) - + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) + try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S10.py b/figures/supp/S10.py index 76e077f..303db84 100755 --- a/figures/supp/S10.py +++ b/figures/supp/S10.py @@ -23,7 +23,7 @@ #import svgutils as svg # import prediction.dataHandler as dh - +from prediction import userTracker from prediction.stylesheet import * from prediction.pycpd import deformable_registration, rigid_registration @@ -59,11 +59,12 @@ def registerWorms(Xref, X, dim=3): data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) - + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) + try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git 
a/figures/supp/S2.py b/figures/supp/S2.py index f020944..3d4f6a2 100755 --- a/figures/supp/S2.py +++ b/figures/supp/S2.py @@ -16,6 +16,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -33,10 +34,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175', 'Special']: for condition in ['chip', 'moving', 'immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S3.py b/figures/supp/S3.py index 5107074..71f0569 100755 --- a/figures/supp/S3.py +++ b/figures/supp/S3.py @@ -17,6 +17,7 @@ from scipy.ndimage.filters import gaussian_filter1d import matplotlib.ticker as mtick import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * @@ -35,10 +36,11 @@ data = {} for typ in ['AML32', 'Special']: for condition in ['immobilized', 'transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S4.py b/figures/supp/S4.py index abfc9b2..f4cdac7 100755 --- a/figures/supp/S4.py +++ b/figures/supp/S4.py @@ -16,6 +16,7 @@ import matplotlib.ticker as mtick from scipy.stats import ttest_ind from prediction import dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * @@ -65,10 +66,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in [ 'chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) - dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S5.py b/figures/supp/S5.py index afbcd3f..e57fd7f 100755 --- a/figures/supp/S5.py +++ b/figures/supp/S5.py @@ -8,10 +8,12 @@ import numpy as np import matplotlib as mpl from sklearn.feature_selection import mutual_info_classif +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * # suddenly this isn't imported from stylesheet anymore... @@ -29,10 +31,11 @@ data = {} for typ in ['AML32', 'AML18', 'AML70', 'AML175']: for condition in ['chip', 'moving', 'immobilized']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S6.py b/figures/supp/S6.py index a82bf4d..520297f 100644 --- a/figures/supp/S6.py +++ b/figures/supp/S6.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S7_nStability.py b/figures/supp/S7_nStability.py index 437deb9..242b9e6 100644 --- a/figures/supp/S7_nStability.py +++ b/figures/supp/S7_nStability.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! from prediction.stylesheet import * @@ -76,10 +78,11 @@ def findBounds(x, y, ybound): data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/figures/supp/S8_lagplots.py b/figures/supp/S8_lagplots.py index 9d107e5..d18f336 100644 --- a/figures/supp/S8_lagplots.py +++ b/figures/supp/S8_lagplots.py @@ -8,6 +8,7 @@ import numpy as np import matplotlib as mpl +import os # import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec @@ -15,6 +16,7 @@ # #import makePlots as mp import prediction.dataHandler as dh +from prediction import userTracker # deliberate import all! 
from prediction.stylesheet import * from scipy.stats import pearsonr @@ -34,10 +36,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'chip']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets dataSets = dh.loadDictFromHDF(outLocData) diff --git a/figures/supp/S9_fitTransition.py b/figures/supp/S9_fitTransition.py index ee76056..864d18c 100644 --- a/figures/supp/S9_fitTransition.py +++ b/figures/supp/S9_fitTransition.py @@ -17,6 +17,7 @@ import matplotlib.gridspec as gridspec from scipy.ndimage.filters import gaussian_filter1d import prediction.dataHandler as dh +from prediction import userTracker from sklearn import linear_model from sklearn import preprocessing @@ -35,10 +36,11 @@ data = {} for typ in ['Special']: for condition in ['transition']:# ['moving', 'immobilized', 'chip']: - folder = '../../{}_{}/'.format(typ, condition) - dataLog = '../../{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "../../Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "../../Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) try: # load multiple datasets diff --git a/prediction/dataQualityCheck.py b/prediction/dataQualityCheck.py index 88762b5..e4626c7 100755 --- a/prediction/dataQualityCheck.py +++ b/prediction/dataQualityCheck.py @@ -4,10 +4,12 @@ import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr +from prediction import userTracker # mpl.rcParams['interactive'] = False ############################################### @@ -23,10 +25,11 @@ # load data into dictionary # ############################################## -folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) -dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) -outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) -outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) +path = userTracker.dataPath() +folder = os.path.join(path, '{}_{}/'.format(typ, condition)) +dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) +outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) +outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd diff --git a/prediction/plotAnalysis.py b/prediction/plotAnalysis.py 
index 721afed..ba6ed7b 100755 --- a/prediction/plotAnalysis.py +++ b/prediction/plotAnalysis.py @@ -3,11 +3,12 @@ import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr - +from prediction import userTracker ############################################### @@ -26,10 +27,11 @@ data = {} for typ in ['AML32', 'AML70']: for condition in ['moving', 'immobilized']: - folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) - dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) # load multiple datasets diff --git a/prediction/print Metadata.py b/prediction/print Metadata.py index ef5d33c..c9dc415 100755 --- a/prediction/print Metadata.py +++ b/prediction/print Metadata.py @@ -7,8 +7,8 @@ import dataHandler as dh import numpy as np import pprint - - +import os +from prediction import userTracker ################################################ # # grab all the data we will need @@ -50,10 +50,11 @@ def dump_keys(d, lvl=0): for typ in ['Special', 'AML32', 'AML18', 'AML175', 'AML70']: for condition in ['transition','immobilized','moving', 'chip']: - folder = '{}_{}/'.format(typ, condition) - dataLog = '{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) #print strains[typ], conditions[condition] try: # load multiple datasets diff --git a/prediction/runAnalysis.py b/prediction/runAnalysis.py index 6e5e7ea..bc92b08 100755 --- a/prediction/runAnalysis.py +++ b/prediction/runAnalysis.py @@ -4,10 +4,12 @@ import numpy as np import matplotlib.pylab as plt import h5py +import os # custom modules import dataHandler as dh import makePlots as mp import dimReduction as dr +from prediction import userTracker ############################################### # @@ -31,10 +33,11 @@ def actuallyRun(typ='AML32', condition = 'moving'): # load data into dictionary # ############################################## - folder = '/Users/leifer/workspace/PredictionCode/{}_{}/'.format(typ, condition) - dataLog = '/Users/leifer/workspace/PredictionCode/{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition) - outLoc = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}_results.hdf5".format(typ, condition) - outLocData = "/Users/leifer/workspace/PredictionCode/Analysis/{}_{}.hdf5".format(typ, condition) + path = userTracker.dataPath() + folder = os.path.join(path, '{}_{}/'.format(typ, condition)) + dataLog = 
os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) + outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) + outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) # data parameters dataPars = {'medianWindow':50, # smooth eigenworms with gauss filter of that size, must be odd diff --git a/prediction/sandbox.py b/prediction/sandbox.py index 3d10a07..9fbda2b 100755 --- a/prediction/sandbox.py +++ b/prediction/sandbox.py @@ -6,6 +6,17 @@ @author: mscholz """ +import os +from prediction import userTracker +path = userTracker.dataPath() +print path +folder = os.path.join(path, '{}_{}/'.format(typ, condition)) +dataLog = os.path.join(path,'{0}_{1}/{0}_{1}_datasets.txt'.format(typ, condition)) +outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition)) +outLocData = os.path.join(path,'/Analysis/{}_{}.hdf5'.format(typ, condition)) +print folder + + import numpy as np #import fourier_vec as fourier import matplotlib.pyplot as plt @@ -795,4 +806,4 @@ def loadAtlas(dim3=False): ## ## projection on behavioral axes ## -###################################### \ No newline at end of file +###################################### diff --git a/prediction/userTracker.py b/prediction/userTracker.py new file mode 100644 index 0000000..b7cb238 --- /dev/null +++ b/prediction/userTracker.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +"""detect which user is using the machine and change the path to the data accordingly.""" +import socket + +def dataPath(): + """hardcoded or automatic detection of the user's data path.""" + host = socket.gethostname() + if host== 'substitute for andys computer name': + path = '/Users/leifer/workspace/PredictionCode/' + elif host=='nif3004': + path = '/media/scholz_la/hd2/Data/Data' + else: + raise Exception('This is not a known host. Edit prediction/userTracker.py to add your hostname and data paths.') + return path \ No newline at end of file
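
For orientation, the following is a minimal, hypothetical sketch of the path-resolution pattern these patches introduce: each script asks prediction/userTracker.dataPath() for a per-machine base directory and assembles its file locations with os.path.join. The hostname, base directory, and the AML32/moving values below are placeholders, not values taken from the patches. One subtlety visible in the diffs: os.path.join discards all preceding components when a later component starts with '/', so the '/Analysis/...' form used for outLocData resolves to an absolute /Analysis path, while the 'Analysis/...' form used for outLoc stays under the base directory.

#!/usr/bin/env python
# Minimal sketch of userTracker-style path resolution; hostname and base
# directory below are placeholders, not values from the patch series.
import os
import socket

def dataPath():
    """Stand-in for prediction/userTracker.dataPath(): map hostname to a base directory."""
    known = {'your-hostname-here': '/path/to/PredictionCode/'}  # placeholder entry
    # Fall back to the current directory so the sketch runs on any machine.
    return known.get(socket.gethostname(), os.getcwd())

typ, condition = 'AML32', 'moving'  # example strain and condition
path = dataPath()

# Relative second component: the result stays under the base directory,
# e.g. '/path/to/PredictionCode/Analysis/AML32_moving_results.hdf5'.
outLoc = os.path.join(path, 'Analysis/{}_{}_results.hdf5'.format(typ, condition))

# Second component beginning with '/': os.path.join drops `path` entirely,
# yielding the absolute path '/Analysis/AML32_moving.hdf5' instead.
outLocData = os.path.join(path, '/Analysis/{}_{}.hdf5'.format(typ, condition))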