From d83ec18cf84ef22a94d31e1b6e7942969ea8d3d1 Mon Sep 17 00:00:00 2001
From: Pranjalya
Date: Fri, 12 Jun 2020 22:29:21 +0530
Subject: [PATCH 1/7] Extracted some hidden files

---
 models/cross-dimensional-attention | 1 +
 1 file changed, 1 insertion(+)
 create mode 160000 models/cross-dimensional-attention

diff --git a/models/cross-dimensional-attention b/models/cross-dimensional-attention
new file mode 160000
index 0000000..24532dd
--- /dev/null
+++ b/models/cross-dimensional-attention
@@ -0,0 +1 @@
+Subproject commit 24532dd9470c9ff280ddd8f3443bbed4a9674a35

From e5ee13edd83aa425ffb7daf35990f9fe7d53ef01 Mon Sep 17 00:00:00 2001
From: Pranjalya
Date: Fri, 19 Jun 2020 18:41:07 +0530
Subject: [PATCH 2/7] Folder shelved

---
 models/cross-dimensional-attention           |   1 -
 .../tensorflow/Base_Handler.py               |  82 ++++
 .../tensorflow/Base_Handler.pyc              | Bin 0 -> 4488 bytes
 .../tensorflow/Base_TFModel.py               |  36 ++
 .../tensorflow/Base_TFModel.pyc              | Bin 0 -> 1810 bytes
 .../tensorflow/BurstLoss_Generation.ipynb    | 187 +++++++++
 .../tensorflow/Config.yaml                   |  55 +++
 .../tensorflow/MultiDim_Analyzer_Handler.py  | 166 ++++++++
 .../tensorflow/MultiDim_Analyzer_Handler.pyc | Bin 0 -> 7437 bytes
 .../tensorflow/MultiDim_Analyzer_Model.py    | 362 ++++++++++++++++++
 .../tensorflow/MultiDim_Analyzer_Model.pyc   | Bin 0 -> 15586 bytes
 .../tensorflow/__init__.py                   |   7 +
 .../tensorflow/__init__.pyc                  | Bin 0 -> 137 bytes
 13 files changed, 895 insertions(+), 1 deletion(-)
 delete mode 160000 models/cross-dimensional-attention
 create mode 100644 models/cross-dimensional-attention/tensorflow/Base_Handler.py
 create mode 100644 models/cross-dimensional-attention/tensorflow/Base_Handler.pyc
 create mode 100644 models/cross-dimensional-attention/tensorflow/Base_TFModel.py
 create mode 100644 models/cross-dimensional-attention/tensorflow/Base_TFModel.pyc
 create mode 100644 models/cross-dimensional-attention/tensorflow/BurstLoss_Generation.ipynb
 create mode 100644 models/cross-dimensional-attention/tensorflow/Config.yaml
 create mode 100644 models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.py
 create mode 100644 models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.pyc
 create mode 100644 models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.py
 create mode 100644 models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.pyc
 create mode 100644 models/cross-dimensional-attention/tensorflow/__init__.py
 create mode 100644 models/cross-dimensional-attention/tensorflow/__init__.pyc

diff --git a/models/cross-dimensional-attention b/models/cross-dimensional-attention
deleted file mode 160000
index 24532dd..0000000
--- a/models/cross-dimensional-attention
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 24532dd9470c9ff280ddd8f3443bbed4a9674a35

diff --git a/models/cross-dimensional-attention/tensorflow/Base_Handler.py b/models/cross-dimensional-attention/tensorflow/Base_Handler.py
new file mode 100644
index 0000000..f9062a2
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/Base_Handler.py
@@ -0,0 +1,82 @@
+# uncompyle6 version 3.7.0
+# Python bytecode 2.7 (62211)
+# Decompiled from: Python 3.6.9 (default, Apr 18 2020, 01:56:04)
+# [GCC 8.4.0]
+# Embedded file name: /home/jiawei/Tensor_MultiDim_NYC/Model/Base_Handler.py
+# Compiled at: 2019-04-29 02:51:25
+import tensorflow as tf, numpy as np, yaml, os, h5py, time, sys, math
+from Lib.Model_Visualization import *
+
+class Basement_Handler(object):
+
+    def __init__(self, sess, model_config, is_training):
+        self.sess = sess
+        self.model_config = model_config
+        self.max_grad_norm = float(model_config.get('max_grad_norm', 5.0))
+        self.init_logging(is_training)
+        self.logger.info(model_config)
+
+    def init_logging(self, is_training):
+        if is_training is not True:
+            base_dir = self.model_config.get('result_data')
+            folder_dir = self.model_config.get('category') + '_for_' + self.model_config.get('data_name')
+        else:
+            base_dir = self.model_config.get('result_model')
+            folder_dir = generate_folder_id(self.model_config)
+        log_dir = os.path.join(self.model_config.get('result_dir'), base_dir, folder_dir)
+        if not os.path.exists(log_dir):
+            os.makedirs(log_dir)
+        self.log_dir = log_dir
+        self.logger = get_logger(self.log_dir, folder_dir)
+        self.writer = tf.summary.FileWriter(self.log_dir)
+
+    def trainable_parameter_info(self):
+        total_parameters = 0
+        for variable in tf.trainable_variables():
+            total_parameters += np.product([x.value for x in variable.get_shape()])
+        self.logger.info('Total number of trainable parameters: %d' % total_parameters)
+        for var in tf.global_variables():
+            self.logger.debug('%s, %s' % (var.name, var.get_shape()))
+
+    def summary_logging(self, global_step, names, values):
+        for name, value in zip(names, values):
+            summary = tf.Summary()
+            summary_value = summary.value.add()
+            summary_value.simple_value = value
+            summary_value.tag = name
+            self.writer.add_summary(summary, global_step)
+
+    def save_model(self, saver, epoch, val_loss):
+        config_filename = 'config_%02d.yaml' % epoch
+        config = dict(self.model_config)
+        global_step = self.sess.run(tf.train.get_or_create_global_step())
+        config['epoch'] = epoch
+        config['global_step'] = global_step
+        config['log_dir'] = self.log_dir
+        config['model_filename'] = saver.save(self.sess, os.path.join(self.log_dir, 'models-%.4f' % val_loss), global_step=global_step, write_meta_graph=False)
+        with open(os.path.join(self.log_dir, config_filename), 'w') as f:
+            yaml.dump(config, f)
+        return config['model_filename']
+
+    def restore(self):
+        config = dict(self.model_config)
+        self.pred_step = config['global_step']
+        model_filename = config['model_filename']
+        saver = tf.train.Saver(tf.global_variables())
+        saver.restore(self.sess, model_filename)
+
+    def train_test_valid_assignment(self):
+        pass
+
+    def initial_parameter(self):
+        pass
+
+    def data_assignment(self):
+        pass
+
+    def train(self):
+        pass
+
+    def test(self):
+        pass
\ No newline at end of file
diff --git a/models/cross-dimensional-attention/tensorflow/Base_Handler.pyc b/models/cross-dimensional-attention/tensorflow/Base_Handler.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8cb17850c03099e2d27dcf366153da4bf0277c3a
GIT binary patch
literal 4488
[base85 payload omitted]
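The BurstLoss_Generation notebook recovered below synthesizes "burst" missing-data masks over the 186-camera x 6624-step NYC-traffic grid: a burst starts with probability `ratio` and its length is drawn from a Gaussian fitted to the observed gap statistics. As a self-contained reference, a minimal NumPy sketch of that idea; the `burst_mean`, `burst_std` and `ratio` values here are illustrative stand-ins for the statistics the notebook estimates.

```python
import numpy as np

num_cams, num_steps = 186, 6624
burst_mean, burst_std, ratio = 12.0, 6.0, 0.618  # illustrative values

mask_burst = np.zeros((num_cams, num_steps))
for cam in range(num_cams):
    t = 0
    while t < num_steps:
        if np.random.uniform() < ratio:
            length = int(np.random.normal(burst_mean, burst_std))
            if length > 0:
                mask_burst[cam, t:t + length] = 1.0  # 1 marks a removed point
                t += length
                continue
        t += 1

move = 1.0 - np.clip(mask_burst, 0.0, 1.0)  # 1 marks an available point
print('missing rate: %.4f' % (1.0 - move.mean()))
```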
diff --git a/models/cross-dimensional-attention/tensorflow/BurstLoss_Generation.ipynb b/models/cross-dimensional-attention/tensorflow/BurstLoss_Generation.ipynb
new file mode 100644
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/BurstLoss_Generation.ipynb
+    "    while(time>0):\n",
+    "        if mask_time[cam,time] > 1:\n",
+    "            length = int(mask_time[cam,time]-1)\n",
+    "            mask_time[cam,time-length:time] = mask_time[cam,time]\n",
+    "            mask_delta[cam,time-length:time] = 0\n",
+    "            time -= length\n",
+    "        time -= 1\n",
+    "delta_seq = mask_delta[mask_delta != 0]\n",
+    "delta_seq = np.sort(delta_seq)\n",
+    "delta_burst = delta_seq[delta_seq<=134]\n",
+    "print delta_burst.shape\n",
+    "delta_burst = delta_burst[delta_burst > 1]\n",
+    "print delta_burst.shape\n",
+    "print delta_burst.mean(),delta_burst.std(),mask_miss.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "missing rate: 0.9009605050086975\n"
+     ]
+    }
+   ],
+   "source": [
+    "burst_mean,burst_std,mask_burst,ratio = delta_burst.mean(),delta_burst.std(),np.zeros_like(mask_miss),0.618\n",
+    "for cam in range(186):\n",
+    "    time = 0\n",
+    "    while(time<6624):\n",
+    "        if np.random.uniform(size=1) < ratio:\n",
+    "            length = int(np.random.normal(burst_mean, burst_std, 1))\n",
+    "            if length > 0:\n",
+    "                mask_burst[cam,time:time+length] = 1.0\n",
+    "                time += length\n",
+    "            else:\n",
+    "                time += 1\n",
+    "        else:\n",
+    "            time += 1\n",
+    "move = mask_burst+mask_miss\n",
+    "move[move>1] = 1\n",
+    "move = 1.0-move\n",
+    "for i in range(6624-48*12+1):\n",
+    "    if move[:,i:i+48*12].mean() == 0:\n",
+    "        print i\n",
+    "print 'missing rate:', 1.0-move.mean()\n",
+    "\n",
+    "combine_file = h5py.File('burst_90.h5','w')\n",
+    "combine_file['mask'] = move\n",
+    "combine_file.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.7999438 1.0 0.0\n",
+      "[]\n"
+     ]
+    }
+   ],
+   "source": [
+    "combine_file = h5py.File('burst_20.h5','r')\n",
+    "move = combine_file['mask'][:]\n",
+    "combine_file.close()\n",
+    "print move.mean(),move.max(),move.min()\n",
+    "for i in range(6624-48*12+1):\n",
+    "    if move[:,i:i+48*12].mean() == 0:\n",
+    "        print i\n",
+    "temp = move[move!=0]\n",
+    "\n",
+    "print temp[temp!=1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/cross-dimensional-attention/tensorflow/Config.yaml b/models/cross-dimensional-attention/tensorflow/Config.yaml
new file mode 100644
index 0000000..c01e7b5
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/Config.yaml
@@ -0,0 +1,55 @@
+---
+# === Configure Main function === #
+category: Traffic-NYC
+data_name: data_mask_maximum
+mask_name: burst_70
+meas_index: 1
+model_name: MDAnalyzer
+result_dir: Result
+result_model: Model-Config
+result_data: Validation-Result
+GPU: 1
+# === Configure Model Handler === #
+# Data Assignment
+batch_size: 19
+period_enc: 432
+period_dec: 432
+
+# Model training controller
+epoch_iter: 600
+epochs: 100
+patience: 50
+learning_rate: 0.001
+lr_decay: 0.3
+lr_decay_epoch: 20
+lr_decay_interval: 15
+loss_func: RMSE
+upbound: 1
+max_grad_norm: 5.0
+
+# === Configure the MDAnalyzer Model pipeline === #
+# Frame: Sequence Element-wise-addition Concatenation Dimension-reduce Independent
+model_structure: Sequence
+num_enclayer: 7
+num_declayer: 7
+num_heads: 12
+units_IDw: 14
+units_Timew: 6
+# Query, Key and Value
+time_stride_AM: 1
+time_stride_V: 1
+time_fuse_AM: 1
+time_fuse_V: 1
+units_value: 3
+units_weight: 12
+drop_rate_attention: 0.0
+# Filter SetUp
+units_conv: 8
+units_pred: 8
+filter_encdec: dense
+filter_pred: dense
+drop_rate_forward: 0.1
+# Mask label
+flag_identity: False
+flag_time: False
+flag_imputation: True
diff --git a/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.py b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.py
new file mode 100644
index 0000000..e932542
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.py
@@ -0,0 +1,166 @@
+import tensorflow as tf
+import numpy as np
+import yaml
+import os
+import h5py
+import time
+import sys
+import math
+
+from Lib.Data_Processing import *
+from Lib.Utility import *
+from Model.MultiDim_Analyzer_Model import MultiDim_Analyzer
+from Model.Base_Handler import Basement_Handler
+
+
+class MDAnalyzer_Handler(Basement_Handler):
+    def __init__(self, dataset_name, model_config, sess, is_training=True):
+
+        # Initialization of Configuration, Parameter and Datasets
+        super(MDAnalyzer_Handler, self).__init__(sess=sess, model_config=model_config, is_training=is_training)
+        self.initial_parameter()
+        self.data_assignment(dataset_name)
+
+        # Define the general model and the corresponding input
+        self.shape_enc = (self.batch_size, self.num_identity, self.period_enc)
+        self.shape_dec = (self.batch_size, self.num_identity, self.period_dec)
+        self.input_enc = tf.placeholder(tf.float32, shape=self.shape_enc, name='encoder_inputs')
+        self.input_ori = tf.placeholder(tf.float32, shape=self.shape_dec, name='decoder_inputs')
+        self.truth_pred = tf.placeholder(tf.float32, shape=self.shape_dec, name='ground_truth')
+        self.truth_mask = tf.placeholder(tf.float32, shape=self.shape_dec, name='natural_missing')
+        self.move_mask = tf.placeholder(tf.float32, shape=self.shape_dec, name='removed_missing')
+        self.shared_info = tf.placeholder(tf.float32, shape=(self.num_identity, self.num_shared_feature), name='position')
+        with tf.variable_scope("impute", reuse=tf.AUTO_REUSE):
+            self.impute_segs = tf.get_variable("impute_var", shape=self.shape_enc, trainable=True,
+                                               initializer=tf.random_normal_initializer(mean=0, stddev=0.1))
+        # Initialization for the model training structure.
+        self.learning_rate = tf.get_variable('learning_rate', shape=(), initializer=tf.constant_initializer(self.lr_init), trainable=False)
+        self.lr_new = tf.placeholder(tf.float32, shape=(), name='lr_new')
+        self.lr_update = tf.assign(self.learning_rate, self.lr_new, name='lr_update')
+
+        self.train_test_valid_assignment()
+        self.trainable_parameter_info()
+        self.saver = tf.train.Saver(tf.global_variables())
+
+    def initial_parameter(self):
+
+        # Configuration Set
+        config = self.model_config
+
+        # Model Input Initialization
+        self.batch_size = int(config.get('batch_size', 1))
+        self.period_enc = int(config.get('period_enc', 12))
+        self.period_dec = int(config.get('period_dec', 12))
+
+        # Initialization for the Training Controller
+        self.epochs = int(config.get('epochs', 100))
+        self.epoch_iter = int(config.get('epoch_iter', 5))
+        self.patience = int(config.get('patience', 30))
+        self.lr_init = float(config.get('learning_rate', 0.001))
+        self.lr_decay = float(config.get('lr_decay', 0.1))
+        self.lr_decay_epoch = int(config.get('lr_decay_epoch', 20))
+        self.lr_decay_interval = int(config.get('lr_decay_interval', 10))
+
+    def data_assignment(self, dataset_name):
+        model_config = self.model_config
+        set_whole, self.node_pos, self.maximum = Data_Division(dataset_name)
+
+        # Pre-calculation for model training
+        self.scalar = limit_scalar(set_whole)
+        self.set_whole, disp_whole = self.scalar.transform(set_whole)
+        self.num_identity, self.num_shared_feature = self.node_pos.shape[0], self.node_pos.shape[1]
+        self.whole_segs = (set_whole[0].shape[-1] - self.period_enc) / 2 + 1
+        self.whole_size = int(self.whole_segs / self.batch_size)
+
+        # Display the data structure of the whole dataset
+        print 'Available Segments[batches] %d[%d] Shape of data/mask piece %s and min-mean-max is %.2f-%.2f-%.2f' % (
+            self.whole_segs, self.whole_size, set_whole[0].shape, disp_whole[0], disp_whole[1], disp_whole[2])
+        print 'Measurement maximum(average,std) are %4.4f(%4.4f,%4.4f)' % (self.maximum, self.scalar.mean, self.scalar.std)
+
+        # Data Generator
+        self.gen_whole = Data_Generator(self.set_whole, set_whole[0], self.whole_segs, self.batch_size, self.period_enc, is_training=True)
+
+    def train_test_valid_assignment(self):
+
+        # The original mask uses 1 to indicate a missing point; the inversion is done in Data_Division.
+        value_sets = (
+            tf.expand_dims(self.input_enc, -1),   # the input value of the encoder (current data with points randomly removed)
+            tf.expand_dims(self.input_ori, -1),   # the input value of the decoder (future/current data with points randomly removed)
+            tf.expand_dims(self.truth_pred, -1),  # the ground truth of the future/current prediction
+            tf.expand_dims(self.truth_mask, -1),  # the label of NATURALLY missing data: 0 -- missing, 1 -- available
+            tf.expand_dims(self.move_mask, -1),   # the label of artificially removed data: 0 -- missing, 1 -- available
+            self.shared_info,                     # the shared information of the nodes (position), value normalized to [0,1]
+            self.scalar                           # (class instance) rescales the input and the output
+        )
+        with tf.name_scope('Train'):
+            with tf.variable_scope('MultiDim_Analyzer', reuse=False):
+                self.MD_Analyzer_train = MultiDim_Analyzer(value_sets, self.learning_rate, self.sess, self.model_config, is_training=True)
+
+    def train(self):
+        self.sess.run(tf.global_variables_initializer())
+        print ('Training Started')
+        min_impute_metric = float('inf')
+        epoch_cnt, wait = 0, 0
+
+        start_time = time.time()
+        while epoch_cnt <= self.epochs:
+
+            # Training preparation: learning-rate pre-setting, model interface summary.
+            cur_lr = self.calculate_scheduled_lr(epoch_cnt)
+            whole_fetches = {'global_step': tf.train.get_or_create_global_step(),
+                             'train_op': self.MD_Analyzer_train.train_op,
+                             'preds': self.MD_Analyzer_train.orig_impute,
+                             'metric': self.MD_Analyzer_train.orig_metric,
+                             'loss': self.MD_Analyzer_train.loss}
+            Results = {"loss": [], "imputed": [], "metric": [], "ground": [], "mask_compare": []}
+            # Framework and visualization setup for training
+            for trained_batch in range(0, self.whole_size):
+                (curdata, curmask, curmove, curdata_orig) = self.gen_whole.next()
+                feed_dict_whole = {self.input_enc: curdata*curmove,
+                                   self.input_ori: curdata_orig,
+                                   self.truth_pred: curdata,
+                                   self.truth_mask: curmask,
+                                   self.move_mask: curmove,
+                                   self.shared_info: self.node_pos}
+                whole_output = self.sess.run(whole_fetches, feed_dict=feed_dict_whole)
+                message = "Epoch [%3d/%3d] [%d/%d] lr: %.4f, loss: %.8f" % (
+                    epoch_cnt, self.epochs, trained_batch, self.whole_size, cur_lr, whole_output["loss"])
+                if trained_batch % 50 == 0:
+                    print message
+
+                Results["metric"].append(whole_output['metric'])
+                Results["loss"].append(whole_output['loss'])
+                Results["imputed"].append(whole_output['preds'])
+                Results["ground"].append(curdata_orig)
+                Results["mask_compare"].append(curmask-curmove)
+                global_step = whole_output['global_step']
+
+            loss, metric_seg = np.mean(Results["loss"]), np.mean(Results["metric"], axis=0)
+            if metric_seg[0] <= min_impute_metric:
+                min_impute_metric = metric_seg[0]
+            metrics = calculate_metrics_np(Results["imputed"], Results["ground"], Results["mask_compare"])
+
+            # Information logging for model training and validation (for curve plotting)
+            summary_format = ['loss/train_loss', 'metric/mse_segmin', 'metric/rmse', 'metric/mae', 'metric/mape', 'metric/mre']
+            summary_data = [loss, min_impute_metric, metrics[1], metrics[2], metrics[3], metrics[4]]
+            self.summary_logging(global_step, summary_format, summary_data)
+            # Message summary of each epoch (for info.log)
+            message = 'Epoch [%3d/%3d] loss: %.4f(%.4f), Orig MSE/RMSE/MAE/MAPE/MRE %s' % (
+                epoch_cnt, self.epochs, np.mean(Results["loss"]), min_impute_metric, metrics)
+            self.logger.info(message)
+            epoch_cnt += 1
+        print '%ds' % (time.time()-start_time)
+
+    def calculate_scheduled_lr(self, epoch, min_lr=1e-6):
+        decay_factor = int(math.ceil((epoch - self.lr_decay_epoch) / float(self.lr_decay_interval)))
+        new_lr = self.lr_init * self.lr_decay ** max(0, decay_factor)
+        new_lr = max(min_lr, new_lr)
+
+        self.logger.info('Current learning rate to: %.6f' % new_lr)
+        sys.stdout.flush()
+
+        self.sess.run(self.lr_update, feed_dict={self.lr_new: new_lr})
+        self.MD_Analyzer_train.set_lr(self.learning_rate)
+        return new_lr
\ No newline at end of file
diff --git a/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.pyc b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d1e6d7cf08bc76dd6310ae2730145a567441ac3b
GIT binary patch
literal 7437
[base85 payload omitted]
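The staircase learning-rate schedule implemented by `calculate_scheduled_lr` above is easy to check by hand. A minimal re-implementation, using the values from Config.yaml (learning_rate 0.001, lr_decay 0.3, lr_decay_epoch 20, lr_decay_interval 15):

```python
import math

def scheduled_lr(epoch, lr_init=0.001, lr_decay=0.3,
                 decay_epoch=20, decay_interval=15, min_lr=1e-6):
    decay_factor = int(math.ceil((epoch - decay_epoch) / float(decay_interval)))
    return max(min_lr, lr_init * lr_decay ** max(0, decay_factor))

for epoch in (0, 20, 21, 35, 36, 51):
    print(epoch, scheduled_lr(epoch))
# 0.001 through epoch 20, then 3e-4 up to epoch 35, 9e-5 up to epoch 50, ...
```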
diff --git a/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.py b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.py
new file mode 100644
index 0000000..8a07c6c
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.py
@@ -0,0 +1,362 @@
+import tensorflow as tf
+import numpy as np
+import math
+import os
+import json
+
+from Lib.Utility import *
+from Model.Base_TFModel import Basement_TFModel
+
+class MultiDim_Analyzer(Basement_TFModel):
+
+    def __init__(self, value_sets, init_learning_rate, sess, config, is_training=True, *args, **kwargs):
+
+        super(MultiDim_Analyzer, self).__init__(sess=sess, config=config, learning_rate=init_learning_rate, is_training=is_training)
+        '''
+        Model input explanation & reminder:
+        enc_input: the masked input of the encoder; its time dimension may differ from dec_input (data generation and assignment in the handler)
+        dec_input: the masked input of the decoder; its time dimension may differ from enc_input (data generation and assignment in the handler)
+        truth_pred: used in the loss calculation; has the same dimensions as the decoder input, same (shifted) value for completion (prediction)
+        truth_mask: used to calculate metrics such as MSE and to relabel the model output for comparison (mask of natural loss)
+        shared_info: auxiliary information for encoding the identity dimension of the encoder/decoder input; encoding the time dimension is similar to NLP
+        scalar: the maximum value of each measurement, used for metric calculation
+
+        For data in the value_sets (data unzip):
+        Size of model input/output: (batch_size, num_identity, num_measurement, period_enc/dec, 1)
+        Size of auxiliary shared_info: (batch_size, num_shared_feature)
+        '''
+        (enc_input, ori_input, truth_pred, truth_mask, move_mask, shared_info, scalar) = value_sets
+        self.num_identity = enc_input.get_shape().as_list()[1]
+
+        # Initialization of the model hyperparameters, enc-dec structure, evaluation metric & optimizer
+        self.initial_parameter()
+        self.model_input, self.model_output = self.encdec_handler(enc_input, shared_info, truth_pred, truth_mask, move_mask)
+        self.metric_opt(self.model_output, ori_input, truth_pred, truth_mask, move_mask, scalar)
+
+    def encdec_handler(self, enc_input, shared_info, truth_pred, truth_mask, move_mask):
+        # For the dimensions of the input, refer to the class initialization.
+
+        # Options for the model structure (a sketch of the first two follows this method):
+        # Independent -------------- 3 enc-dec models are used to learn the relationship of each dimension respectively
+        # Sequence ----------------- follow an arbitrary order to apply the multiplications to the value vector
+        # Element-wise-addition ---- multiply the same value vector and then do the element-wise addition (average)
+        # Concatenation ------------ multiply the same value vector and then concatenate and matmul (generalized ew-addition)
+        # Dimension-reduce --------- expand the input 3-D value to a 1-D vector and then calculate the huge AM (limited by memory)
+
+        # The encoding is available for the Identity&Measurement and Time dimensions
+        (shared_encoder, shared_decoder, time_encoder, time_decoder) = self.auxiliary_encode(shared_info)
+        if self.flag_casuality == True:
+            mask_casuality = self.casual_mask()
+        else:
+            mask_casuality = None
+        if self.flag_imputation == True:
+            self.mask_imputation = self.impute_mask()
+        else:
+            self.mask_imputation = None
+        if self.flag_time == True:
+            enc_input = enc_input + time_encoder
+        if self.flag_identity == True:
+            enc_input = enc_input + shared_encoder
+
+        with tf.variable_scope('layer_init'):
+            enc_init = self.multihead_attention(enc_input, self.attention_unit, self.model_structure)
+            enc_init = self.feed_forward_layer(enc_init, self.conv_unit, self.filter_encdec)
+            enc_init = enc_input + tf.multiply(tf.constant(1.0, dtype=tf.float32)-move_mask, enc_init)
+        topenc = self.encoder(enc_input, self.model_structure)
+        return enc_init, topenc
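To make the "Sequence" and "Element-wise-addition" options above concrete, here is an illustrative NumPy sketch (not from the patch), with batch, heads and the measurement axis dropped, a value tensor `V` of shape `[num_identity, time, units]`, and random row-stochastic attention maps:

```python
import numpy as np

def softmax(x, axis):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

ids, time, units = 4, 6, 3
V = np.random.randn(ids, time, units)
AM_id = softmax(np.random.randn(ids, ids), axis=1)      # identity-dimension attention map
AM_time = softmax(np.random.randn(time, time), axis=1)  # time-dimension attention map

out_id = np.einsum('ij,jtu->itu', AM_id, V)           # attend across identities
sequence = np.einsum('ts,isu->itu', AM_time, out_id)  # ...then across time

out_time = np.einsum('ts,isu->itu', AM_time, V)
elementwise = 0.5 * (out_id + out_time)  # average of the per-dimension outputs
```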
+    def encoder(self, enclayer_init, model_structure):
+        enclayer_in = enclayer_init
+        with tf.variable_scope('Encoder'):
+            for cnt_enclayer in range(0, self.num_enclayer):
+                with tf.variable_scope('layer_%d' % (cnt_enclayer)):
+                    enclayer_in = self.layer_norm(enclayer_in + self.multihead_attention(
+                        enclayer_in, self.attention_unit, model_structure), 'norm_1')
+                    enclayer_in = self.layer_norm(enclayer_in + self.feed_forward_layer(
+                        enclayer_in, self.conv_unit, self.filter_encdec), 'norm_2')
+            with tf.variable_scope('Enc_pred1'):
+                enclayer_in = self.multihead_attention(enclayer_in, self.attention_unit, model_structure)
+                enclayer_in = self.feed_forward_layer(enclayer_in, self.conv_unit, self.filter_encdec)
+        return enclayer_in
+
+    def decoder(self, declayer_init, model_structure, encoder_top, mask_casuality=None):
+        declayer_in = declayer_init
+        with tf.variable_scope('Decoder'):
+            with tf.variable_scope('layer_0'):
+                declayer_in = self.layer_norm(declayer_in + self.multihead_attention(
+                    declayer_in, self.attention_unit, model_structure, mask=mask_casuality), 'norm_1')
+                (attention_out, KVtop_share) = self.multihead_attention(
+                    declayer_in, self.attention_unit, model_structure, top_encod=encoder_top, scope='enc-dec-attention')
+                declayer_in = self.layer_norm(declayer_in + attention_out, 'norm_2')
+                declayer_in = self.layer_norm(declayer_in + self.feed_forward_layer(
+                    declayer_in, self.conv_unit, self.filter_encdec), 'norm_3')
+            for cnt_declayer in range(1, self.num_declayer):
+                with tf.variable_scope('layer_%d' % (cnt_declayer)):
+                    declayer_in = self.layer_norm(declayer_in + self.multihead_attention(
+                        declayer_in, self.attention_unit, model_structure, mask=mask_casuality), 'norm_1')
+                    declayer_in = self.layer_norm(declayer_in + self.multihead_attention(
+                        declayer_in, self.attention_unit, model_structure, top_encod=encoder_top, cache=KVtop_share,
+                        scope='enc-dec-attention'), 'norm_2')
+                    declayer_in = self.layer_norm(declayer_in + self.feed_forward_layer(
+                        declayer_in, self.conv_unit, self.filter_encdec), 'norm_3')
+            with tf.variable_scope('Dec_pred_1'):
+                declayer_in = self.multihead_attention(declayer_in, self.attention_unit, model_structure)
+                declayer_in = self.feed_forward_layer(declayer_in, self.conv_unit, self.filter_encdec)
+
+        return declayer_in
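`metric_opt` below computes the training loss only over points selected by `move_mask`, while evaluation uses `truth_mask - move_mask`, i.e. the points that are naturally observed but artificially removed. A standalone NumPy sketch of that masked-RMSE idea (assuming, per the handler comments, that 1 means available in both masks):

```python
import numpy as np

def masked_rmse(pred, truth, mask):
    return np.sqrt((((pred - truth) * mask) ** 2).sum() / mask.sum())

truth_mask = np.array([1., 1., 0., 1., 1.])  # natural availability
move_mask = np.array([1., 0., 0., 1., 0.])   # availability after artificial removal
pred = np.array([0.9, 2.2, 0.0, 3.8, 5.1])
truth = np.array([1.0, 2.0, 0.0, 4.0, 5.0])

train_loss = masked_rmse(pred, truth, move_mask)              # fit the visible points
eval_rmse = masked_rmse(pred, truth, truth_mask - move_mask)  # score the held-out points
print(train_loss, eval_rmse)
```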
+    def metric_opt(self, model_output, truth_orig, truth_pred, truth_mask, move_mask, scalar):
+
+        loss_mask = move_mask
+        global_step = tf.train.get_or_create_global_step()
+        avail_output = tf.multiply(model_output, loss_mask)
+        avail_truth = tf.multiply(truth_pred, loss_mask)
+
+        if self.loss_func == 'MSE':
+            self.loss = loss_mse(avail_output, avail_truth, loss_mask)
+        elif self.loss_func == 'RMSE':
+            self.loss = loss_rmse(avail_output, avail_truth, loss_mask)
+        elif self.loss_func == 'MAE':
+            self.loss = loss_mae(avail_output, avail_truth, loss_mask)
+        else:
+            self.loss = loss_rmse(avail_output, avail_truth, loss_mask) + loss_mae(avail_output, avail_truth, loss_mask)
+
+        if self.is_training:
+            optimizer = tf.train.AdamOptimizer(self.learning_rate)
+            tvars = tf.trainable_variables()
+            grads = tf.gradients(self.loss, tvars)
+            grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm)
+            self.train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step, name='train_op')
+        self.info_merge = tf.summary.merge_all()
+
+        orig_preds = scalar.TFinverse_transform(model_output)
+        orig_truth = tf.multiply(truth_orig, truth_mask-move_mask)
+        self.orig_impute = tf.multiply(truth_orig, move_mask) + tf.multiply(orig_preds, tf.constant(1.0, dtype=tf.float32)-move_mask)
+        self.orig_metric = calculate_metrics(tf.multiply(orig_preds, truth_mask-move_mask), orig_truth, truth_mask-move_mask)
+
+    def multihead_attention(self, att_input, att_unit, model_structure, top_encod=None, cache=None, mask=None, scope='self-attention'):
+        """
+        att_input: the input to this module, with size [batch, #identity, #measurement, time, 1]
+        att_unit: the hyperparameters for the dimensions of Q/K and V
+        top_encod: the output of the top encoder layer [batch_size, #identity, #measurement, time, 1]
+        mask: masks the causality of the self-attention layer, [batch, time, time] or [batch, #id*#meas*time, #id*#meas*time]
+
+        3-D convolution is applied to realize the unit expansion. For convenience we use the following index mapping:
+        [batch, in_depth, in_height, in_width, in_channels] = [batch_size, num_identity, num_measurement, length_time, 1]
+        """
+        # Initialization of the necessary items
+        (value_units, Iunits, Tunits) = att_unit
+        KVtop_cache = None
+        # Since num_measurement and the period are small and may equal each other by coincidence,
+        # we use the num_identity dimension.
+        V_filters, V_kernal, V_stride = value_units*self.num_heads, (1, self.V_timfuse), (1, self.V_timjump)
+        batch, ids, time = att_input.get_shape().as_list()[:3]
+
+        with tf.variable_scope(scope):
+            if top_encod is None or cache is None:
+                if top_encod is None:
+                    top_encod = att_input
+                else:
+                    if cache is None:
+                        KVtop_cache = {}
+                # Linear projection (unit expansion) for the multi-head attention dimension:
+                Q_iden = tf.layers.dense(tf.reshape(att_input, [batch, ids, -1]), self.num_heads*Iunits,
+                                         use_bias=False, name='Q_ID')
+                K_iden = tf.layers.dense(tf.reshape(top_encod, [batch, ids, -1]), self.num_heads*Iunits,
+                                         use_bias=False, name='K_ID')
+                Q_time = tf.layers.dense(tf.reshape(tf.transpose(att_input, [0, 2, 1, 3]), [batch, time, -1]), self.num_heads*Tunits,
+                                         use_bias=False, name='Q_Time')
+                K_time = tf.layers.dense(tf.reshape(tf.transpose(top_encod, [0, 2, 1, 3]), [batch, time, -1]), self.num_heads*Tunits,
+                                         use_bias=False, name='K_Time')
+                V = tf.layers.conv2d(inputs=top_encod, filters=V_filters, kernel_size=V_kernal, strides=V_stride,
+                                     padding="same", data_format="channels_last", name='V')
+                if KVtop_cache is not None:
+                    KVtop_cache = {'share_Kid': K_iden, 'share_Ktime': K_time, 'share_V': V}
+            else:
+                Q_iden = tf.layers.dense(tf.reshape(att_input, [batch, ids, -1]), self.num_heads*Iunits,
+                                         use_bias=False, name='Q_ID')
+                Q_time = tf.layers.dense(tf.reshape(tf.transpose(att_input, [0, 2, 1, 3]), [batch, time, -1]), self.num_heads*Tunits,
+                                         use_bias=False, name='Q_Time')
+                K_iden, K_time, V = cache['share_Kid'], cache['share_Ktime'], cache['share_V']
+
+            # Split the matrices into multiple heads and concatenate into a larger batch:
+            # [self.batch_size*self.num_heads, self.X, self.X]
+            Qhb_id = tf.concat(tf.split(Q_iden, self.num_heads, axis=2), axis=0)
+            Qhb_time = tf.concat(tf.split(Q_time, self.num_heads, axis=2), axis=0)
+            Khb_id = tf.concat(tf.split(K_iden, self.num_heads, axis=2), axis=0)
+            Khb_time = tf.concat(tf.split(K_time, self.num_heads, axis=2), axis=0)
+            # [self.batch_size*self.num_heads, self.num_identity, self.num_measurement, self.length_time, 'hidden-units']
+            Q_headbatch = (Qhb_id, Qhb_time)
+            K_headbatch = (Khb_id, Khb_time)
+            V_headbatch = tf.concat(tf.split(V, self.num_heads, axis=3), axis=0)
+
+            if mask is not None:
+                mask_recur = tf.tile(mask, [self.num_heads, 1, 1])
+            else:
+                mask_recur = None
+
+            out = self.softmax_combination(Q_headbatch, K_headbatch, V_headbatch, model_structure, att_unit, mask_recur)
+
+            # Merge the multiple heads back to the original shape
+            # [batch_size, self.num_identity, self.num_measurement, self.length_time, 'hidden-units'*self.num_heads]
+            out = tf.concat(tf.split(out, self.num_heads, axis=0), axis=3)
+            out = tf.layers.dense(out, 1, name='multihead_fuse')
+            out = tf.layers.dropout(out, rate=self.attdrop_rate, training=self.is_training)
+
+        if KVtop_cache is None:
+            return out
+        else:
+            return (out, KVtop_cache)
+    def feed_forward_layer(self, info_attention, num_hunits, filter_type='dense'):
+        '''
+        filter_type:
+        "dense" indicates a dense layer,
+        "graph" indicates a graph-based FIR filter (graph convolution),
+        "attention" indicates applying the attention algorithm,
+        "conv" indicates a shared convolution kernel applied instead of one big weight matrix.
+        self.ffndrop_rate may be considered later 03122019
+        '''
+        channel = info_attention.get_shape().as_list()[-1]
+        if filter_type == 'dense':
+            ffn_dense = tf.layers.dense(info_attention, num_hunits, use_bias=True, activation=tf.nn.relu, name=filter_type+'1')
+            ffn_dense = tf.layers.dense(ffn_dense, num_hunits, use_bias=True, activation=None, name=filter_type+'2')
+            return tf.layers.dense(ffn_dense, channel, use_bias=True, activation=None, name=filter_type+'3')
+        elif filter_type == 'graph':
+            raise NotImplementedError
+        elif filter_type == 'attention':
+            raise NotImplementedError
+        elif filter_type == 'conv':
+            raise NotImplementedError
+
+    def layer_norm(self, norm_input, name_stage):
+        norm_step = tf.contrib.layers.layer_norm(tf.transpose(tf.squeeze(norm_input), perm=[0, 2, 1]),
+                                                 begin_norm_axis=2, center=True, scale=True, scope=name_stage)
+        return tf.expand_dims(tf.transpose(norm_step, perm=[0, 2, 1]), -1)
+    def softmax_combination(self, Q, K, V, model_structure, att_unit, mask=None):
+        '''The mask is applied before the softmax layer; no dropout is applied.'''
+        value_units, segs = V.get_shape().as_list()[-1], V.get_shape().as_list()[0]
+        ids, time = Q[0].get_shape().as_list()[1], Q[1].get_shape().as_list()[1]
+        (value_units, Iunits, Tunits) = att_unit
+
+        (Q_I, Q_T) = Q
+        (K_I, K_T) = K
+
+        # Check the dimension consistency of the combined matrices
+        assert Q_I.get_shape().as_list()[1:] == K_I.get_shape().as_list()[1:]
+        assert Q_T.get_shape().as_list()[1:] == K_T.get_shape().as_list()[1:]
+        assert Q_I.get_shape().as_list()[0] == Q_T.get_shape().as_list()[0]
+        assert K_I.get_shape().as_list()[0] == K_T.get_shape().as_list()[0]
+
+        # Build the attention maps
+        AM_Identity = tf.matmul(Q_I, tf.transpose(K_I, [0, 2, 1])) / tf.sqrt(tf.cast(Iunits, tf.float32))
+        AM_Time = tf.matmul(Q_T, tf.transpose(K_T, [0, 2, 1])) / tf.sqrt(tf.cast(Tunits, tf.float32))
+        if mask is not None:
+            # -1.0e9 (rather than -inf, whose product with 0 is NaN) disables the masked entries
+            AM_Time = tf.multiply(AM_Time, mask) + tf.constant(-1.0e9)*(tf.constant(1.0)-mask)
+        if self.mask_imputation is not None:
+            (iden_mask, time_mask) = self.mask_imputation
+            #AM_Identity = tf.multiply(AM_Identity,iden_mask) + tf.constant(-1.0e9)*(tf.constant(1.0)-iden_mask)
+            AM_Time = tf.multiply(AM_Time, time_mask) + tf.constant(-1.0e9)*(tf.constant(1.0)-time_mask)
+        AM_Identity = tf.nn.softmax(AM_Identity, 2)
+        AM_Time = tf.nn.softmax(AM_Time, 2)
+
+        shape_id = [segs, ids, time, value_units]
+        shape_time = [segs, time, ids, value_units]
+
+        if model_structure == 'Sequence':
+            Out_Id = tf.reshape(tf.matmul(AM_Identity, tf.reshape(V, [segs, ids, -1])), shape_id)
+            Out_Id = tf.transpose(Out_Id, perm=[0, 2, 1, 3])
+            Out_Id_Time = tf.reshape(tf.matmul(AM_Time, tf.reshape(Out_Id, [segs, time, -1])), shape_time)
+            return tf.transpose(Out_Id_Time, perm=[0, 2, 1, 3])
+        else:
+            V_id, V_time = V, tf.transpose(V, perm=[0, 2, 1, 3])
+            Out_Identity = tf.reshape(tf.matmul(AM_Identity, tf.reshape(V_id, [segs, ids, -1])), shape_id)
+            Out_Time = tf.reshape(tf.matmul(AM_Time, tf.reshape(V_time, [segs, time, -1])), shape_time)
+
+            Out_Time = tf.transpose(Out_Time, perm=[0, 2, 1, 3])
+            if model_structure == 'Element-wise-addition':
+                return tf.divide(tf.add(Out_Identity, Out_Time), tf.constant(2.0))
+            elif model_structure == 'Concatenation':
+                Attention_output = tf.concat([Out_Identity, Out_Time], 3)
+                return tf.layers.dense(Attention_output, value_units, use_bias=False)
+            else:
+                raise ValueError('Unavailable model_structure: %s' % model_structure)
+
+    def casual_mask(self):
+        '''
+        This mask is only applied in the self-attention layer of the decoder.
+        A lower-triangular matrix indicates the positions available as references in each calculation.
+        Key idea: only previous positions are used to predict the future.
+        '''
+        batch_size, period = self.batch_size, self.period_dec
+        casual_unit = np.tril(np.ones((period, period)))
+        casual_tensor = tf.convert_to_tensor(casual_unit, dtype=tf.float32)
+        return tf.tile(tf.expand_dims(casual_tensor, 0), [batch_size, 1, 1])
+
+    def impute_mask(self):
+        batch_size = self.batch_size
+        iden_unit = 1.0 - np.identity(self.num_identity)
+        time_unit = 1.0 - np.identity(self.period_dec)
+        iden_tensor = tf.tile(tf.expand_dims(tf.convert_to_tensor(iden_unit, dtype=tf.float32), 0), [self.num_heads*batch_size, 1, 1])
+        time_tensor = tf.tile(tf.expand_dims(tf.convert_to_tensor(time_unit, dtype=tf.float32), 0), [self.num_heads*batch_size, 1, 1])
+        return (iden_tensor, time_tensor)
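For a toy period of 4, the two mask patterns built above look as follows; `casual_mask` keeps only previous positions, while `impute_mask` zeroes the diagonal so that a point is never reconstructed from itself:

```python
import numpy as np

period = 4
print(np.tril(np.ones((period, period))))  # casual_mask pattern
# [[1. 0. 0. 0.]
#  [1. 1. 0. 0.]
#  [1. 1. 1. 0.]
#  [1. 1. 1. 1.]]
print(1.0 - np.identity(period))           # impute_mask pattern
# [[0. 1. 1. 1.]
#  [1. 0. 1. 1.]
#  [1. 1. 0. 1.]
#  [1. 1. 1. 0.]]
```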
+    def initial_parameter(self):
+
+        config = self.config
+        # Parameter initialization for data assignment
+        self.batch_size = int(config.get('batch_size', 1))
+        self.period_enc = int(config.get('period_enc', 12))
+        self.period_dec = int(config.get('period_dec', 12))
+
+        # Parameter initialization for the model framework
+        self.num_heads = int(config.get('num_heads', 8))
+        self.num_enclayer = int(config.get('num_enclayer', 5))
+        self.num_declayer = int(config.get('num_declayer', 5))
+        self.model_structure = self.config.get('model_structure')
+
+        # Parameter initialization for attention (Q, K and V)
+        self.AM_timjump = int(config.get('time_stride_AM', 1))
+        self.V_timjump = int(config.get('time_stride_V', 1))
+        self.AM_timfuse = int(config.get('time_fuse_AM', 1))
+        self.V_timfuse = int(config.get('time_fuse_V', 1))
+        vunits, Iunits, Tunits = int(config.get('units_value', 6)), int(config.get('units_IDw', 14)), int(config.get('units_Timew', 6))
+        self.attention_unit = (vunits, Iunits, Tunits)
+
+        # Parameter initialization for the filters (enc-dec, prediction)
+        self.filter_encdec = config.get('filter_encdec', 'dense')
+        self.conv_unit = int(config.get('units_conv', 4))
+        self.attdrop_rate = float(config.get('drop_rate_attention', 0.0))
+        self.ffndrop_rate = float(config.get('drop_rate_forward', 0.1))
+        self.filter_pred = config.get('filter_pred', 'dense')
+        self.pred_unit = int(config.get('units_pred', 8))
+
+        # Mask labels
+        self.flag_identity = config.get('flag_identity', False)
+        self.flag_time = config.get('flag_time', False)
+        self.flag_casuality = config.get('flag_casuality', False)
+        self.flag_imputation = config.get('flag_imputation', False)
+
+    def auxiliary_encode(self, shared_info):
+        # Concatenation is not applicable here, since the attention over all three dimensions needs to be learned
+        # and expanding each dimension would not make sense for our model.
+        # Concatenation along the feature dimension (expanded as 1) is equivalent to element-wise addition.
+        with tf.variable_scope('shared_feature'):
+            shared_encoder = tf.layers.dense(tf.expand_dims(shared_info, 0), self.period_enc,
+                                             use_bias=False, activation=None, name='encoder')
+            shared_encoder = tf.reshape(shared_encoder, [1, self.num_identity, self.period_enc, 1])
+            shared_encoder = tf.tile(shared_encoder, [self.batch_size, 1, 1, 1])
+            shared_decoder = tf.layers.dense(tf.expand_dims(shared_info, 0), self.period_dec,
+                                             use_bias=False, activation=None, name='decoder')
+            shared_decoder = tf.reshape(shared_decoder, [1, self.num_identity, self.period_dec, 1])
+            shared_decoder = tf.tile(shared_decoder, [self.batch_size, 1, 1, 1])
+
+        denom = tf.constant(1000.0)
+        phase_enc = tf.linspace(0.0, self.period_enc-1.0, self.period_enc)*tf.constant(math.pi/180.0)/denom
+        phase_dec = tf.linspace(0.0, self.period_dec-1.0, self.period_dec)*tf.constant(math.pi/180.0)/denom
+        sin_enc, sin_dec = tf.expand_dims(tf.sin(phase_enc), 0), tf.expand_dims(tf.sin(phase_dec), 0)
+
+        time_encoder = tf.expand_dims(tf.tile(tf.expand_dims(sin_enc, 0), [self.batch_size, self.num_identity, 1]), -1)
+        time_decoder = tf.expand_dims(tf.tile(tf.expand_dims(sin_dec, 0), [self.batch_size, self.num_identity, 1]), -1)
+        return (shared_encoder, shared_decoder, time_encoder, time_decoder)
\ No newline at end of file
diff --git a/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.pyc b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..277bcaa83827398b5378ce48073400cd9706071d
GIT binary patch
literal 15586
[base85 payload omitted]
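A NumPy equivalent (a sketch, not the committed code) of the sinusoidal time encoding built in `auxiliary_encode` at the end of the file above: one sine value per time step with phase step pi/180/1000, which the model then tiles to `[batch_size, num_identity, period, 1]`:

```python
import math
import numpy as np

period_enc = 432  # period_enc from Config.yaml
phase = np.linspace(0.0, period_enc - 1.0, period_enc) * (math.pi / 180.0) / 1000.0
time_encoder = np.sin(phase)  # shape [period_enc]
print(time_encoder[:3], time_encoder[-1])
```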
zbKqC-WFN!+l6a8%?jZjTF3M?IOEa` ztWeL_`-zyZnSk({-9DJA5ox)^ACN;E)==X~TM!0Z90&qQ81>p~JnxJt!7jR49ToVpfx zxN~GX;1n5%y>qNxr@rIt%~?K!Cp(G6P0f4zz5V_;(u4j4(!G5A1KvD77rYs-P9aC( zO_~tX{|_=?z5&P|6b^-Nob|5r?*9&1km?Kt;B?@)5w$;3DWRFhrl2t zaasGSH@q4I^orVSkE$R?RW*XS|IajY8l`!KqN{JeM}w}I2h`0T%*d5Teyi*(75SpAvwukM(r=;)L4{} z;U|z0|3D&|NHuGc$|a+`1__^mWIad_Vi!-C` ztQpePcr4e2mrz>qEQX-5FOZ&xyjX(Y&x=$R5hfMBNHjF@M(T=>s~JVGT2I75ssqs%c*J^&s*4xWC-JA&NFg0qh!KM&5X_}i~@N4ZD*7JvVQ z{Jo1tqVWM(?V&5#0`vs44(tRzf;|V;0tEjCF;;@BZH~_3c0*fqNRMM>HT>j*+ z`Vi+POy{Bl^oG&R(~4gbUo7S|P{ONAuZX`xW(2tWipYDV)m?^bm*q)M6*~0rs9xn# z!c$v89)CZ2IeZ#l3&)U1kxC?}QciP??j$^)dOv|%8jWBL&!be*q}9!`sJY%TQ&Dei zczYB(d>Pj16D$?a7VS7OcaMhDaDaFEk7tnM^PoFZ^!o|){F$9-4*v>#0qAuxnf^Pd*SEUAvr5sZ@1wXXw2=Tzmi08 zt0l=sZ!r=`ecrm3b~ciJme5Dv-=Zy+i)`L>n*OLdCS?b zmH`%IGArR&)vNijM77O=4!L2&rImdDEI~AU;*B_MQPR;r=U5CNWNHIP-Im&R8;hGM z$JNRdSBliV=0K-98@>w5PD{v;epw4oMw`r;9Suu*Ka88=@t=iVJ>yN|nfFeq?1=ZP zUPk4YC9URAYt>0(C-Bf=y29eW0WpGG--2E(<3NpLa#YR!_1s~GkieV1gHKuHYs}aD zVYtXE0!1*hA413YE+EGJ~j; zQzBI;kt-|Njvyk0Jff0_zcsL47~nzIK(qntL#e)d0AY^h!uCMd{LoJ!ULm1p+($NN zT;mA7j=08A_YrKvJ3sV0e_i&nBRjD(i%Ei+LA(-QN9^mUn**^PbyyTkkGEmigB^e#pK_bP zV7mM`W3_`|YuCX@*K}dDbab7(qNW&hGZ0f?Gv-)lScH{F5$1*QgZ?mIWEkr5QKulm zJ+A-lF;rW^0}z8+Z(v3@-$b&s@Dv5udx#262E`wYS=hjk?JouSY7!_LHt=n2>kS@SZ(E3P|2Db|4REvAT;c!`lP)=3A3N?m)V z)^)`hPY3ssU^U*b_RVgq)xC#P0&ICBxsQqWdU$)O8c8{Yv=cR84r1StTTRlH)m+(o zX`=wv5%upRt?cdK6I<3k7(F{Ut+y@Y(n>lj2*1CNYUD3MxEAMDGXB41f&CqiG2#w;<1P+$FEqh6BVsr!R#8$-{XKUT?JR!O* z)bV{Bx6mXR25Os_vC4F^Q`z7c%;LffYK9*`qHeg#jP06{dz)Dym^8B<+Tfbsjz}GGOWodvV?@%>L?sL5 zJf&E9x-HsSqdo~pqrNJ8Q-og7BVk5S^ffF_r++1B|mOV3DVZ1SfGb=a;e^VQ8 zL(ycmz26>r=iXDgMg63{-q*;n`U`;W==s(2!1rZ5*_*^Xwci`Bo$-z%q&*8Ge8T5* z7REWBYsK@74D=I%6Bdk}f~k%VYELUY;g9=I13K?L1K5}!ehs+mTskf?XC{OFzmS2s zyQTwhn7A~zJvWCCICM2^PsStuoI!HLHE?)K%h(Q*pn+it0}ev~fjh)CP#@vPhklKb z(8n~uF?3ON_xq~?2kc5j>4ke(6^CZPy2K79G@!pD7ylN{(N3w?Ndf<@1i}hJ%_#xD zUji|$Y3O*$o{Yk>lGzYSI-}vq)k{GIJ}J&VNlBlAQIjkXpwwZ*dhq0WtE2S<2$pe2 zpb=cV9tOr6!Ggg@G}UvbhgiU#rq4$)$%&@Aq4KI~1aWHx?qa^$9yTY|(UwxaI(*%t z8jUeWHDrntCdff5>^N9XSD^2xt)SY#w)d+JsL0PYo1IGFpuz*0K{%oUyTWnB%HS+3 zc;Vvt3m0BE|6o0&2FxJL^gEg~TX2W5+GePAxzkpRRN$GxJDZ9`y-ZG99iZWb+|%Bjo{#YP59 zfZI^eYEnMt!!oLjqH(2&H6b95SDmaO=rqG1l%asNx-6W^f}G@9#iO%#fmy1k7l!nk0S7dz!jZbL|zQbT?W?i%B?%(28aVyNZ|y1>f1=R z;5;In2*Mang=OB4lSW2S17ht_z50VWH$dFwk+Ye5{A}j8HmL@Sh=rEz5raKe4&~UR zhr&P(7TC<68)F=jp+&?9@sDF8d#+<0Eb0;)OlwEJqy`8>sp<-r*G_{G5dxwa9m61K z1KSR4aum3{rF@nw4QgA`?K559lJ`I2b1x^bsLWq3yqMBv>U4v`e;Khr;1~0vrD+D* z0rE&!kip-NUREag?sxx52L4h4p$;!S{1+6;+s)|Ly<(dD%)K;A&d~d09H2nqF3I1# zB6lk>S4xJ+;({*8R2GDM$;(*=y@S@W2~5vWIKv!*n;A;EP_<_7^vz>K$RpQDgp3Ov z)#!A>Y>ysRQnp0+EBFro5sB$0bMHc42$d7lNH^#tu{OpZejVSY)4s;sAFvwlB)s0y z`2x>rmde{kyal&FjbZwXOL$X8)D1aH=I{L}%Rj~BTS#OPWywkw{slpQ$>h(Ne4B+F z1Of%c_bq)RUQm4BlAn0_)#y4599?jv#=`q8I_X?ZdT(}vi*NM1?2Q^tE;8M<$?BzY z*j`)DfmG-%Ybd$gw@YVFzPa5wu{y;Vh?@Wnf+rl8OMALFoLK;DM}4+$0N)SrWY>t< zKjVEEmy+On)h^%?5RABAtW-@gcNZN@){qm*fA`4f1K`wK`f`X^B5Sv<4e zoL}eO(dOZE1y2S6F9~m{=X~5996*kn6^9f$4B}EpDNqdfr){>2dJA$45`}pllX$$+ zggmZtAccU26kvFl<{!+i5+hLTn0tK04Azr=mEE zKRbdg2@phsrvSqZ7l?3B*Dt&cN(Dr46t*kfE^~c&%0{9`G_MI?qm0wFS^+{U;&%9H zL4wN>Ra_Qht@^x?JP1Ip@k_yL96FsN9JOtv-Sun)!IrQgkK)pLuDZ>2-s-Ec=@>V~ z&NvydGI2%EiYZu3jN}c2H|@gSOzPqH@G7RmoLkD^USnqBg~yQeXdriRvm$~UhzqbL zQvL)*;lCmo-jxdfo2V#~487RJ;;zGMg~hv}+HuN1iO!z}f#`OM8$aYd4eAx`a#(ys zyZ?X;%@G{2fyO|J@;(Tukx*wr$6KIO>428>Ym{p=3jznV8s*Y80Rm7iFzKX@Cvx~N z&K``;VYi9$i}s-KpSQhY5U_@MHlSr+taQ=#ji*zg{jmL|45I9#Pz+V}V^(;MNwt&V zPxyL~$?qZ=dY!hhmHeciDQPPCsW+mOY_)yW&a3p7w9n6?zvsBm3^qErmq+lqgpc-` 
zGOF6^4S!^>vC)t!@HDZ#CSy>VIHm;5QmR2kKwMF_=gES=9)+`4;;tZ+Us-`)d|GK zn5skA(zq2if5ulFK{=iUY^zs#i|#$ z23G&L%d7gZ(hV4l0C7fXz!M`tSk(qRIRbprZs>dfbhs$yv5>-uso%tB@s!fZKXkgeX!VJtc#Piw$`F z>b-OyL2PHBTQHCA={~IDR?;I@LY3rpnzs|q! zysX}eXjS#I4eNuiAr(aUK_&r{XOJw+GpEjJ4W_t)&Tj-nOy2#J#nq~6K}Xh-TL6O) z3!_t%ui#blG8oq`{5>K#&Ey3pyc7&k%?tXtC+hVdiE zuie!JA5DV$u%Rgr?c5#LiyZKI%1chk`4c`bq|eusdNalMy!UDEV2u}R_`x^w{L~>q z9m#?LYU$5wfH1%&nE9fgULxI z&oYs2_jlJZ~DoJr)EAqJu|Uu`uxPy#GZ*~?8la@ zXm%`Az&-PfxRE$?!4LfCKZQpq|B19~XUA(<@jI~3q|0Yt$I&@|O-3qYM5J|=KhiCK ipc|s8)6Y16DO1%pecU$Y*VvV=O7b99AAiz2;Qud9E+;+! literal 0 HcmV?d00001 diff --git a/models/cross-dimensional-attention/tensorflow/__init__.py b/models/cross-dimensional-attention/tensorflow/__init__.py new file mode 100644 index 0000000..9615bf0 --- /dev/null +++ b/models/cross-dimensional-attention/tensorflow/__init__.py @@ -0,0 +1,7 @@ +# uncompyle6 version 3.7.1 +# Python bytecode 2.7 (62211) +# Decompiled from: Python 3.6.9 (default, Apr 18 2020, 01:56:04) +# [GCC 8.4.0] +# Embedded file name: /home/jiawei/Tensor_MultiDim_NYC/Model/__init__.py +# Compiled at: 2019-04-29 08:21:25 +pass \ No newline at end of file diff --git a/models/cross-dimensional-attention/tensorflow/__init__.pyc b/models/cross-dimensional-attention/tensorflow/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d4580da012e7a0e63e8b133b2ae405451a92294 GIT binary patch literal 137 zcmZSn%*(Yl?O04Q0~9a^fU5vQ}wel6U$RG z^+Qtgit~%&eM@snGF>ur Date: Fri, 19 Jun 2020 19:03:53 +0530 Subject: [PATCH 3/7] Create README.md --- models/cross-dimensional-attention/README.md | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 models/cross-dimensional-attention/README.md diff --git a/models/cross-dimensional-attention/README.md b/models/cross-dimensional-attention/README.md new file mode 100644 index 0000000..b8db9b4 --- /dev/null +++ b/models/cross-dimensional-attention/README.md @@ -0,0 +1,22 @@ +# Cross-Dimensional-Attention Model + +## [CDSA: Cross-Dimensional Self-Attention for Multivariate, Geo-tagged Time Series Imputation](https://arxiv.org/pdf/1905.09904.pdf) +_Jiawei Ma, Zheng Shou, Alireza Zareian, Hassan Mansour, Anthony Vetro, Shih-Fu Chang_ + +Many real-world applications involve multivariate, geo-tagged time series data: at +each location, multiple sensors record corresponding measurements. For example, +air quality monitoring system records PM2.5, CO, etc. The resulting time-series +data often has missing values due to device outages or communication errors. In +order to impute the missing values, state-of-the-art methods are built on Recurrent +Neural Networks (RNN), which process each time stamp sequentially, prohibiting +the direct modeling of the relationship between distant time stamps. Recently, the +self-attention mechanism has been proposed for sequence modeling tasks such as +machine translation, significantly outperforming RNN because the relationship between each two time stamps can be modeled explicitly. In this paper, we are the first +to adapt the self-attention mechanism for multivariate, geo-tagged time series data. +In order to jointly capture the self-attention across multiple dimensions, including +time, location and the sensor measurements, while maintain low computational +complexity, we propose a novel approach called Cross-Dimensional Self-Attention +(CDSA) to process each dimension sequentially, yet in an order-independent manner. 
+Our extensive experiments on four real-world datasets, including three standard
+benchmarks and our newly collected NYC-traffic dataset, demonstrate that our
+approach outperforms the state-of-the-art imputation and forecasting methods. A
+detailed systematic analysis confirms the effectiveness of our design choices.

From 6317d70285cc2d5f7f07d619f61a363c886616d7 Mon Sep 17 00:00:00 2001
From: Pranjalya Tiwari
Date: Fri, 19 Jun 2020 21:12:03 +0530
Subject: [PATCH 4/7] Update README.md

---
 models/cross-dimensional-attention/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/models/cross-dimensional-attention/README.md b/models/cross-dimensional-attention/README.md
index b8db9b4..0d70cde 100644
--- a/models/cross-dimensional-attention/README.md
+++ b/models/cross-dimensional-attention/README.md
@@ -20,3 +20,5 @@ complexity, we propose a novel approach called Cross-Dimensional Self-Attention
 benchmarks and our newly collected NYC-traffic dataset, demonstrate that our
 approach outperforms the state-of-the-art imputation and forecasting methods. A
 detailed systematic analysis confirms the effectiveness of our design choices.
+
+![](https://drive.google.com/file/d/1U-N4c0d3w-pTYc3cFd3iWVbaaFWinNb7/view?usp=sharing)

From 685768ecb947c17b56fc7de07778d0142acdbcb3 Mon Sep 17 00:00:00 2001
From: Pranjalya Tiwari
Date: Fri, 19 Jun 2020 21:14:35 +0530
Subject: [PATCH 5/7] Update README.md

---
 models/cross-dimensional-attention/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/cross-dimensional-attention/README.md b/models/cross-dimensional-attention/README.md
index 0d70cde..39f0f27 100644
--- a/models/cross-dimensional-attention/README.md
+++ b/models/cross-dimensional-attention/README.md
@@ -21,4 +21,4 @@ benchmarks and our newly collected NYC-traffic dataset, demonstrate that our
 approach outperforms the state-of-the-art imputation and forecasting methods. A
 detailed systematic analysis confirms the effectiveness of our design choices.
 
-![](https://drive.google.com/file/d/1U-N4c0d3w-pTYc3cFd3iWVbaaFWinNb7/view?usp=sharing)
+![image](https://drive.google.com/uc?export=view&id=1U-N4c0d3w-pTYc3cFd3iWVbaaFWinNb7)

From de6fffcd4b5f98f75137875979bfb9b35a121f12 Mon Sep 17 00:00:00 2001
From: Pranjalya Tiwari
Date: Fri, 19 Jun 2020 21:16:49 +0530
Subject: [PATCH 6/7] Update README.md

---
 models/cross-dimensional-attention/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/cross-dimensional-attention/README.md b/models/cross-dimensional-attention/README.md
index 39f0f27..bb962ef 100644
--- a/models/cross-dimensional-attention/README.md
+++ b/models/cross-dimensional-attention/README.md
@@ -21,4 +21,4 @@ benchmarks and our newly collected NYC-traffic dataset, demonstrate that our
 approach outperforms the state-of-the-art imputation and forecasting methods. A
 detailed systematic analysis confirms the effectiveness of our design choices.
 
-![image](https://drive.google.com/uc?export=view&id=1U-N4c0d3w-pTYc3cFd3iWVbaaFWinNb7)
+![image](https://drive.google.com/uc?export=view&id=1Z0RbYkBYot3FOlf_V7A33qzZdlERntSg)

From 96b1b074e3f4db5af5d34fc50144b04906fdabef Mon Sep 17 00:00:00 2001
From: Pranjalya Tiwari
Date: Fri, 19 Jun 2020 21:18:34 +0530
Subject: [PATCH 7/7] Update README.md

---
 models/cross-dimensional-attention/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/cross-dimensional-attention/README.md b/models/cross-dimensional-attention/README.md
index bb962ef..b6e1a4f 100644
--- a/models/cross-dimensional-attention/README.md
+++ b/models/cross-dimensional-attention/README.md
@@ -21,4 +21,4 @@ benchmarks and our newly collected NYC-traffic dataset, demonstrate that our
 approach outperforms the state-of-the-art imputation and forecasting methods. A
 detailed systematic analysis confirms the effectiveness of our design choices.
 
-![image](https://drive.google.com/uc?export=view&id=1Z0RbYkBYot3FOlf_V7A33qzZdlERntSg)
+![image](https://drive.google.com/uc?export=view&id=1qJzGTBewD-Q9gT7M1FaKqAKW7H6S4OBK)
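The Config.yaml in patch 2 points the handler at mask files such as `burst_70` via `mask_name`; these are the HDF5 files the notebook writes (`burst_20.h5`, `burst_90.h5`, ...), each holding a single `mask` dataset in which 1 marks an available point. A round-trip sketch of that file format (the roughly 70% missing rate here is illustrative):

```python
import h5py
import numpy as np

mask = (np.random.uniform(size=(186, 6624)) > 0.7).astype(np.float64)
with h5py.File('burst_70.h5', 'w') as f:
    f['mask'] = mask

with h5py.File('burst_70.h5', 'r') as f:
    move = f['mask'][:]
print('missing rate:', 1.0 - move.mean())
```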