diff --git a/models/cross-dimensional-attention/README.md b/models/cross-dimensional-attention/README.md
new file mode 100644
index 0000000..b6e1a4f
--- /dev/null
+++ b/models/cross-dimensional-attention/README.md
@@ -0,0 +1,24 @@
+# Cross-Dimensional-Attention Model
+
+## [CDSA: Cross-Dimensional Self-Attention for Multivariate, Geo-tagged Time Series Imputation](https://arxiv.org/pdf/1905.09904.pdf)
+_Jiawei Ma, Zheng Shou, Alireza Zareian, Hassan Mansour, Anthony Vetro, Shih-Fu Chang_
+
+Many real-world applications involve multivariate, geo-tagged time series data: at
+each location, multiple sensors record corresponding measurements. For example,
+an air quality monitoring system records PM2.5, CO, etc. The resulting time-series
+data often has missing values due to device outages or communication errors. In
+order to impute the missing values, state-of-the-art methods are built on Recurrent
+Neural Networks (RNNs), which process each time stamp sequentially, prohibiting
+the direct modeling of the relationship between distant time stamps. Recently, the
+self-attention mechanism has been proposed for sequence modeling tasks such as
+machine translation, significantly outperforming RNNs because the relationship
+between any two time stamps can be modeled explicitly. In this paper, we are the first
+to adapt the self-attention mechanism to multivariate, geo-tagged time series data.
+In order to jointly capture the self-attention across multiple dimensions, including
+time, location and sensor measurements, while maintaining low computational
+complexity, we propose a novel approach called Cross-Dimensional Self-Attention
+(CDSA) to process each dimension sequentially, yet in an order-independent manner.
+Our extensive experiments on four real-world datasets, including three standard
+benchmarks and our newly collected NYC-traffic dataset, demonstrate that our
+approach outperforms state-of-the-art imputation and forecasting methods. A
+detailed systematic analysis confirms the effectiveness of our design choices.
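+
+### Cross-dimensional attention at a glance
+
+A minimal NumPy sketch of the idea described above: attention maps are built over
+one dimension at a time and applied in sequence to the same value tensor. This is an
+illustration only, not the code in this repository; all names and toy shapes below
+are made up for the example.
+
+```python
+import numpy as np
+
+def softmax(x, axis=-1):
+    e = np.exp(x - x.max(axis=axis, keepdims=True))
+    return e / e.sum(axis=axis, keepdims=True)
+
+def attend_over_axis(value, query, key, axis):
+    # Attention map over a single dimension, applied along that same axis of `value`.
+    att = softmax(np.dot(query, key.T) / np.sqrt(key.shape[-1]))  # (n, n)
+    value = np.moveaxis(value, axis, 0)                           # bring target axis first
+    out = np.tensordot(att, value, axes=(1, 0))                   # mix entries along it
+    return np.moveaxis(out, 0, axis)
+
+# Toy geo-tagged series: (locations, measurements, time)
+value = np.random.randn(4, 3, 10)
+q_time = k_time = np.random.randn(10, 8)  # stand-in per-time-stamp embeddings
+q_loc = k_loc = np.random.randn(4, 8)     # stand-in per-location embeddings
+
+out = attend_over_axis(value, q_time, k_time, axis=2)   # attend across time
+out = attend_over_axis(out, q_loc, k_loc, axis=0)       # then across location
+```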
+
+![image](https://drive.google.com/uc?export=view&id=1qJzGTBewD-Q9gT7M1FaKqAKW7H6S4OBK)
diff --git a/models/cross-dimensional-attention/tensorflow/Base_Handler.py b/models/cross-dimensional-attention/tensorflow/Base_Handler.py
new file mode 100644
index 0000000..f9062a2
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/Base_Handler.py
@@ -0,0 +1,82 @@
+# uncompyle6 version 3.7.0
+# Python bytecode 2.7 (62211)
+# Decompiled from: Python 3.6.9 (default, Apr 18 2020, 01:56:04)
+# [GCC 8.4.0]
+# Embedded file name: /home/jiawei/Tensor_MultiDim_NYC/Model/Base_Handler.py
+# Compiled at: 2019-04-29 02:51:25
+import tensorflow as tf
+import numpy as np
+import yaml
+import os
+import h5py
+import time
+import sys
+import math
+from Lib.Model_Visualization import *
+
+class Basement_Handler(object):
+
+    def __init__(self, sess, model_config, is_training):
+        self.sess = sess
+        self.model_config = model_config
+        self.max_grad_norm = float(model_config.get('max_grad_norm', 5.0))
+        self.init_logging(is_training)
+        self.logger.info(model_config)
+
+    def init_logging(self, is_training):
+        # Validation results and trained models are logged to separate folders
+        if not is_training:
+            base_dir = self.model_config.get('result_data')
+            folder_dir = self.model_config.get('category') + '_for_' + self.model_config.get('data_name')
+        else:
+            base_dir = self.model_config.get('result_model')
+            folder_dir = generate_folder_id(self.model_config)
+        log_dir = os.path.join(self.model_config.get('result_dir'), base_dir, folder_dir)
+        if not os.path.exists(log_dir):
+            os.makedirs(log_dir)
+        self.log_dir = log_dir
+        self.logger = get_logger(self.log_dir, folder_dir)
+        self.writer = tf.summary.FileWriter(self.log_dir)
+
+    def trainable_parameter_info(self):
+        total_parameters = 0
+        for variable in tf.trainable_variables():
+            total_parameters += np.prod([x.value for x in variable.get_shape()])
+        self.logger.info('Total number of trainable parameters: %d' % total_parameters)
+        for var in tf.global_variables():
+            self.logger.debug('%s, %s' % (var.name, var.get_shape()))
+
+    def summary_logging(self, global_step, names, values):
+        for name, value in zip(names, values):
+            summary = tf.Summary()
+            summary_value = summary.value.add()
+            summary_value.simple_value = value
+            summary_value.tag = name
+            self.writer.add_summary(summary, global_step)
+
+    def save_model(self, saver, epoch, val_loss):
+        config_filename = 'config_%02d.yaml' % epoch
+        config = dict(self.model_config)
+        global_step = self.sess.run(tf.train.get_or_create_global_step())
+        config['epoch'] = epoch
+        config['global_step'] = global_step
+        config['log_dir'] = self.log_dir
+        config['model_filename'] = saver.save(self.sess, os.path.join(self.log_dir, 'models-%.4f' % val_loss), global_step=global_step, write_meta_graph=False)
+        with open(os.path.join(self.log_dir, config_filename), 'w') as f:
+            yaml.dump(config, f)
+        return config['model_filename']
+
+    def restore(self):
+        config = dict(self.model_config)
+        self.pred_step = config['global_step']
+        model_filename = config['model_filename']
+        saver = tf.train.Saver(tf.global_variables())
+        saver.restore(self.sess, model_filename)
+
+    def train_test_valid_assignment(self):
+        pass
+
+    def initial_parameter(self):
+        pass
+
+    def data_assignment(self):
+        pass
+
+    def train(self):
+        pass
+
+    def test(self):
+        pass
\ No newline at end of file
diff --git a/models/cross-dimensional-attention/tensorflow/Base_Handler.pyc b/models/cross-dimensional-attention/tensorflow/Base_Handler.pyc
new file mode 100644
index 0000000..8cb1785
Binary files /dev/null and b/models/cross-dimensional-attention/tensorflow/Base_Handler.pyc differ
diff --git a/models/cross-dimensional-attention/tensorflow/Base_TFModel.py b/models/cross-dimensional-attention/tensorflow/Base_TFModel.py
new file mode 100644
index 0000000..16105da
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/Base_TFModel.py
@@ -0,0 +1,36 @@
+# uncompyle6 version 3.7.0
+# Python bytecode 2.7 (62211)
+# Decompiled from: Python 3.6.9 (default, Apr 18 2020, 01:56:04)
+# [GCC 8.4.0]
+# Embedded file name: /home/jiawei/Tensor_MultiDim_NYC/Model/Base_TFModel.py
+# Compiled at: 2019-04-29 02:51:25
+import os, tensorflow as tf
+
+class Basement_TFModel(object):
+    """Define and initialize the basic/necessary elements of a TensorFlow model."""
+
+    def __init__(self, sess, config, learning_rate, is_training):
+        self.sess = sess
+        self.config = config
+        self.is_training = is_training
+        self.model_name = config.get('model_name', 'MDAnalyzer')
+        self.train_op = None
+        self.learning_rate = learning_rate
+        self.max_grad_norm = float(config.get('max_grad_norm', 5.0))
+        self.loss = None
+        self.loss_func = config.get('loss_func', 'RMSE')
+        self.maximum_type = int(config.get('upbound', 1))
+
+    def set_lr(self, new_learning_rate):
+        self.learning_rate = new_learning_rate
+
+    def save_checkpoint(self, step=None):
+        pass
+
+    def load_checkpoint(self, step=None):
+        pass
+
+    def initial_parameter(self):
+        pass
\ No newline at end of file
diff --git a/models/cross-dimensional-attention/tensorflow/Base_TFModel.pyc b/models/cross-dimensional-attention/tensorflow/Base_TFModel.pyc
new file mode 100644
index 0000000..edfae20
Binary files /dev/null and b/models/cross-dimensional-attention/tensorflow/Base_TFModel.pyc differ
diff --git a/models/cross-dimensional-attention/tensorflow/BurstLoss_Generation.ipynb b/models/cross-dimensional-attention/tensorflow/BurstLoss_Generation.ipynb
new file mode 100644
index 0000000..56b31a1
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/BurstLoss_Generation.ipynb
@@ -0,0 +1,187 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/dvmm-filer2/users/jiawei/anaconda2/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
+      "  from ._conv import register_converters as _register_converters\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import h5py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[u'data', u'mask', u'maximum', u'position']\n",
+      "(186, 6624) (186, 6624) (186, 2) 0.9157041\n",
+      "25.585367 0.9157041 25.585367 2.992725 0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "combine_file = h5py.File('data_mask_maximum.h5','r')\n",
+    "print combine_file.keys()\n",
+    "data = combine_file['data'][:]\n",
+    "mask = combine_file['mask'][:]\n",
+    "position = combine_file['position'][:]\n",
+    "maximum = combine_file['maximum'].value\n",
+    "combine_file.close()\n",
+    "print data.shape,mask.shape,position.shape,mask.mean()\n",
+    "print maximum,mask.mean(),data.max(),data.mean(),data.min()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(24290,)\n",
+      "(5887,)\n",
+      "6.350773 9.809643 0.08429594\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Measure the natural missing bursts: length of each run of consecutive missing steps per camera\n",
+    "mask_miss = 1.0-mask\n",
+    "mask_time,mask_delta = np.zeros_like(mask),np.zeros_like(mask)\n",
+    "mask_time[:,0] = mask_miss[:,0]\n",
+    "for cam in range(186):\n",
+    "    for time in range(1,6624):\n",
+    "        if mask_miss[cam,time] == 1:\n",
+    "            mask_time[cam,time] = mask_time[cam,time-1]+1\n",
+    "    mask_delta[cam,:] = mask_time[cam,:]\n",
+    "    while(time>0):\n",
+    "        if mask_time[cam,time] > 1:\n",
+    "            length = int(mask_time[cam,time]-1)\n",
+    "            mask_time[cam,time-length:time] = mask_time[cam,time]\n",
+    "            mask_delta[cam,time-length:time] = 0\n",
+    "            time -= length\n",
+    "        time -= 1\n",
+    "delta_seq = mask_delta[mask_delta != 0]\n",
+    "delta_seq = np.sort(delta_seq)\n",
+    "delta_burst = delta_seq[delta_seq<=134]\n",
+    "print delta_burst.shape\n",
+    "delta_burst = delta_burst[delta_burst > 1]\n",
+    "print delta_burst.shape\n",
+    "print delta_burst.mean(),delta_burst.std(),mask_miss.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "missing rate: 0.9009605050086975\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Draw synthetic burst lengths from N(burst_mean, burst_std) and overlay them on the natural mask\n",
+    "burst_mean,burst_std,mask_burst,ratio = delta_burst.mean(),delta_burst.std(),np.zeros_like(mask_miss),0.618\n",
+    "for cam in range(186):\n",
+    "    time = 0\n",
+    "    while(time<6624):\n",
+    "        if np.random.uniform(size=1) < ratio:\n",
+    "            length = int(np.random.normal(burst_mean, burst_std, 1))\n",
+    "            if length > 0:\n",
+    "                mask_burst[cam,time:time+length] = 1.0\n",
+    "                time += length\n",
+    "            else:\n",
+    "                time += 1\n",
+    "        else:\n",
+    "            time += 1\n",
+    "move = mask_burst+mask_miss\n",
+    "move[move>1] = 1\n",
+    "move = 1.0-move\n",
+    "for i in range(6624-48*12+1):\n",
+    "    if move[:,i:i+48*12].mean() == 0:\n",
+    "        print i\n",
+    "print 'missing rate:', 1.0-move.mean()\n",
+    "\n",
+    "combine_file = h5py.File('burst_90.h5','w')\n",
+    "combine_file['mask'] = move\n",
+    "combine_file.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.7999438 1.0 0.0\n",
+      "[]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sanity-check a saved mask: binary values only and no fully-missing 48*12 window\n",
+    "combine_file = h5py.File('burst_20.h5','r')\n",
+    "move = combine_file['mask'][:]\n",
+    "combine_file.close()\n",
+    "print move.mean(),move.max(),move.min()\n",
+    "for i in range(6624-48*12+1):\n",
+    "    if move[:,i:i+48*12].mean() == 0:\n",
+    "        print i\n",
+    "temp = move[move!=0]\n",
+    "\n",
+    "print temp[temp!=1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/cross-dimensional-attention/tensorflow/Config.yaml b/models/cross-dimensional-attention/tensorflow/Config.yaml
new file mode 100644
index 0000000..c01e7b5
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/Config.yaml
@@ -0,0 +1,55 @@
+---
+# === Configure Main function === #
+category: Traffic-NYC
+data_name: data_mask_maximum
+mask_name: burst_70
+meas_index: 1
+model_name: MDAnalyzer
+result_dir: Result
+result_model: Model-Config
+result_data: Validation-Result
+GPU: 1
+# === Configure Model Handler === #
+# Data Assignment
+batch_size: 19
+period_enc: 432
+period_dec: 432
+
+# Model training Controller
+epoch_iter: 600
+epochs: 100
+patience: 50
+learning_rate: 0.001
+lr_decay: 0.3
+lr_decay_epoch: 20
+lr_decay_interval: 15
+loss_func: RMSE
+upbound: 1
+max_grad_norm: 5.0
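+
+# Worked example of the decay schedule above (derived from calculate_scheduled_lr
+# in MultiDim_Analyzer_Handler.py):
+#   new_lr = learning_rate * lr_decay ** ceil((epoch - lr_decay_epoch) / lr_decay_interval),
+# floored at min_lr=1e-6. With the values above: epochs 0-20 run at 0.001,
+# epochs 21-35 at 0.0003, epochs 36-50 at 0.00009, and so on.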
+
+# === Configure in MDAnalyzer Model pipeline === #
+# Frame: Sequence Element-wise-addition Concatenation Dimension-reduce Independent
+model_structure: Sequence
+num_enclayer: 7
+num_declayer: 7
+num_heads: 12
+units_IDw: 14
+units_Timew: 6
+# Query, Key and Value
+time_stride_AM: 1
+time_stride_V: 1
+time_fuse_AM: 1
+time_fuse_V: 1
+units_value: 3
+units_weight: 12
+drop_rate_attention: 0.0
+# Filter SetUp
+units_conv: 8
+units_pred: 8
+filter_encdec: dense
+filter_pred: dense
+drop_rate_forward: 0.1
+# Mask label
+flag_identity: False
+flag_time: False
+flag_imputation: True
diff --git a/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.py b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.py
new file mode 100644
index 0000000..e932542
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.py
@@ -0,0 +1,166 @@
+import tensorflow as tf
+import numpy as np
+import yaml
+import os
+import h5py
+import time
+import sys
+import math
+
+from Lib.Data_Processing import *
+from Lib.Utility import *
+from Model.MultiDim_Analyzer_Model import MultiDim_Analyzer
+from Model.Base_Handler import Basement_Handler
+
+
+class MDAnalyzer_Handler(Basement_Handler):
+    def __init__(self, dataset_name, model_config, sess, is_training=True):
+
+        # Initialization of configuration, parameters and datasets
+        super(MDAnalyzer_Handler, self).__init__(sess=sess, model_config=model_config, is_training=is_training)
+        self.initial_parameter()
+        self.data_assignment(dataset_name)
+
+        # Define the general model and the corresponding inputs
+        self.shape_enc = (self.batch_size, self.num_identity, self.period_enc)
+        self.shape_dec = (self.batch_size, self.num_identity, self.period_dec)
+        self.input_enc = tf.placeholder(tf.float32, shape=self.shape_enc, name='encoder_inputs')
+        self.input_ori = tf.placeholder(tf.float32, shape=self.shape_dec, name='decoder_inputs')
+        self.truth_pred = tf.placeholder(tf.float32, shape=self.shape_dec, name='ground_truth')
+        self.truth_mask = tf.placeholder(tf.float32, shape=self.shape_dec, name='natural_missing')
+        self.move_mask = tf.placeholder(tf.float32, shape=self.shape_dec, name='removed_missing')
+        self.shared_info = tf.placeholder(tf.float32, shape=(self.num_identity, self.num_shared_feature), name='position')
+        with tf.variable_scope("impute", reuse=tf.AUTO_REUSE):
+            self.impute_segs = tf.get_variable("impute_var", shape=self.shape_enc, trainable=True,
+                                               initializer=tf.random_normal_initializer(mean=0, stddev=0.1))
+        # Initialization of the model training structure.
+        self.learning_rate = tf.get_variable('learning_rate', shape=(), initializer=tf.constant_initializer(self.lr_init), trainable=False)
+        self.lr_new = tf.placeholder(tf.float32, shape=(), name='lr_new')
+        self.lr_update = tf.assign(self.learning_rate, self.lr_new, name='lr_update')
+
+        self.train_test_valid_assignment()
+        self.trainable_parameter_info()
+        self.saver = tf.train.Saver(tf.global_variables())
+
+    def initial_parameter(self):
+
+        # Configuration set
+        config = self.model_config
+
+        # Model input initialization
+        self.batch_size = int(config.get('batch_size', 1))
+        self.period_enc = int(config.get('period_enc', 12))
+        self.period_dec = int(config.get('period_dec', 12))
+
+        # Initialization of the training controller
+        self.epochs = int(config.get('epochs', 100))
+        self.epoch_iter = int(config.get('epoch_iter', 5))
+        self.patience = int(config.get('patience', 30))
+        self.lr_init = float(config.get('learning_rate', 0.001))
+        self.lr_decay = float(config.get('lr_decay', 0.1))
+        self.lr_decay_epoch = int(config.get('lr_decay_epoch', 20))
+        self.lr_decay_interval = int(config.get('lr_decay_interval', 10))
+
+    def data_assignment(self, dataset_name):
+        model_config = self.model_config
+        set_whole, self.node_pos, self.maximum = Data_Division(dataset_name)
+
+        # Pre-calculation for model training
+        self.scalar = limit_scalar(set_whole)
+        self.set_whole, disp_whole = self.scalar.transform(set_whole)
+        self.num_identity, self.num_shared_feature = self.node_pos.shape[0], self.node_pos.shape[1]
+        self.whole_segs = (set_whole[0].shape[-1]-self.period_enc)/2+1
+        self.whole_size = int(self.whole_segs/self.batch_size)
+
+        # Display the data structure of the dataset
+        print 'Available Segments[batches] %d[%d] Shape of data/mask piece %s and min-mean-max is %.2f-%.2f-%.2f' % (
+            self.whole_segs, self.whole_size, set_whole[0].shape, disp_whole[0], disp_whole[1], disp_whole[2])
+        print 'Measurement maximum(average,std) are %4.4f(%4.4f,%4.4f)' % (self.maximum, self.scalar.mean, self.scalar.std)
+
+        # Data generator
+        self.gen_whole = Data_Generator(self.set_whole, set_whole[0], self.whole_segs, self.batch_size, self.period_enc, is_training=True)
+
+    def train_test_valid_assignment(self):
+
+        # The original mask uses 1 to indicate a missing point; the inversion is done in the Data_Division function
+        value_sets = (
+            tf.expand_dims(self.input_enc, -1),   # the input value of the encoder (current data, with values randomly removed)
+            tf.expand_dims(self.input_ori, -1),   # the input value of the decoder (future/current data, with values randomly removed)
+            tf.expand_dims(self.truth_pred, -1),  # the ground truth of the future/current prediction
+            tf.expand_dims(self.truth_mask, -1),  # the label of NATURALLY missing data: 0 -- missing, 1 -- available
+            tf.expand_dims(self.move_mask, -1),   # the label of removed (artificially masked) data: 0 -- missing, 1 -- available
+            self.shared_info,                     # the shared information of the nodes (position), normalized to [0,1]
+            self.scalar                           # (class instance) rescales the input and the output
+        )
+        with tf.name_scope('Train'):
+            with tf.variable_scope('MultiDim_Analyzer', reuse=False):
+                self.MD_Analyzer_train = MultiDim_Analyzer(value_sets, self.learning_rate, self.sess, self.model_config, is_training=True)
+
+    def train(self):
+        self.sess.run(tf.global_variables_initializer())
+        print ('Training Started')
+        min_impute_metric = float('inf')
+        epoch_cnt, wait = 0, 0
+
+        start_time = time.time()
+        while epoch_cnt <= self.epochs:
+
+            # Training preparation: learning-rate pre-setting, model interface summary.
+            cur_lr = self.calculate_scheduled_lr(epoch_cnt)
+            whole_fetches = {'global_step': tf.train.get_or_create_global_step(),
+                             'train_op': self.MD_Analyzer_train.train_op,
+                             'preds': self.MD_Analyzer_train.orig_impute,
+                             'metric': self.MD_Analyzer_train.orig_metric,
+                             'loss': self.MD_Analyzer_train.loss}
+            Results = {"loss": [], "imputed": [], "metric": [], "ground": [], "mask_compare": []}
+            # Framework and visualization setup for training
+            for trained_batch in range(0, self.whole_size):
+                (curdata, curmask, curmove, curdata_orig) = self.gen_whole.next()
+                feed_dict_whole = {self.input_enc: curdata*curmove,
+                                   self.input_ori: curdata_orig,
+                                   self.truth_pred: curdata,
+                                   self.truth_mask: curmask,
+                                   self.move_mask: curmove,
+                                   self.shared_info: self.node_pos}
+                whole_output = self.sess.run(whole_fetches, feed_dict=feed_dict_whole)
+                message = "Epoch [%3d/%3d] [%d/%d] lr: %.4f, loss: %.8f" % (
+                    epoch_cnt, self.epochs, trained_batch, self.whole_size, cur_lr, whole_output["loss"])
+                if trained_batch % 50 == 0:
+                    print message
+
+                Results["metric"].append(whole_output['metric'])
+                Results["loss"].append(whole_output['loss'])
+                Results["imputed"].append(whole_output['preds'])
+                Results["ground"].append(curdata_orig)
+                Results["mask_compare"].append(curmask-curmove)
+            global_step = whole_output['global_step']
+
+            loss, metric_seg = np.mean(Results["loss"]), np.mean(Results["metric"], axis=0)
+            if metric_seg[0] <= min_impute_metric:
+                min_impute_metric = metric_seg[0]
+                metrics = calculate_metrics_np(Results["imputed"], Results["ground"], Results["mask_compare"])
+
+            # Information logging for model training and validation (maybe for curve plotting)
+            summary_format = ['loss/train_loss', 'metric/mse_segmin', 'metric/rmse', 'metric/mae', 'metric/mape', 'metric/mre']
+            summary_data = [loss, min_impute_metric, metrics[1], metrics[2], metrics[3], metrics[4]]
+            self.summary_logging(global_step, summary_format, summary_data)
+            # Message summary of each epoch (for info.log logging)
+            message = 'Epoch [%3d/%3d] loss: %.4f(%.4f), Orig MSE/RMSE/MAE/MAPE/MRE %s' % (
+                epoch_cnt, self.epochs, np.mean(Results["loss"]), min_impute_metric, metrics)
+            self.logger.info(message)
+            epoch_cnt += 1
+        print '%ds' % (time.time()-start_time)
+
+    def calculate_scheduled_lr(self, epoch, min_lr=1e-6):
+        # Stepwise exponential decay: no decay before lr_decay_epoch, then one extra
+        # factor of lr_decay every lr_decay_interval epochs, floored at min_lr
+        decay_factor = int(math.ceil((epoch - self.lr_decay_epoch) / float(self.lr_decay_interval)))
+        new_lr = self.lr_init * self.lr_decay ** max(0, decay_factor)
+        new_lr = max(min_lr, new_lr)
+
+        self.logger.info('Current learning rate set to: %.6f' % new_lr)
+        sys.stdout.flush()
+
+        self.sess.run(self.lr_update, feed_dict={self.lr_new: new_lr})
+        self.MD_Analyzer_train.set_lr(self.learning_rate)
+        return new_lr
\ No newline at end of file
diff --git a/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.pyc b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.pyc
new file mode 100644
index 0000000..d1e6d7c
Binary files /dev/null and b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Handler.pyc differ
diff --git a/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.py b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.py
new file mode 100644
index 0000000..8a07c6c
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.py
@@ -0,0 +1,362 @@
+import tensorflow as tf
+import numpy as np
+import math
+import os
+import json
+
+from Lib.Utility import *
+from Model.Base_TFModel import Basement_TFModel
+
+class MultiDim_Analyzer(Basement_TFModel):
+
+    def __init__(self, value_sets, init_learning_rate, sess, config, is_training=True, *args, **kwargs):
+
+        super(MultiDim_Analyzer, self).__init__(sess=sess, config=config, learning_rate=init_learning_rate, is_training=is_training)
+        '''
+        Model input explanation & reminder:
+        enc_input:   the masked input of the encoder; its time dimension may differ from dec_input [data generation and assignment in the handler]
+        dec_input:   the masked input of the decoder; its time dimension may differ from enc_input [data generation and assignment in the handler]
+        truth_pred:  used in the loss calculation; has the same dimensions as the decoder input, same (shifted) value for completion (prediction)
+        truth_mask:  used in the calculation of metrics such as MSE, and to relabel the model output for comparison (mask of natural loss)
+        shared_info: auxiliary information for encoding the identity dimension of the encoder/decoder input; encoding the time dimension is similar to NLP
+        scalar:      the maximum value of each measurement, used for metric calculation
+
+        For data in the value_sets (data unzip):
+        Size of model input/output: (batch_size, num_identity, num_measurement, period_enc/dec, 1)
+        Size of auxiliary shared_info: (batch_size, num_shared_feature)
+        '''
+        (enc_input, ori_input, truth_pred, truth_mask, move_mask, shared_info, scalar) = value_sets
+        self.num_identity = enc_input.get_shape().as_list()[1]
+
+        # Initialization of the model hyperparameters, enc-dec structure, evaluation metric & optimizer
+        self.initial_parameter()
+        self.model_input, self.model_output = self.encdec_handler(enc_input, shared_info, truth_pred, truth_mask, move_mask)
+        self.metric_opt(self.model_output, ori_input, truth_pred, truth_mask, move_mask, scalar)
+
+    def encdec_handler(self, enc_input, shared_info, truth_pred, truth_mask, move_mask):
+        # For the dimension layout of the input, refer to the class initialization above.
+
+        # Options for Model Structure (illustrated in the sketch below)
+        # Independent -------------- 3 Enc-Dec models are used to learn the relationship of each dimension respectively
+        # Sequence ----------------- Following an arbitrary order to do the multiplication on the value vector
+        # Element-wise-addition ---- Multiplication on the same value vector, then element-wise addition (average)
+        # Concatenation ------------ Multiplication on the same value vector, then concatenation and matmul (generalized ew-addition)
+        # Dimension-reduce --------- Expand the input 3D value to a 1D vector and then calculate the huge AM. (limitation of memory)
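+        #
+        # Illustration of how these modes combine the per-dimension products (a
+        # sketch added for readability, not code from the original source; see
+        # softmax_combination below for the actual ops). With AM_I the
+        # identity-by-identity attention map, AM_T the time-by-time map, and V
+        # the value tensor:
+        #   Sequence:              out = AM_T x (AM_I x V)      applied axis by axis
+        #   Element-wise-addition: out = (AM_I x V + AM_T x V) / 2
+        #   Concatenation:         out = dense(concat([AM_I x V, AM_T x V]))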
+
+        # The auxiliary encoding is available for the Identity&Measurement and Time dimensions
+        (shared_encoder, shared_decoder, time_encoder, time_decoder) = self.auxiliary_encode(shared_info)
+        if self.flag_casuality:
+            mask_casuality = self.casual_mask()
+        else:
+            mask_casuality = None
+        if self.flag_imputation:
+            self.mask_imputation = self.impute_mask()
+        else:
+            self.mask_imputation = None
+        if self.flag_time:
+            enc_input = enc_input + time_encoder
+        if self.flag_identity:
+            enc_input = enc_input + shared_encoder
+
+        with tf.variable_scope('layer_init'):
+            enc_init = self.multihead_attention(enc_input, self.attention_unit, self.model_structure)
+            enc_init = self.feed_forward_layer(enc_init, self.conv_unit, self.filter_encdec)
+            enc_init = enc_input + tf.multiply(tf.constant(1.0, dtype=tf.float32)-move_mask, enc_init)
+        topenc = self.encoder(enc_input, self.model_structure)
+        return enc_init, topenc
+
+    def encoder(self, enclayer_init, model_structure):
+        enclayer_in = enclayer_init
+        with tf.variable_scope('Encoder'):
+            for cnt_enclayer in range(0, self.num_enclayer):
+                with tf.variable_scope('layer_%d' % (cnt_enclayer)):
+                    enclayer_in = self.layer_norm(enclayer_in + self.multihead_attention(
+                        enclayer_in, self.attention_unit, model_structure), 'norm_1')
+                    enclayer_in = self.layer_norm(enclayer_in + self.feed_forward_layer(
+                        enclayer_in, self.conv_unit, self.filter_encdec), 'norm_2')
+            with tf.variable_scope('Enc_pred1'):
+                enclayer_in = self.multihead_attention(enclayer_in, self.attention_unit, model_structure)
+                enclayer_in = self.feed_forward_layer(enclayer_in, self.conv_unit, self.filter_encdec)
+        return enclayer_in
+
+    def decoder(self, declayer_init, model_structure, encoder_top, mask_casuality=None):
+        declayer_in = declayer_init
+        with tf.variable_scope('Decoder'):
+            with tf.variable_scope('layer_0'):
+                declayer_in = self.layer_norm(declayer_in + self.multihead_attention(
+                    declayer_in, self.attention_unit, model_structure, mask=mask_casuality), 'norm_1')
+                (attention_out, KVtop_share) = self.multihead_attention(
+                    declayer_in, self.attention_unit, model_structure, top_encod=encoder_top, scope='enc-dec-attention')
+                declayer_in = self.layer_norm(declayer_in + attention_out, 'norm_2')
+                declayer_in = self.layer_norm(declayer_in + self.feed_forward_layer(
+                    declayer_in, self.conv_unit, self.filter_encdec), 'norm_3')
+            for cnt_declayer in range(1, self.num_declayer):
+                with tf.variable_scope('layer_%d' % (cnt_declayer)):
+                    declayer_in = self.layer_norm(declayer_in + self.multihead_attention(
+                        declayer_in, self.attention_unit, model_structure, mask=mask_casuality), 'norm_1')
+                    declayer_in = self.layer_norm(declayer_in + self.multihead_attention(
+                        declayer_in, self.attention_unit, model_structure, top_encod=encoder_top, cache=KVtop_share,
+                        scope='enc-dec-attention'), 'norm_2')
+                    declayer_in = self.layer_norm(declayer_in + self.feed_forward_layer(
+                        declayer_in, self.conv_unit, self.filter_encdec), 'norm_3')
+            with tf.variable_scope('Dec_pred_1'):
+                declayer_in = self.multihead_attention(declayer_in, self.attention_unit, model_structure)
+                declayer_in = self.feed_forward_layer(declayer_in, self.conv_unit, self.filter_encdec)
+
+        return declayer_in
+
+    def metric_opt(self, model_output, truth_orig, truth_pred, truth_mask, move_mask, scalar):
+
+        loss_mask = move_mask
+        global_step = tf.train.get_or_create_global_step()
+        avail_output = tf.multiply(model_output, loss_mask)
+        avail_truth = tf.multiply(truth_pred, loss_mask)
+
+        if self.loss_func == 'MSE':
+            self.loss = loss_mse(avail_output, avail_truth, loss_mask)
+        elif self.loss_func == 'RMSE':
+            self.loss = loss_rmse(avail_output, avail_truth, loss_mask)
+        elif self.loss_func == 'MAE':
+            self.loss = loss_mae(avail_output, avail_truth, loss_mask)
+        else:
+            self.loss = loss_rmse(avail_output, avail_truth, loss_mask) + loss_mae(avail_output, avail_truth, loss_mask)
+
+        if self.is_training:
+            optimizer = tf.train.AdamOptimizer(self.learning_rate)
+            tvars = tf.trainable_variables()
+            grads = tf.gradients(self.loss, tvars)
+            grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm)
+            self.train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step, name='train_op')
+        self.info_merge = tf.summary.merge_all()
+
+        orig_preds = scalar.TFinverse_transform(model_output)
+        orig_truth = tf.multiply(truth_orig, truth_mask-move_mask)
+        # Keep the observed values and fill the removed positions with (rescaled) predictions
+        self.orig_impute = tf.multiply(truth_orig, move_mask) + tf.multiply(orig_preds, tf.constant(1.0, dtype=tf.float32)-move_mask)
+        # Evaluate only on positions that are naturally available but were artificially removed
+        self.orig_metric = calculate_metrics(tf.multiply(orig_preds, truth_mask-move_mask), orig_truth, truth_mask-move_mask)
+
+    def multihead_attention(self, att_input, att_unit, model_structure, top_encod=None, cache=None, mask=None, scope='self-attention'):
+        """
+        att_input: the input to be processed by this module, with size [batch, #identity, #measurement, time, 1]
+        att_unit:  the hyperparameters for the dimensions of Q/K and V
+        top_encod: the output from the top encoder layer [batch_size, #identity, #measurement, time, 1]
+        mask:      masks the causality of the self-attention layer, [batch, time, time] or [batch, #id*#meas*time, #id*#meas*time]
+
+        3D convolution is applied to realize the unit expansion. For convenience of application, we use the following index mapping:
+        [batch, in_depth, in_height, in_width, in_channels] = [batch_size, num_identity, num_measurement, length_time, 1]
+        """
+        # Initialization of some necessary items
+        (value_units, Iunits, Tunits) = att_unit
+        KVtop_cache = None
+        # Since num_measurement and the period are small and may equal each other by coincidence,
+        # we use the dimension of num_identity
+        V_filters, V_kernal, V_stride = value_units*self.num_heads, (1, self.V_timfuse), (1, self.V_timjump)
+        batch, ids, time = att_input.get_shape().as_list()[:3]
+
+        with tf.variable_scope(scope):
+            if top_encod is None or cache is None:
+                if top_encod is None:
+                    top_encod = att_input
+                else:
+                    if cache is None:
+                        KVtop_cache = {}
+                # Linear projection (unit expansion) for the multi-head attention dimension:
+                Q_iden = tf.layers.dense(tf.reshape(att_input, [batch, ids, -1]), self.num_heads*Iunits,
+                                         use_bias=False, name='Q_ID')
+                K_iden = tf.layers.dense(tf.reshape(top_encod, [batch, ids, -1]), self.num_heads*Iunits,
+                                         use_bias=False, name='K_ID')
+                Q_time = tf.layers.dense(tf.reshape(tf.transpose(att_input, [0, 2, 1, 3]), [batch, time, -1]), self.num_heads*Tunits,
+                                         use_bias=False, name='Q_Time')
+                K_time = tf.layers.dense(tf.reshape(tf.transpose(top_encod, [0, 2, 1, 3]), [batch, time, -1]), self.num_heads*Tunits,
+                                         use_bias=False, name='K_Time')
+                V = tf.layers.conv2d(inputs=top_encod, filters=V_filters, kernel_size=V_kernal, strides=V_stride,
+                                     padding="same", data_format="channels_last", name='V')
+                if KVtop_cache is not None:
+                    KVtop_cache = {'share_Kid': K_iden, 'share_Ktime': K_time, 'share_V': V}
+            else:
+                Q_iden = tf.layers.dense(tf.reshape(att_input, [batch, ids, -1]), self.num_heads*Iunits,
+                                         use_bias=False, name='Q_ID')
+                Q_time = tf.layers.dense(tf.reshape(tf.transpose(att_input, [0, 2, 1, 3]), [batch, time, -1]), self.num_heads*Tunits,
+                                         use_bias=False, name='Q_Time')
+                K_iden, K_time, V = cache['share_Kid'], cache['share_Ktime'], cache['share_V']
+
+            # Split the matrices into multiple heads and concatenate to build a larger batch:
+            # [self.batch_size*self.num_heads, self.X, self.X]
+            Qhb_id = tf.concat(tf.split(Q_iden, self.num_heads, axis=2), axis=0)
+            Qhb_time = tf.concat(tf.split(Q_time, self.num_heads, axis=2), axis=0)
+            Khb_id = tf.concat(tf.split(K_iden, self.num_heads, axis=2), axis=0)
+            Khb_time = tf.concat(tf.split(K_time, self.num_heads, axis=2), axis=0)
+            # [self.batch_size*self.num_heads, self.num_identity, self.num_measurement, self.length_time, 'hidden-units']
+            Q_headbatch = (Qhb_id, Qhb_time)
+            K_headbatch = (Khb_id, Khb_time)
+            V_headbatch = tf.concat(tf.split(V, self.num_heads, axis=3), axis=0)
+
+            if mask is not None:
+                mask_recur = tf.tile(mask, [self.num_heads, 1, 1])
+            else:
+                mask_recur = None
+
+            out = self.softmax_combination(Q_headbatch, K_headbatch, V_headbatch, model_structure, att_unit, mask_recur)
+
+            # Merge the multiple heads back to the original shape
+            # [batch_size, self.num_identity, self.num_measurement, self.length_time, 'hidden-units'*self.num_heads]
+            out = tf.concat(tf.split(out, self.num_heads, axis=0), axis=3)
+            out = tf.layers.dense(out, 1, name='multihead_fuse')
+            out = tf.layers.dropout(out, rate=self.attdrop_rate, training=self.is_training)
+
+        if KVtop_cache is None:
+            return out
+        else:
+            return (out, KVtop_cache)
+
+    def feed_forward_layer(self, info_attention, num_hunits, filter_type='dense'):
+        '''
+        filter_type:
+            "dense"     indicates a dense layer,
+            "graph"     indicates a graph-based FIR filter (graph convolution),
+            "attention" indicates applying the attention algorithm,
+            "conv"      indicates a shared convolution kernel applied instead of a big weight matrix.
+        self.ffndrop_rate may be considered later 03122019
+        '''
+        channel = info_attention.get_shape().as_list()[-1]
+        if filter_type == 'dense':
+            ffn_dense = tf.layers.dense(info_attention, num_hunits, use_bias=True, activation=tf.nn.relu, name=filter_type+'1')
+            ffn_dense = tf.layers.dense(ffn_dense, num_hunits, use_bias=True, activation=None, name=filter_type+'2')
+            return tf.layers.dense(ffn_dense, channel, use_bias=True, activation=None, name=filter_type+'3')
+        elif filter_type == 'graph':
+            raise NotImplementedError
+        elif filter_type == 'attention':
+            raise NotImplementedError
+        elif filter_type == 'conv':
+            raise NotImplementedError
+
+    def layer_norm(self, norm_input, name_stage):
+        norm_step = tf.contrib.layers.layer_norm(tf.transpose(tf.squeeze(norm_input), perm=[0, 2, 1]),
+                                                 begin_norm_axis=2, center=True, scale=True, scope=name_stage)
+        return tf.expand_dims(tf.transpose(norm_step, perm=[0, 2, 1]), -1)
+
+    def softmax_combination(self, Q, K, V, model_structure, att_unit, mask=None):
+        '''The mask is applied before the softmax layer; no dropout is applied.'''
+        value_units, segs = V.get_shape().as_list()[-1], V.get_shape().as_list()[0]
+        ids, time = Q[0].get_shape().as_list()[1], Q[1].get_shape().as_list()[1]
+        (value_units, Iunits, Tunits) = att_unit
+
+        (Q_I, Q_T) = Q
+        (K_I, K_T) = K
+
+        # Check the dimension consistency of the combined matrices
+        assert Q_I.get_shape().as_list()[1:] == K_I.get_shape().as_list()[1:]
+        assert Q_T.get_shape().as_list()[1:] == K_T.get_shape().as_list()[1:]
+        assert Q_I.get_shape().as_list()[0] == Q_T.get_shape().as_list()[0]
+        assert K_I.get_shape().as_list()[0] == K_T.get_shape().as_list()[0]
+
+        # Build the attention maps
+        AM_Identity = tf.matmul(Q_I, tf.transpose(K_I, [0, 2, 1])) / tf.sqrt(tf.cast(Iunits, tf.float32))
+        AM_Time = tf.matmul(Q_T, tf.transpose(K_T, [0, 2, 1])) / tf.sqrt(tf.cast(Tunits, tf.float32))
+        if mask is not None:
+            # A large negative constant is used instead of -inf, which would produce NaN from -inf*0
+            AM_Time = tf.multiply(AM_Time, mask) + tf.constant(-1.0e9)*(tf.constant(1.0)-mask)
+        if self.mask_imputation is not None:
+            (iden_mask, time_mask) = self.mask_imputation
+            #AM_Identity = tf.multiply(AM_Identity,iden_mask) + tf.constant(-1.0e9)*(tf.constant(1.0)-iden_mask)
+            AM_Time = tf.multiply(AM_Time, time_mask) + tf.constant(-1.0e9)*(tf.constant(1.0)-time_mask)
+        AM_Identity = tf.nn.softmax(AM_Identity, 2)
+        AM_Time = tf.nn.softmax(AM_Time, 2)
+
+        shape_id = [segs, ids, time, value_units]
+        shape_time = [segs, time, ids, value_units]
+
+        if model_structure == 'Sequence':
+            # Apply the identity attention map first, then the time attention map, on the same value tensor
+            Out_Id = tf.reshape(tf.matmul(AM_Identity, tf.reshape(V, [segs, ids, -1])), shape_id)
+            Out_Id = tf.transpose(Out_Id, perm=[0, 2, 1, 3])
+            Out_Id_Time = tf.reshape(tf.matmul(AM_Time, tf.reshape(Out_Id, [segs, time, -1])), shape_time)
+            return tf.transpose(Out_Id_Time, perm=[0, 2, 1, 3])
+        else:
+            V_id, V_time = V, tf.transpose(V, perm=[0, 2, 1, 3])
+            Out_Identity = tf.reshape(tf.matmul(AM_Identity, tf.reshape(V_id, [segs, ids, -1])), shape_id)
+            Out_Time = tf.reshape(tf.matmul(AM_Time, tf.reshape(V_time, [segs, time, -1])), shape_time)
+
+            Out_Time = tf.transpose(Out_Time, perm=[0, 2, 1, 3])
+            if model_structure == 'Element-wise-addition':
+                return tf.divide(tf.add(Out_Identity, Out_Time), tf.constant(2.0))
+            elif model_structure == 'Concatenation':
+                Attention_output = tf.concat([Out_Identity, Out_Time], 3)
+                return tf.layers.dense(Attention_output, value_units, use_bias=False)
+            else:
+                raise ValueError('Unavailable model_structure: %s' % model_structure)
+
+    def casual_mask(self):
+        '''
+        This function is only applied in the self-attention layer of the decoder.
+        A lower-triangular matrix indicates the available references for every position in each calculation.
+        Key idea: only previous positions are used to predict the future.
+        '''
+        batch_size, period = self.batch_size, self.period_dec
+        casual_unit = np.tril(np.ones((period, period)))
+        casual_tensor = tf.convert_to_tensor(casual_unit, dtype=tf.float32)
+        return tf.tile(tf.expand_dims(casual_tensor, 0), [batch_size, 1, 1])
+
+    def impute_mask(self):
+        batch_size = self.batch_size
+        iden_unit = 1.0-np.identity(self.num_identity)
+        time_unit = 1.0-np.identity(self.period_dec)
+        iden_tensor = tf.tile(tf.expand_dims(tf.convert_to_tensor(iden_unit, dtype=tf.float32), 0), [self.num_heads*batch_size, 1, 1])
+        time_tensor = tf.tile(tf.expand_dims(tf.convert_to_tensor(time_unit, dtype=tf.float32), 0), [self.num_heads*batch_size, 1, 1])
+        return (iden_tensor, time_tensor)
+
+    def initial_parameter(self):
+
+        config = self.config
+        # Parameter initialization for data assignment
+        self.batch_size = int(config.get('batch_size', 1))
+        self.period_enc = int(config.get('period_enc', 12))
+        self.period_dec = int(config.get('period_dec', 12))
+
+        # Parameter initialization for the model framework
+        self.num_heads = int(config.get('num_heads', 8))
+        self.num_enclayer = int(config.get('num_enclayer', 5))
+        self.num_declayer = int(config.get('num_declayer', 5))
+        self.model_structure = self.config.get('model_structure')
+
+        # Parameter initialization for attention (Q, K and V)
+        self.AM_timjump = int(config.get('time_stride_AM', 1))
+        self.V_timjump = int(config.get('time_stride_V', 1))
+        self.AM_timfuse = int(config.get('time_fuse_AM', 1))
+        self.V_timfuse = int(config.get('time_fuse_V', 1))
+        vunits, Iunits, Tunits = int(config.get('units_value', 6)), int(config.get('units_IDw', 14)), int(config.get('units_Timew', 6))
+        self.attention_unit = (vunits, Iunits, Tunits)
+
+        # Parameter initialization for filters (Enc-Dec, prediction)
+        self.filter_encdec = config.get('filter_encdec', 'dense')
+        self.conv_unit = int(config.get('units_conv', 4))
+        self.attdrop_rate = float(config.get('drop_rate_attention', 0.0))
+        self.ffndrop_rate = float(config.get('drop_rate_forward', 0.1))
+        self.filter_pred = config.get('filter_pred', 'dense')
+        self.pred_unit = int(config.get('units_pred', 8))
+
+        # Mask flags
+        self.flag_identity = config.get('flag_identity', False)
+        self.flag_time = config.get('flag_time', False)
+        self.flag_casuality = config.get('flag_casuality', False)
+        self.flag_imputation = config.get('flag_imputation', False)
+
+    def auxiliary_encode(self, shared_info):
+        # Concatenation is not applicable here since the attention across all three dimensions needs to be learned,
+        # and expanding each dimension would not make sense for our model.
+        # Concatenation along the feature dimension (expanded as 1) is equivalent to element-wise addition.
+        with tf.variable_scope('shared_feature'):
+            shared_encoder = tf.layers.dense(tf.expand_dims(shared_info, 0), self.period_enc,
+                                             use_bias=False, activation=None, name='encoder')
+            shared_encoder = tf.reshape(shared_encoder, [1, self.num_identity, self.period_enc, 1])
+            shared_encoder = tf.tile(shared_encoder, [self.batch_size, 1, 1, 1])
+            shared_decoder = tf.layers.dense(tf.expand_dims(shared_info, 0), self.period_dec,
+                                             use_bias=False, activation=None, name='decoder')
+            shared_decoder = tf.reshape(shared_decoder, [1, self.num_identity, self.period_dec, 1])
+            shared_decoder = tf.tile(shared_decoder, [self.batch_size, 1, 1, 1])
+
+        # Sinusoidal time encoding with a slowly varying phase
+        denom = tf.constant(1000.0)
+        phase_enc = tf.linspace(0.0, self.period_enc-1.0, self.period_enc)*tf.constant(math.pi/180.0)/denom
+        phase_dec = tf.linspace(0.0, self.period_dec-1.0, self.period_dec)*tf.constant(math.pi/180.0)/denom
+        sin_enc, sin_dec = tf.expand_dims(tf.sin(phase_enc), 0), tf.expand_dims(tf.sin(phase_dec), 0)
+
+        time_encoder = tf.expand_dims(tf.tile(tf.expand_dims(sin_enc, 0), [self.batch_size, self.num_identity, 1]), -1)
+        time_decoder = tf.expand_dims(tf.tile(tf.expand_dims(sin_dec, 0), [self.batch_size, self.num_identity, 1]), -1)
+        return (shared_encoder, shared_decoder, time_encoder, time_decoder)
\ No newline at end of file
diff --git a/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.pyc b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.pyc
new file mode 100644
index 0000000..277bcaa
Binary files /dev/null and b/models/cross-dimensional-attention/tensorflow/MultiDim_Analyzer_Model.pyc differ
diff --git a/models/cross-dimensional-attention/tensorflow/__init__.py b/models/cross-dimensional-attention/tensorflow/__init__.py
new file mode 100644
index 0000000..9615bf0
--- /dev/null
+++ b/models/cross-dimensional-attention/tensorflow/__init__.py
@@ -0,0 +1,7 @@
+# uncompyle6 version 3.7.1
+# Python bytecode 2.7 (62211)
+# Decompiled from: Python 3.6.9 (default, Apr 18 2020, 01:56:04)
+# [GCC 8.4.0]
+# Embedded file name: /home/jiawei/Tensor_MultiDim_NYC/Model/__init__.py
+# Compiled at: 2019-04-29 08:21:25
+pass
\ No newline at end of file
diff --git a/models/cross-dimensional-attention/tensorflow/__init__.pyc b/models/cross-dimensional-attention/tensorflow/__init__.pyc
new file mode 100644
index 0000000..9d4580d
Binary files /dev/null and b/models/cross-dimensional-attention/tensorflow/__init__.pyc differ
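
Usage note: no driver script is included in this diff, so the following is a
hypothetical sketch of how the pieces above fit together. The entry-point shape,
the `dataset_name` format expected by `Lib.Data_Processing.Data_Division`, and the
GPU selection are assumptions, not part of the committed code.

```python
# Hypothetical driver (sketch, not part of this diff).
import os
import yaml
import tensorflow as tf

from Model.MultiDim_Analyzer_Handler import MDAnalyzer_Handler

with open('Config.yaml') as f:
    model_config = yaml.safe_load(f)

# Assumption: the 'GPU' key in Config.yaml selects the visible device.
os.environ['CUDA_VISIBLE_DEVICES'] = str(model_config.get('GPU', 0))

# Assumption: Data_Division resolves the data/mask files named in Config.yaml.
dataset_name = (model_config['data_name'], model_config['mask_name'])

with tf.Session() as sess:
    handler = MDAnalyzer_Handler(dataset_name, model_config, sess, is_training=True)
    handler.train()
```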