From a5f88b5a4a2b60d2f382a7c6f24577fcadb46848 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 09:26:58 -0800 Subject: [PATCH 01/15] moved latent space into a module --- src/lasdi/{latent_space.py => latent_space/__init__.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/lasdi/{latent_space.py => latent_space/__init__.py} (100%) diff --git a/src/lasdi/latent_space.py b/src/lasdi/latent_space/__init__.py similarity index 100% rename from src/lasdi/latent_space.py rename to src/lasdi/latent_space/__init__.py From 1b75a9e5b8e20952a7a6bb3058451c9a9b0df5fe Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 09:31:00 -0800 Subject: [PATCH 02/15] move NN modules into networks.py --- src/lasdi/latent_space/__init__.py | 108 +--------------------------- src/lasdi/networks.py | 109 +++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 107 deletions(-) create mode 100644 src/lasdi/networks.py diff --git a/src/lasdi/latent_space/__init__.py b/src/lasdi/latent_space/__init__.py index fdf8fef..09b128a 100644 --- a/src/lasdi/latent_space/__init__.py +++ b/src/lasdi/latent_space/__init__.py @@ -1,31 +1,6 @@ import torch import numpy as np - -# activation dict -act_dict = {'ELU': torch.nn.ELU, - 'hardshrink': torch.nn.Hardshrink, - 'hardsigmoid': torch.nn.Hardsigmoid, - 'hardtanh': torch.nn.Hardtanh, - 'hardswish': torch.nn.Hardswish, - 'leakyReLU': torch.nn.LeakyReLU, - 'logsigmoid': torch.nn.LogSigmoid, - 'multihead': torch.nn.MultiheadAttention, - 'PReLU': torch.nn.PReLU, - 'ReLU': torch.nn.ReLU, - 'ReLU6': torch.nn.ReLU6, - 'RReLU': torch.nn.RReLU, - 'SELU': torch.nn.SELU, - 'CELU': torch.nn.CELU, - 'GELU': torch.nn.GELU, - 'sigmoid': torch.nn.Sigmoid, - 'SiLU': torch.nn.SiLU, - 'mish': torch.nn.Mish, - 'softplus': torch.nn.Softplus, - 'softshrink': torch.nn.Softshrink, - 'tanh': torch.nn.Tanh, - 'tanhshrink': torch.nn.Tanhshrink, - 'threshold': torch.nn.Threshold, - } +from ..networks import MultiLayerPerceptron def initial_condition_latent(param_grid, physics, autoencoder): @@ -50,87 +25,6 @@ def initial_condition_latent(param_grid, physics, autoencoder): return Z0 -class MultiLayerPerceptron(torch.nn.Module): - - def __init__(self, layer_sizes, - act_type='sigmoid', reshape_index=None, reshape_shape=None, - threshold=0.1, value=0.0, num_heads=1): - super(MultiLayerPerceptron, self).__init__() - - # including input, hidden, output layers - self.n_layers = len(layer_sizes) - self.layer_sizes = layer_sizes - - # Linear features between layers - self.fcs = [] - for k in range(self.n_layers-1): - self.fcs += [torch.nn.Linear(layer_sizes[k], layer_sizes[k + 1])] - self.fcs = torch.nn.ModuleList(self.fcs) - self.init_weight() - - # Reshape input or output layer - assert((reshape_index is None) or (reshape_index in [0, -1])) - assert((reshape_shape is None) or (np.prod(reshape_shape) == layer_sizes[reshape_index])) - self.reshape_index = reshape_index - self.reshape_shape = reshape_shape - - # Initalize activation function - self.act_type = act_type - self.use_multihead = False - if act_type == "threshold": - self.act = act_dict[act_type](threshold, value) - - elif act_type == "multihead": - self.use_multihead = True - if (self.n_layers > 3): # if you have more than one hidden layer - self.act = [] - for i in range(self.n_layers-2): - self.act += [act_dict[act_type](layer_sizes[i+1], num_heads)] - else: - self.act = [torch.nn.Identity()] # No additional activation - self.act = torch.nn.ModuleList(self.fcs) - - #all other activation functions 
initialized here - else: - self.act = act_dict[act_type]() - return - - def forward(self, x): - if (self.reshape_index == 0): - # make sure the input has a proper shape - assert(list(x.shape[-len(self.reshape_shape):]) == self.reshape_shape) - # we use torch.Tensor.view instead of torch.Tensor.reshape, - # in order to avoid data copying. - x = x.view(list(x.shape[:-len(self.reshape_shape)]) + [self.layer_sizes[self.reshape_index]]) - - for i in range(self.n_layers-2): - x = self.fcs[i](x) # apply linear layer - if (self.use_multihead): - x = self.apply_attention(self, x, i) - else: - x = self.act(x) - - x = self.fcs[-1](x) - - if (self.reshape_index == -1): - # we use torch.Tensor.view instead of torch.Tensor.reshape, - # in order to avoid data copying. - x = x.view(list(x.shape[:-1]) + self.reshape_shape) - - return x - - def apply_attention(self, x, act_idx): - x = x.unsqueeze(1) # Add sequence dimension for attention - x, _ = self.act[act_idx](x, x, x) # apply attention - x = x.squeeze(1) # Remove sequence dimension - return x - - def init_weight(self): - # TODO(kevin): support other initializations? - for fc in self.fcs: - torch.nn.init.xavier_uniform_(fc.weight) - return - class Autoencoder(torch.nn.Module): def __init__(self, physics, config): diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py new file mode 100644 index 0000000..ae12030 --- /dev/null +++ b/src/lasdi/networks.py @@ -0,0 +1,109 @@ +import torch +import numpy as np + +# activation dict +act_dict = {'ELU': torch.nn.ELU, + 'hardshrink': torch.nn.Hardshrink, + 'hardsigmoid': torch.nn.Hardsigmoid, + 'hardtanh': torch.nn.Hardtanh, + 'hardswish': torch.nn.Hardswish, + 'leakyReLU': torch.nn.LeakyReLU, + 'logsigmoid': torch.nn.LogSigmoid, + 'multihead': torch.nn.MultiheadAttention, + 'PReLU': torch.nn.PReLU, + 'ReLU': torch.nn.ReLU, + 'ReLU6': torch.nn.ReLU6, + 'RReLU': torch.nn.RReLU, + 'SELU': torch.nn.SELU, + 'CELU': torch.nn.CELU, + 'GELU': torch.nn.GELU, + 'sigmoid': torch.nn.Sigmoid, + 'SiLU': torch.nn.SiLU, + 'mish': torch.nn.Mish, + 'softplus': torch.nn.Softplus, + 'softshrink': torch.nn.Softshrink, + 'tanh': torch.nn.Tanh, + 'tanhshrink': torch.nn.Tanhshrink, + 'threshold': torch.nn.Threshold, + } + +class MultiLayerPerceptron(torch.nn.Module): + + def __init__(self, layer_sizes, + act_type='sigmoid', reshape_index=None, reshape_shape=None, + threshold=0.1, value=0.0, num_heads=1): + super(MultiLayerPerceptron, self).__init__() + + # including input, hidden, output layers + self.n_layers = len(layer_sizes) + self.layer_sizes = layer_sizes + + # Linear features between layers + self.fcs = [] + for k in range(self.n_layers-1): + self.fcs += [torch.nn.Linear(layer_sizes[k], layer_sizes[k + 1])] + self.fcs = torch.nn.ModuleList(self.fcs) + self.init_weight() + + # Reshape input or output layer + assert((reshape_index is None) or (reshape_index in [0, -1])) + assert((reshape_shape is None) or (np.prod(reshape_shape) == layer_sizes[reshape_index])) + self.reshape_index = reshape_index + self.reshape_shape = reshape_shape + + # Initalize activation function + self.act_type = act_type + self.use_multihead = False + if act_type == "threshold": + self.act = act_dict[act_type](threshold, value) + + elif act_type == "multihead": + self.use_multihead = True + if (self.n_layers > 3): # if you have more than one hidden layer + self.act = [] + for i in range(self.n_layers-2): + self.act += [act_dict[act_type](layer_sizes[i+1], num_heads)] + else: + self.act = [torch.nn.Identity()] # No additional activation + self.act = 
torch.nn.ModuleList(self.fcs) + + #all other activation functions initialized here + else: + self.act = act_dict[act_type]() + return + + def forward(self, x): + if (self.reshape_index == 0): + # make sure the input has a proper shape + assert(list(x.shape[-len(self.reshape_shape):]) == self.reshape_shape) + # we use torch.Tensor.view instead of torch.Tensor.reshape, + # in order to avoid data copying. + x = x.view(list(x.shape[:-len(self.reshape_shape)]) + [self.layer_sizes[self.reshape_index]]) + + for i in range(self.n_layers-2): + x = self.fcs[i](x) # apply linear layer + if (self.use_multihead): + x = self.apply_attention(self, x, i) + else: + x = self.act(x) + + x = self.fcs[-1](x) + + if (self.reshape_index == -1): + # we use torch.Tensor.view instead of torch.Tensor.reshape, + # in order to avoid data copying. + x = x.view(list(x.shape[:-1]) + self.reshape_shape) + + return x + + def apply_attention(self, x, act_idx): + x = x.unsqueeze(1) # Add sequence dimension for attention + x, _ = self.act[act_idx](x, x, x) # apply attention + x = x.squeeze(1) # Remove sequence dimension + return x + + def init_weight(self): + # TODO(kevin): support other initializations? + for fc in self.fcs: + torch.nn.init.xavier_uniform_(fc.weight) + return \ No newline at end of file From 78a25d09bd0c3bb8fe2a89370133c74846cfee9a Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 09:38:22 -0800 Subject: [PATCH 03/15] remove multihead attention --- src/lasdi/latent_space/__init__.py | 5 ++--- src/lasdi/networks.py | 23 +++-------------------- 2 files changed, 5 insertions(+), 23 deletions(-) diff --git a/src/lasdi/latent_space/__init__.py b/src/lasdi/latent_space/__init__.py index 09b128a..e798005 100644 --- a/src/lasdi/latent_space/__init__.py +++ b/src/lasdi/latent_space/__init__.py @@ -41,15 +41,14 @@ def __init__(self, physics, config): act_type = config['activation'] if 'activation' in config else 'sigmoid' threshold = config["threshold"] if "threshold" in config else 0.1 value = config["value"] if "value" in config else 0.0 - num_heads = config['num_heads'] if 'num_heads' in config else 1 self.encoder = MultiLayerPerceptron(layer_sizes, act_type, reshape_index=0, reshape_shape=self.qgrid_size, - threshold=threshold, value=value, num_heads=num_heads) + threshold=threshold, value=value) self.decoder = MultiLayerPerceptron(layer_sizes[::-1], act_type, reshape_index=-1, reshape_shape=self.qgrid_size, - threshold=threshold, value=value, num_heads=num_heads) + threshold=threshold, value=value) return diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py index ae12030..3e63076 100644 --- a/src/lasdi/networks.py +++ b/src/lasdi/networks.py @@ -31,7 +31,7 @@ class MultiLayerPerceptron(torch.nn.Module): def __init__(self, layer_sizes, act_type='sigmoid', reshape_index=None, reshape_shape=None, - threshold=0.1, value=0.0, num_heads=1): + threshold=0.1, value=0.0): super(MultiLayerPerceptron, self).__init__() # including input, hidden, output layers @@ -53,19 +53,11 @@ def __init__(self, layer_sizes, # Initalize activation function self.act_type = act_type - self.use_multihead = False if act_type == "threshold": self.act = act_dict[act_type](threshold, value) elif act_type == "multihead": - self.use_multihead = True - if (self.n_layers > 3): # if you have more than one hidden layer - self.act = [] - for i in range(self.n_layers-2): - self.act += [act_dict[act_type](layer_sizes[i+1], num_heads)] - else: - self.act = [torch.nn.Identity()] # No additional activation - self.act = 
torch.nn.ModuleList(self.fcs) + raise RuntimeError("MultiLayerPerceptron: MultiheadAttention requires a different architecture!") #all other activation functions initialized here else: @@ -82,10 +74,7 @@ def forward(self, x): for i in range(self.n_layers-2): x = self.fcs[i](x) # apply linear layer - if (self.use_multihead): - x = self.apply_attention(self, x, i) - else: - x = self.act(x) + x = self.act(x) x = self.fcs[-1](x) @@ -96,12 +85,6 @@ def forward(self, x): return x - def apply_attention(self, x, act_idx): - x = x.unsqueeze(1) # Add sequence dimension for attention - x, _ = self.act[act_idx](x, x, x) # apply attention - x = x.squeeze(1) # Remove sequence dimension - return x - def init_weight(self): # TODO(kevin): support other initializations? for fc in self.fcs: From 76b1ab3a2609e9759cb33389d6d98f6ea5df2155 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 09:50:58 -0800 Subject: [PATCH 04/15] export and load loss histories --- examples/burgers1d.ipynb | 14 +++----------- src/lasdi/gplasdi.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/examples/burgers1d.ipynb b/examples/burgers1d.ipynb index 1cff602..237bc31 100644 --- a/examples/burgers1d.ipynb +++ b/examples/burgers1d.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "d67dad03-9d76-4891-82ff-7e19d1369a24", "metadata": {}, "outputs": [], @@ -78,7 +78,7 @@ "outputs": [], "source": [ "# Specify the restart file you have.\n", - "filename = 'lasdi_10_01_2024_17_09.npy'\n", + "filename = 'restarts/burgers1d.restart.npy'\n", "\n", "import yaml\n", "from lasdi.workflow import initialize_trainer\n", @@ -111,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "dcdac0c2", "metadata": {}, "outputs": [], @@ -302,14 +302,6 @@ "\n", "plot_prediction(param, autoencoder, physics, sindy, gp_dictionnary, n_samples, true, scale)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11c629e4", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/src/lasdi/gplasdi.py b/src/lasdi/gplasdi.py index 3e156d4..131990c 100644 --- a/src/lasdi/gplasdi.py +++ b/src/lasdi/gplasdi.py @@ -145,6 +145,11 @@ def __init__(self, physics, autoencoder, latent_dynamics, param_space, config): self.X_train = torch.Tensor([]) self.X_test = torch.Tensor([]) + self.training_loss = [] + self.ae_loss = [] + self.ld_loss = [] + self.coef_loss = [] + return def train(self): @@ -291,4 +296,9 @@ def load(self, dict_): self.optimizer.load_state_dict(dict_['optimizer']) if (self.device != 'cpu'): optimizer_to(self.optimizer, self.device) + + self.training_loss = dict_['training_loss'] + self.ae_loss = ['ae_loss'] + self.ld_loss = ['ld_loss'] + self.coef_loss = ['coeff_loss'] return From 91b0bcf4653181f6e2f51e24afe9daf9fbaad511 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 10:44:38 -0800 Subject: [PATCH 05/15] base LatentSpace class --- examples/burgers1d.ipynb | 30 +++++++++++++++++++++ src/lasdi/gplasdi.py | 11 +++----- src/lasdi/latent_space/__init__.py | 42 +++++++++++++++++++++++++----- 3 files changed, 69 insertions(+), 14 deletions(-) diff --git a/examples/burgers1d.ipynb b/examples/burgers1d.ipynb index 237bc31..f65534e 100644 --- a/examples/burgers1d.ipynb +++ b/examples/burgers1d.ipynb @@ -179,6 +179,28 @@ "n_coef = restart_file['latent_dynamics']['ncoefs']" ] }, + { + "cell_type": "markdown", + "id": "03a96b35", + "metadata": {}, + "source": [ + "# 
Loss history" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0163864", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(1)\n", + "plt.loglog(trainer.training_loss)\n", + "plt.loglog(trainer.ae_loss)\n", + "plt.loglog(trainer.ld_loss)\n", + "plt.loglog(trainer.coef_loss)" + ] + }, { "cell_type": "markdown", "id": "1262a0c3", @@ -302,6 +324,14 @@ "\n", "plot_prediction(param, autoencoder, physics, sindy, gp_dictionnary, n_samples, true, scale)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba098b6d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/src/lasdi/gplasdi.py b/src/lasdi/gplasdi.py index 131990c..8bbd903 100644 --- a/src/lasdi/gplasdi.py +++ b/src/lasdi/gplasdi.py @@ -168,11 +168,6 @@ def train(self): n_train = ps.n_train() ld = self.latent_dynamics - self.training_loss = [] - self.ae_loss = [] - self.ld_loss = [] - self.coef_loss = [] - ''' determine number of iterations. Perform n_iter iterations until overall iterations hit max_iter. @@ -298,7 +293,7 @@ def load(self, dict_): optimizer_to(self.optimizer, self.device) self.training_loss = dict_['training_loss'] - self.ae_loss = ['ae_loss'] - self.ld_loss = ['ld_loss'] - self.coef_loss = ['coeff_loss'] + self.ae_loss = dict_['ae_loss'] + self.ld_loss = dict_['ld_loss'] + self.coef_loss = dict_['coeff_loss'] return diff --git a/src/lasdi/latent_space/__init__.py b/src/lasdi/latent_space/__init__.py index e798005..7d6774e 100644 --- a/src/lasdi/latent_space/__init__.py +++ b/src/lasdi/latent_space/__init__.py @@ -25,18 +25,46 @@ def initial_condition_latent(param_grid, physics, autoencoder): return Z0 -class Autoencoder(torch.nn.Module): +class LatentSpace(torch.nn.Module): def __init__(self, physics, config): - super(Autoencoder, self).__init__() + super(LatentSpace, self).__init__() self.qgrid_size = physics.qgrid_size + self.n_z = config['latent_dimension'] + + return + + def forward(self, x): + raise RuntimeError("LatentSpace.forward: abstract method!") + + def export(self): + dict_ = {'qgrid_size': self.qgrid_size, + 'n_z': self.n_z} + return dict_ + + def load(self, dict_): + """ + Notes + ----- + This abstract class only checks if the variables in restart file are the same as the instance attributes. 
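+
+        Subclasses should call this method via super().load(dict_) and then
+        load their own parameters, as Autoencoder.load below does.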
+ """ + + assert(dict_['qgrid_size'] == self.qgrid_size) + assert(dict_['n_z'] == self.n_z) + return + + +class Autoencoder(LatentSpace): + + def __init__(self, physics, config): + super().__init__(physics, config) + # super(Autoencoder, self).__init__() + self.space_dim = np.prod(self.qgrid_size) hidden_units = config['hidden_units'] - n_z = config['latent_dimension'] - self.n_z = n_z - layer_sizes = [self.space_dim] + hidden_units + [n_z] + layer_sizes = [self.space_dim] + hidden_units + [self.n_z] #grab relevant initialization values from config act_type = config['activation'] if 'activation' in config else 'sigmoid' threshold = config["threshold"] if "threshold" in config else 0.1 @@ -60,9 +88,11 @@ def forward(self, x): return x def export(self): - dict_ = {'autoencoder_param': self.cpu().state_dict()} + dict_ = super().export() + dict_['autoencoder_param'] = self.cpu().state_dict() return dict_ def load(self, dict_): + super().load(dict_) self.load_state_dict(dict_['autoencoder_param']) return \ No newline at end of file From c69fd7f1a33eef74b5e7bb365ed1dfdc82684a66 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 14:43:50 -0800 Subject: [PATCH 06/15] convolutional neural networks module --- src/lasdi/latent_space/__init__.py | 1 - src/lasdi/networks.py | 186 +++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 1 deletion(-) diff --git a/src/lasdi/latent_space/__init__.py b/src/lasdi/latent_space/__init__.py index 7d6774e..36365c1 100644 --- a/src/lasdi/latent_space/__init__.py +++ b/src/lasdi/latent_space/__init__.py @@ -59,7 +59,6 @@ class Autoencoder(LatentSpace): def __init__(self, physics, config): super().__init__(physics, config) - # super(Autoencoder, self).__init__() self.space_dim = np.prod(self.qgrid_size) hidden_units = config['hidden_units'] diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py index 3e63076..2eae26d 100644 --- a/src/lasdi/networks.py +++ b/src/lasdi/networks.py @@ -85,6 +85,192 @@ def forward(self, x): return x + def init_weight(self): + # TODO(kevin): support other initializations? + for fc in self.fcs: + torch.nn.init.xavier_uniform_(fc.weight) + return + +class CNN2D(torch.nn.Module): + from enum import Enum + class Mode(Enum): + Forward = 1 + Backward = -1 + + def __init__(self, mode, channels, kernel_sizes, + strides, paddings, dilations, + groups=1, bias=True, padding_mode='zeros', + act_type='ReLU'): + super(CNN2D, self).__init__() + + if (mode == 'forward'): + self.mode = self.Mode.Forward + module = torch.nn.Conv2d + elif (mode == 'backward'): + self.mode = self.Mode.Backward + module = torch.nn.ConvTranspose2d + else: + raise RuntimeError('CNN2D: Unknown mode %s!' % mode) + + self.channels = channels + self.n_layers = len(channels) + self.layer_sizes = np.zeros([self.n_layers, 3], dtype=int) + self.layer_sizes[:, 0] = channels + + assert(len(kernel_sizes) == self.n_layers - 1) + assert(len(strides) == self.n_layers - 1) + assert(len(paddings) == self.n_layers - 1) + assert(len(dilations) == self.n_layers - 1) + self.kernel_sizes = kernel_sizes + self.strides = strides + self.paddings = paddings + self.dilations = dilations + + self.groups = groups + self.bias = bias + self.padding_mode = padding_mode + + from lasdi.networks import act_dict + # TODO(kevin): not use threshold activation for now. 
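+        # ('threshold' would need the extra (threshold, value) arguments used in
+        # MultiLayerPerceptron, which are not plumbed through here.)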
+ assert(act_type != 'threshold') + self.act = act_dict[act_type]() + + self.fcs = [] + for k in range(self.n_layers - 1): + self.fcs += [module(self.channels[k], self.channels[k+1], self.kernel_sizes[k], + stride=self.strides[k], padding=self.paddings[k], dilation=self.dilations[k], + groups=self.groups, bias=self.bias, padding_mode=self.padding_mode)] + + self.fcs = torch.nn.ModuleList(self.fcs) + self.init_weight() + + self.batch_reshape = None + + return + + def set_data_shape(self, data_shape : list): + idx = 0 if (self.mode == CNN2D.Mode.Forward) else -1 + + if (len(data_shape) > 2): + if (data_shape[-3] != self.channels[idx]): + assert(self.channels[idx] == 1) + self.batch_reshape = [np.prod(data_shape[:-2]), 1] + else: + self.batch_reshape = [np.prod(data_shape[:-3]), self.channels[idx]] + elif (len(data_shape) == 2): + assert(self.channels[idx] == 1) + self.batch_reshape = [1] + + self.batch_reshape += data_shape[-2:] + self.layer_sizes[idx, 1:] = data_shape[-2:] + + if (self.mode == CNN2D.Mode.Forward): + for k in range(self.n_layers - 1): + self.layer_sizes[k+1, 1:] = CNN2D.compute_output_layer_size(self.layer_sizes[k, 1:], + self.kernel_sizes[k], self.strides[k], self.paddings[k], + self.dilations[k], self.mode) + else: + for k in range(self.n_layers - 2, -1, -1): + self.layer_sizes[k, 1:] = CNN2D.compute_input_layer_size(self.layer_sizes[k+1, 1:], + self.kernel_sizes[k], self.strides[k], self.paddings[k], + self.dilations[k], self.mode) + + self.print_data_shape() + return + + def print_data_shape(self): + mode_str = "forward" if (self.mode == CNN2D.Mode.Forward) else "backward" + print("mode: ", mode_str) + print("batch reshape: ", self.batch_reshape) + for k in range(self.n_layers - 1): + print('input layer: ', self.layer_sizes[k], + 'output layer: ', self.layer_sizes[k+1]) + return + + def forward(self, x): + if (self.mode == CNN2D.Mode.Forward): + x = x.view(self.batch_reshape) + + for i in range(self.n_layers-2): + x = self.fcs[i](x) + x = self.act(x) + + x = self.fcs[-1](x) + + if (self.mode == CNN2D.Mode.Backward): + x = x.view(self.batch_reshape) + return x + + def reshape_input_data(self, x): + if (x.dim() > 2): + if (x.shape[-3] != self.channels[0]): + assert(self.channels[0] == 1) + batch_reshape = [np.prod(x.shape[:-2]), 1] + else: + batch_reshape = [np.prod(x.shape[:-3]), x.shape[-3]] + + return x.view(batch_reshape + list(x.shape[-2:])) + elif (x.dim() == 2): + assert(self.channels[0] == 1) + return x.view([1] + list(x.shape)) + + def reshape_output_data(self, x): + assert(self.batch_shape is not None) + assert(x.dim() == 4) + assert(x.shape[0] == np.prod(self.batch_shape)) + if (self.layer_sizes[-1][0] == 1): + batch_reshape = x.shape[-2:] + else: + batch_reshape = x.shape[-3:] + + return x.view(self.batch_shape + list(batch_reshape)) + + @classmethod + def compute_input_layer_size(cls, output_shape, kernel_size, stride, padding, dilation, mode): + assert(len(output_shape) == 2) + if (type(kernel_size) is int): + kernel_size = [kernel_size, kernel_size] + if (type(stride) is int): + stride = [stride, stride] + if (type(padding) is int): + padding = [padding, padding] + if (type(dilation) is int): + dilation = [dilation, dilation] + + if (mode == cls.Mode.Forward): + Hin = (output_shape[0] - 1) * stride[0] - 2 * padding[0] + dilation[0] * (kernel_size[0] - 1) + 1 + Win = (output_shape[1] - 1) * stride[1] - 2 * padding[1] + dilation[1] * (kernel_size[1] - 1) + 1 + elif (mode == cls.Mode.Backward): + Hin = (output_shape[0] + 2 * padding[0] - dilation[0] * 
(kernel_size[0] - 1) - 1) / stride[0] + 1 + Win = (output_shape[1] + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1 + else: + raise RuntimeError('CNN2D: Unknown mode %s!' % mode) + + return [int(np.floor(Hin)), int(np.floor(Win))] + + @classmethod + def compute_output_layer_size(cls, input_shape, kernel_size, stride, padding, dilation, mode): + assert(len(input_shape) == 2) + if (type(kernel_size) is int): + kernel_size = [kernel_size, kernel_size] + if (type(stride) is int): + stride = [stride, stride] + if (type(padding) is int): + padding = [padding, padding] + if (type(dilation) is int): + dilation = [dilation, dilation] + + if (mode == cls.Mode.Forward): + Hout = (input_shape[0] + 2 * padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) / stride[0] + 1 + Wout = (input_shape[1] + 2 * padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) / stride[1] + 1 + elif (mode == cls.Mode.Backward): + Hout = (input_shape[0] - 1) * stride[0] - 2 * padding[0] + dilation[0] * (kernel_size[0] - 1) + 1 + Wout = (input_shape[1] - 1) * stride[1] - 2 * padding[1] + dilation[1] * (kernel_size[1] - 1) + 1 + else: + raise RuntimeError('CNN2D: Unknown mode %s!' % mode) + + return [int(np.floor(Hout)), int(np.floor(Wout))] + def init_weight(self): # TODO(kevin): support other initializations? for fc in self.fcs: From 9436abd4cdb855540b2680e27624463b5f79e651 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 16:50:57 -0800 Subject: [PATCH 07/15] CNN test routines --- docker/Dockerfile | 4 +- src/lasdi/networks.py | 40 ++++++++---- tests/test_CNN2D.py | 147 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 177 insertions(+), 14 deletions(-) create mode 100644 tests/test_CNN2D.py diff --git a/docker/Dockerfile b/docker/Dockerfile index bb49ef6..0a45e12 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -23,8 +23,8 @@ RUN sudo apt-get install -yq python3-dev RUN sudo apt-get install -yq python3-pip RUN sudo apt-get install python-is-python3 RUN sudo python -m pip install --upgrade pip -RUN sudo python -m pip install sphinx sphinx-autoapi sphinx_rtd_theme -#RUN sudo pip3 install numpy scipy argparse tables PyYAML h5py pybind11 pytest mpi4py merlin +# RUN sudo python -m pip install sphinx sphinx-autoapi sphinx_rtd_theme +RUN sudo pip3 install torch==2.0.1 numpy==1.23.0 scikit-learn==1.3 scipy==1.10 pyyaml==6.0 matplotlib==3.8.0 argparse==1.1 h5py pytest pytest-cov # RUN sudo apt-get clean -q diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py index 2eae26d..9ff562a 100644 --- a/src/lasdi/networks.py +++ b/src/lasdi/networks.py @@ -151,17 +151,21 @@ def __init__(self, mode, channels, kernel_sizes, def set_data_shape(self, data_shape : list): idx = 0 if (self.mode == CNN2D.Mode.Forward) else -1 - if (len(data_shape) > 2): - if (data_shape[-3] != self.channels[idx]): + if (self.mode == CNN2D.Mode.Forward): + if (len(data_shape) > 2): + if (data_shape[-3] != self.channels[idx]): + assert(self.channels[idx] == 1) + self.batch_reshape = [np.prod(data_shape[:-2]), 1] + else: + self.batch_reshape = [np.prod(data_shape[:-3]), self.channels[idx]] + elif (len(data_shape) == 2): assert(self.channels[idx] == 1) - self.batch_reshape = [np.prod(data_shape[:-2]), 1] - else: - self.batch_reshape = [np.prod(data_shape[:-3]), self.channels[idx]] - elif (len(data_shape) == 2): - assert(self.channels[idx] == 1) - self.batch_reshape = [1] + self.batch_reshape = [1] + + self.batch_reshape += data_shape[-2:] + else: + self.batch_reshape = list(data_shape) - self.batch_reshape 
+= data_shape[-2:] self.layer_sizes[idx, 1:] = data_shape[-2:] if (self.mode == CNN2D.Mode.Forward): @@ -175,7 +179,9 @@ def set_data_shape(self, data_shape : list): self.kernel_sizes[k], self.strides[k], self.paddings[k], self.dilations[k], self.mode) - self.print_data_shape() + if (np.any(self.layer_sizes <= 0)): + self.print_data_shape() + raise RuntimeError("CNN2D.set_data_shape: given data shape does not fit with current architecture!") return def print_data_shape(self): @@ -188,7 +194,7 @@ def print_data_shape(self): return def forward(self, x): - if (self.mode == CNN2D.Mode.Forward): + if ((self.batch_reshape is not None) and (self.mode == CNN2D.Mode.Forward)): x = x.view(self.batch_reshape) for i in range(self.n_layers-2): @@ -197,7 +203,7 @@ def forward(self, x): x = self.fcs[-1](x) - if (self.mode == CNN2D.Mode.Backward): + if ((self.batch_reshape is not None) and (self.mode == CNN2D.Mode.Backward)): x = x.view(self.batch_reshape) return x @@ -269,6 +275,16 @@ def compute_output_layer_size(cls, input_shape, kernel_size, stride, padding, di else: raise RuntimeError('CNN2D: Unknown mode %s!' % mode) + if ((mode == cls.Mode.Forward) and ((Hout > np.floor(Hout)) or (Wout > np.floor(Wout)))): + print("input shape: ", input_shape) + print("kernel size: ", kernel_size) + print("stride: ", stride) + print("padding: ", padding) + print("dilation: ", dilation) + print("resulting output shape: ", [Hout, Wout]) + raise RuntimeError("CNN2D.compute_output_layer_size: given architecture will not return the same size backward. " + "Adjust the architecture!") + return [int(np.floor(Hout)), int(np.floor(Wout))] def init_weight(self): diff --git a/tests/test_CNN2D.py b/tests/test_CNN2D.py new file mode 100644 index 0000000..099d80d --- /dev/null +++ b/tests/test_CNN2D.py @@ -0,0 +1,147 @@ +from lasdi.networks import CNN2D +from random import randint +import numpy as np +import torch + +def test_compute_output_layer(): + depth = 2 + channels = [randint(2, 4) for _ in range(depth)] + strides = [randint(1, 3) for _ in range(depth-1)] + paddings = [randint(1, 3) for _ in range(depth-1)] + dilations = [randint(1, 3) for _ in range(depth-1)] + kernel_sizes = [[randint(4, 6) for _ in range(2)]] + + output_shape = [randint(10, 20), randint(10, 20)] + output_data = torch.rand([channels[-1]] + output_shape) + + cnnb = CNN2D('backward', channels[::-1], kernel_sizes[::-1], + strides[::-1], paddings[::-1], dilations[::-1]) + + input_shape = CNN2D.compute_output_layer_size(output_data.shape[1:], kernel_sizes[0], + strides[0], paddings[0], dilations[0], cnnb.mode) + input_data = cnnb(output_data) + assert(list(input_data.shape[-2:]) == input_shape) + + cnnf = CNN2D('forward', channels, kernel_sizes, + strides, paddings, dilations) + + output_shape0 = CNN2D.compute_output_layer_size(input_data.shape[1:], kernel_sizes[0], + strides[0], paddings[0], dilations[0], cnnf.mode) + assert(output_shape0 == output_shape) + + output_data0 = cnnf(input_data) + assert(output_data.shape == output_data0.shape) + + return + +def test_compute_input_layer(): + depth = 2 + channels = [randint(2, 4) for _ in range(depth)] + strides = [randint(1, 3) for _ in range(depth-1)] + paddings = [randint(1, 3) for _ in range(depth-1)] + dilations = [randint(1, 3) for _ in range(depth-1)] + kernel_sizes = [[randint(4, 6) for _ in range(2)]] + + output_shape = [randint(10, 20), randint(10, 20)] + + cnnf = CNN2D('forward', channels, kernel_sizes, + strides, paddings, dilations) + + input_shape = CNN2D.compute_input_layer_size(output_shape, 
kernel_sizes[0], + strides[0], paddings[0], dilations[0], cnnf.mode) + input_data = torch.rand([channels[0]] + input_shape) + + output_data = cnnf(input_data) + assert(list(output_data.shape[-2:]) == output_shape) + + cnnb = CNN2D('backward', channels[::-1], kernel_sizes[::-1], + strides[::-1], paddings[::-1], dilations[::-1]) + + output_shape0 = CNN2D.compute_input_layer_size(input_data.shape[1:], kernel_sizes[0], + strides[0], paddings[0], dilations[0], cnnb.mode) + assert(output_shape0 == output_shape) + + input_data0 = cnnb(output_data) + assert(input_data.shape == input_data0.shape) + return + +def test_set_data_shape(): + depth = 3 + channels = [randint(2, 4) for _ in range(depth)] + strides = [1] * (depth-1) + paddings = [randint(1, 3) for _ in range(depth-1)] + dilations = [randint(1, 3) for _ in range(depth-1)] + kernel_sizes = [[randint(4, 6), randint(4, 6)]] + [randint(2, 3) for _ in range(depth-2)] + + input_shape = [randint(3, 5), randint(10, 20), channels[0], randint(50, 60), randint(50, 60)] + input_data = torch.rand(input_shape) + + cnnf = CNN2D('forward', channels, kernel_sizes, + strides, paddings, dilations) + + cnnf.set_data_shape(input_data.shape) + cnnf.print_data_shape() + + assert(cnnf.batch_reshape[-3:] == input_shape[-3:]) + assert(np.prod(cnnf.batch_reshape) == np.prod(input_shape)) + + output_data = cnnf(input_data) + + cnnb = CNN2D('backward', channels[::-1], kernel_sizes[::-1], + strides[::-1], paddings[::-1], dilations[::-1]) + cnnb.set_data_shape(input_data.shape) + cnnb.print_data_shape() + + assert(cnnb.batch_reshape == input_shape) + + input_data0 = cnnb(output_data) + assert(input_data0.shape == input_data.shape) + + return + +def test_set_data_shape2(): + depth = 3 + channels = [1] + [randint(2, 4) for _ in range(depth-1)] + strides = [1] * (depth-1) + paddings = [randint(1, 3) for _ in range(depth-1)] + dilations = [randint(1, 3) for _ in range(depth-1)] + kernel_sizes = [[randint(4, 6), randint(4, 6)]] + [randint(2, 3) for _ in range(depth-2)] + + def test_func(input_shape_): + input_data = torch.rand(input_shape_) + + cnnf = CNN2D('forward', channels, kernel_sizes, + strides, paddings, dilations) + + cnnf.set_data_shape(input_data.shape) + cnnf.print_data_shape() + + if ((len(input_shape_) == 2) or (input_shape_[-3] != 1)): + assert(cnnf.batch_reshape[-2:] == input_shape_[-2:]) + else: + assert(cnnf.batch_reshape[-3:] == input_shape_[-3:]) + + assert(np.prod(cnnf.batch_reshape) == np.prod(input_shape_)) + + output_data = cnnf(input_data) + + cnnb = CNN2D('backward', channels[::-1], kernel_sizes[::-1], + strides[::-1], paddings[::-1], dilations[::-1]) + cnnb.set_data_shape(input_data.shape) + cnnb.print_data_shape() + + assert(cnnb.batch_reshape == input_shape_) + + input_data0 = cnnb(output_data) + assert(input_data0.shape == input_data.shape) + + input_shape = [randint(50, 60), randint(50, 60)] + test_func(input_shape) + + input_shape = [randint(3, 5), randint(10, 20), randint(50, 60), randint(50, 60)] + test_func(input_shape) + + input_shape = [randint(3, 5), randint(10, 20), channels[0], randint(50, 60), randint(50, 60)] + test_func(input_shape) + + return \ No newline at end of file From 77950d5b2d59c748f421ee689b8af55a7166609a Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 16:55:26 -0800 Subject: [PATCH 08/15] CI test setup --- .github/workflows/ci.yml | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71f5e16..c15e163 
100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,8 +10,33 @@ on: - main jobs: - # docker-image: - # uses: ./.github/workflows/docker.yml + docker-image: + uses: ./.github/workflows/docker.yml api-doc: - # needs: [docker-image] - uses: ./.github/workflows/sphinx.yml \ No newline at end of file + uses: ./.github/workflows/sphinx.yml + linux: + runs-on: ubuntu-latest + needs: [docker-image] + container: + image: ghcr.io/llnl/gplasdi/gplasdi_env:latest + options: --user 1001 --privileged + volumes: + - /mnt:/mnt + steps: + - name: Cancel previous runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + # - name: Set Swap Space + # uses: pierotofy/set-swap-space@master + # with: + # swap-size-gb: 10 + - name: Check out LaSDI + uses: actions/checkout@v3 + - name: Build LaSDI + run: | + cd ${GITHUB_WORKSPACE} + pip install . + - name: Test CNN2D + run: | + pytest -vrx tests/test_CNN2D.py \ No newline at end of file From 44b0a95ffaa8a753112de1e1131e726a91d4f061 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 17:05:21 -0800 Subject: [PATCH 09/15] fix container name --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c15e163..f09dd92 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest needs: [docker-image] container: - image: ghcr.io/llnl/gplasdi/gplasdi_env:latest + image: ghcr.io/llnl/gplasdi/lasdi_env:latest options: --user 1001 --privileged volumes: - /mnt:/mnt From b653accce6b692bd2606f542ea965dfe91ca18a7 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 18:29:08 -0800 Subject: [PATCH 10/15] CNN takes input/output size instead of channel/kernel size. 
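
Kernel sizes are no longer passed to CNN2D. Instead, the constructor
takes the full [channels, height, width] size of every layer and
computes the kernel size of each consecutive layer pair via
compute_kernel_size(), asserting that the resulting output shape
matches the next layer's size. A minimal sketch of the new interface
(the shapes below are illustrative only):

    from lasdi.networks import CNN2D

    # each layer is specified as [channels, height, width]
    layer_sizes = [[1, 60, 60], [4, 15, 15], [4, 3, 3]]
    cnnf = CNN2D(layer_sizes, 'forward',
                 strides=[2, 2], paddings=[1, 1], dilations=[1, 1])
    cnnb = CNN2D(layer_sizes[::-1], 'backward',
                 strides=[2, 2], paddings=[1, 1], dilations=[1, 1])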
--- src/lasdi/latent_space/__init__.py | 81 +++++++++++++++++++++++- src/lasdi/networks.py | 99 +++++++++++++++--------------- 2 files changed, 131 insertions(+), 49 deletions(-) diff --git a/src/lasdi/latent_space/__init__.py b/src/lasdi/latent_space/__init__.py index 36365c1..191d54b 100644 --- a/src/lasdi/latent_space/__init__.py +++ b/src/lasdi/latent_space/__init__.py @@ -1,6 +1,6 @@ import torch import numpy as np -from ..networks import MultiLayerPerceptron +from ..networks import MultiLayerPerceptron, CNN2D def initial_condition_latent(param_grid, physics, autoencoder): @@ -94,4 +94,83 @@ def export(self): def load(self, dict_): super().load(dict_) self.load_state_dict(dict_['autoencoder_param']) + return + +class Conv2DAutoencoder(LatentSpace): + def __init__(self, physics, config): + super().__init__(physics, config) + from lasdi.inputs import InputParser + parser = InputParser(config) + + assert(physics.dim == 2) + + if (len(self.qgrid_size) == 2): + cnn_layers = [[1] + self.qgrid_size] + cnn_layers += parser.getInput(['cnn_layers'], datatype=list) + + strides = parser.getInput(['strides'], fallback=[1] * (len(cnn_layers) - 1)) + paddings = parser.getInput(['paddings'], fallback=[0] * (len(cnn_layers) - 1)) + dilations = parser.getInput(['dilations'], fallback=[1] * (len(cnn_layers) - 1)) + + cnn_act_type = parser.getInput(['cnn_activation'], fallback='ReLU') + + batch_shape = parser.getInput(['batch_shape'], datatype=list) + data_shape = batch_shape + self.qgrid_size + + cnn_f = CNN2D(cnn_layers, 'forward', strides, paddings, + dilations, act_type=cnn_act_type, data_shape=data_shape) + cnn_b = CNN2D(cnn_layers[::-1], 'backward', strides[::-1], paddings[::-1], + dilations[::-1], act_type=cnn_act_type, data_shape=data_shape) + + mlp_layers = [np.prod(cnn_f.layer_sizes[-1])] + mlp_layers += parser.getInput(['mlp_layers'], datatype=list) + mlp_layers += [self.n_z] + + act_type = parser.getInput(['mlp_activation'], fallback='sigmoid') + threshold = parser.getInput(['threshold'], fallback=0.1) + value = parser.getInput(['value'], fallback=0.0) + + mlp_f = MultiLayerPerceptron(mlp_layers, act_type=act_type, + reshape_index=0, reshape_shape=cnn_f.layer_sizes[-1], + threshold=threshold, value=value) + mlp_b = MultiLayerPerceptron(mlp_layers[::-1], act_type=act_type, + reshape_index=-1, reshape_shape=cnn_b.layer_sizes[0], + threshold=threshold, value=value), + + self.encoder = torch.nn.Sequential(cnn_f, mlp_f) + self.decoder = torch.nn.Sequential(mlp_b, cnn_b) + + self.print_architecture() + + return + + def forward(self, x): + + x = self.encoder(x) + x = self.decoder(x) + + return x + + def export(self): + dict_ = {'autoencoder_param': self.cpu().state_dict()} + return dict_ + + def load(self, dict_): + self.load_state_dict(dict_['autoencoder_param']) + return + + def set_batch_shape(self, batch_shape): + data_shape = batch_shape + self.qgrid_size + + self.encoder[0].set_data_shape(data_shape) + self.decoder[1].set_data_shape(data_shape) + + self.print_architecture() + return + + def print_architecture(self): + self.encoder[0].print_data_shape() + self.encoder[1].print_architecture() + self.decoder[0].print_architecture() + self.decoder[1].print_data_shape() return \ No newline at end of file diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py index 9ff562a..adf03e4 100644 --- a/src/lasdi/networks.py +++ b/src/lasdi/networks.py @@ -90,6 +90,9 @@ def init_weight(self): for fc in self.fcs: torch.nn.init.xavier_uniform_(fc.weight) return + + def print_architecture(self): + 
print(self.layer_sizes) class CNN2D(torch.nn.Module): from enum import Enum @@ -97,10 +100,10 @@ class Mode(Enum): Forward = 1 Backward = -1 - def __init__(self, mode, channels, kernel_sizes, + def __init__(self, layer_sizes, mode, strides, paddings, dilations, groups=1, bias=True, padding_mode='zeros', - act_type='ReLU'): + act_type='ReLU', data_shape=None): super(CNN2D, self).__init__() if (mode == 'forward'): @@ -112,16 +115,15 @@ def __init__(self, mode, channels, kernel_sizes, else: raise RuntimeError('CNN2D: Unknown mode %s!' % mode) - self.channels = channels - self.n_layers = len(channels) - self.layer_sizes = np.zeros([self.n_layers, 3], dtype=int) - self.layer_sizes[:, 0] = channels + self.n_layers = len(layer_sizes) + self.layer_sizes = layer_sizes + self.channels = [layer_sizes[k][0] for k in range(self.n_layers)] - assert(len(kernel_sizes) == self.n_layers - 1) + # assert(len(kernel_sizes) == self.n_layers - 1) assert(len(strides) == self.n_layers - 1) assert(len(paddings) == self.n_layers - 1) assert(len(dilations) == self.n_layers - 1) - self.kernel_sizes = kernel_sizes + # self.kernel_sizes = kernel_sizes self.strides = strides self.paddings = paddings self.dilations = dilations @@ -135,8 +137,16 @@ def __init__(self, mode, channels, kernel_sizes, assert(act_type != 'threshold') self.act = act_dict[act_type]() + self.kernel_sizes = [] self.fcs = [] for k in range(self.n_layers - 1): + kernel_size = self.compute_kernel_size(self.layer_sizes[k][1:], self.layer_sizes[k+1][1:], + self.strides[k], self.paddings[k], self.dilations[k], self.mode) + out_shape = self.compute_output_layer_size(self.layer_sizes[k][1:], kernel_size, self.strides[k], + self.paddings[k], self.dilations[k], self.mode) + assert(self.layer_sizes[k+1][1:] == out_shape) + + self.kernel_sizes += [kernel_size] self.fcs += [module(self.channels[k], self.channels[k+1], self.kernel_sizes[k], stride=self.strides[k], padding=self.paddings[k], dilation=self.dilations[k], groups=self.groups, bias=self.bias, padding_mode=self.padding_mode)] @@ -144,7 +154,8 @@ def __init__(self, mode, channels, kernel_sizes, self.fcs = torch.nn.ModuleList(self.fcs) self.init_weight() - self.batch_reshape = None + if (data_shape is not None): + self.set_data_shape(data_shape) return @@ -165,23 +176,6 @@ def set_data_shape(self, data_shape : list): self.batch_reshape += data_shape[-2:] else: self.batch_reshape = list(data_shape) - - self.layer_sizes[idx, 1:] = data_shape[-2:] - - if (self.mode == CNN2D.Mode.Forward): - for k in range(self.n_layers - 1): - self.layer_sizes[k+1, 1:] = CNN2D.compute_output_layer_size(self.layer_sizes[k, 1:], - self.kernel_sizes[k], self.strides[k], self.paddings[k], - self.dilations[k], self.mode) - else: - for k in range(self.n_layers - 2, -1, -1): - self.layer_sizes[k, 1:] = CNN2D.compute_input_layer_size(self.layer_sizes[k+1, 1:], - self.kernel_sizes[k], self.strides[k], self.paddings[k], - self.dilations[k], self.mode) - - if (np.any(self.layer_sizes <= 0)): - self.print_data_shape() - raise RuntimeError("CNN2D.set_data_shape: given data shape does not fit with current architecture!") return def print_data_shape(self): @@ -190,6 +184,7 @@ def print_data_shape(self): print("batch reshape: ", self.batch_reshape) for k in range(self.n_layers - 1): print('input layer: ', self.layer_sizes[k], + 'kernel size: ', self.kernel_sizes[k], 'output layer: ', self.layer_sizes[k+1]) return @@ -207,29 +202,37 @@ def forward(self, x): x = x.view(self.batch_reshape) return x - def reshape_input_data(self, x): - if 
(x.dim() > 2): - if (x.shape[-3] != self.channels[0]): - assert(self.channels[0] == 1) - batch_reshape = [np.prod(x.shape[:-2]), 1] - else: - batch_reshape = [np.prod(x.shape[:-3]), x.shape[-3]] - - return x.view(batch_reshape + list(x.shape[-2:])) - elif (x.dim() == 2): - assert(self.channels[0] == 1) - return x.view([1] + list(x.shape)) - - def reshape_output_data(self, x): - assert(self.batch_shape is not None) - assert(x.dim() == 4) - assert(x.shape[0] == np.prod(self.batch_shape)) - if (self.layer_sizes[-1][0] == 1): - batch_reshape = x.shape[-2:] - else: - batch_reshape = x.shape[-3:] + @classmethod + def compute_kernel_size(cls, input_shape, output_shape, stride, padding, dilation, mode): + assert(len(input_shape) == 2) + assert(len(output_shape) == 2) + if (type(stride) is int): + stride = [stride, stride] + if (type(padding) is int): + padding = [padding, padding] + if (type(dilation) is int): + dilation = [dilation, dilation] - return x.view(self.batch_shape + list(batch_reshape)) + if (mode == CNN2D.Mode.Forward): + kern_H = (input_shape[0] + 2 * padding[0] - 1 - stride[0] * (output_shape[0] - 1)) / dilation[0] + 1 + kern_W = (input_shape[1] + 2 * padding[1] - 1 - stride[1] * (output_shape[1] - 1)) / dilation[1] + 1 + elif (mode == CNN2D.Mode.Backward): + kern_H = (output_shape[0] - (input_shape[0] - 1) * stride[0] + 2 * padding[0] - 1) / dilation[0] + 1 + kern_W = (output_shape[1] - (input_shape[1] - 1) * stride[1] + 2 * padding[1] - 1) / dilation[1] + 1 + else: + raise RuntimeError('CNN2D: Unknown mode %s!' % mode) + + if ((kern_H <= 0) or (kern_W <= 0)): + print("input shape: ", input_shape) + print("output shape: ", output_shape) + print("stride: ", stride) + print("padding: ", padding) + print("dilation: ", dilation) + print("resulting kernel size: ", [int(np.floor(kern_H)), int(np.floor(kern_W))]) + raise RuntimeError("CNN2D.compute_kernel_size: no feasible kernel size. 
" + "Adjust the architecture!") + + return [int(np.floor(kern_H)), int(np.floor(kern_W))] @classmethod def compute_input_layer_size(cls, output_shape, kernel_size, stride, padding, dilation, mode): From cbcef8fdc7c36299bf1d281338b3cde6cd8d1694 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Wed, 20 Nov 2024 19:03:18 -0800 Subject: [PATCH 11/15] update CNN2D test --- tests/test_CNN2D.py | 100 +++++++++----------------------------------- 1 file changed, 20 insertions(+), 80 deletions(-) diff --git a/tests/test_CNN2D.py b/tests/test_CNN2D.py index 099d80d..c2828a9 100644 --- a/tests/test_CNN2D.py +++ b/tests/test_CNN2D.py @@ -3,80 +3,19 @@ import numpy as np import torch -def test_compute_output_layer(): - depth = 2 - channels = [randint(2, 4) for _ in range(depth)] - strides = [randint(1, 3) for _ in range(depth-1)] - paddings = [randint(1, 3) for _ in range(depth-1)] - dilations = [randint(1, 3) for _ in range(depth-1)] - kernel_sizes = [[randint(4, 6) for _ in range(2)]] - - output_shape = [randint(10, 20), randint(10, 20)] - output_data = torch.rand([channels[-1]] + output_shape) - - cnnb = CNN2D('backward', channels[::-1], kernel_sizes[::-1], - strides[::-1], paddings[::-1], dilations[::-1]) - - input_shape = CNN2D.compute_output_layer_size(output_data.shape[1:], kernel_sizes[0], - strides[0], paddings[0], dilations[0], cnnb.mode) - input_data = cnnb(output_data) - assert(list(input_data.shape[-2:]) == input_shape) - - cnnf = CNN2D('forward', channels, kernel_sizes, - strides, paddings, dilations) - - output_shape0 = CNN2D.compute_output_layer_size(input_data.shape[1:], kernel_sizes[0], - strides[0], paddings[0], dilations[0], cnnf.mode) - assert(output_shape0 == output_shape) - - output_data0 = cnnf(input_data) - assert(output_data.shape == output_data0.shape) - - return - -def test_compute_input_layer(): - depth = 2 - channels = [randint(2, 4) for _ in range(depth)] - strides = [randint(1, 3) for _ in range(depth-1)] - paddings = [randint(1, 3) for _ in range(depth-1)] - dilations = [randint(1, 3) for _ in range(depth-1)] - kernel_sizes = [[randint(4, 6) for _ in range(2)]] - - output_shape = [randint(10, 20), randint(10, 20)] - - cnnf = CNN2D('forward', channels, kernel_sizes, - strides, paddings, dilations) - - input_shape = CNN2D.compute_input_layer_size(output_shape, kernel_sizes[0], - strides[0], paddings[0], dilations[0], cnnf.mode) - input_data = torch.rand([channels[0]] + input_shape) - - output_data = cnnf(input_data) - assert(list(output_data.shape[-2:]) == output_shape) - - cnnb = CNN2D('backward', channels[::-1], kernel_sizes[::-1], - strides[::-1], paddings[::-1], dilations[::-1]) - - output_shape0 = CNN2D.compute_input_layer_size(input_data.shape[1:], kernel_sizes[0], - strides[0], paddings[0], dilations[0], cnnb.mode) - assert(output_shape0 == output_shape) - - input_data0 = cnnb(output_data) - assert(input_data.shape == input_data0.shape) - return - def test_set_data_shape(): depth = 3 - channels = [randint(2, 4) for _ in range(depth)] - strides = [1] * (depth-1) + layer_sizes = [[randint(2, 4), randint(45, 50), randint(45, 50)], + [randint(2, 4), randint(12, 15), randint(12, 15)], + [randint(2, 4), randint(2, 4), randint(2, 4)]] + dilations = [1] * (depth-1) paddings = [randint(1, 3) for _ in range(depth-1)] - dilations = [randint(1, 3) for _ in range(depth-1)] - kernel_sizes = [[randint(4, 6), randint(4, 6)]] + [randint(2, 3) for _ in range(depth-2)] + strides = [randint(1, 3) for _ in range(depth-1)] - input_shape = [randint(3, 5), randint(10, 20), channels[0], 
randint(50, 60), randint(50, 60)] + input_shape = [randint(3, 5), randint(10, 20)] + layer_sizes[0] input_data = torch.rand(input_shape) - cnnf = CNN2D('forward', channels, kernel_sizes, + cnnf = CNN2D(layer_sizes, 'forward', strides, paddings, dilations) cnnf.set_data_shape(input_data.shape) @@ -87,7 +26,7 @@ def test_set_data_shape(): output_data = cnnf(input_data) - cnnb = CNN2D('backward', channels[::-1], kernel_sizes[::-1], + cnnb = CNN2D(layer_sizes[::-1], 'backward', strides[::-1], paddings[::-1], dilations[::-1]) cnnb.set_data_shape(input_data.shape) cnnb.print_data_shape() @@ -101,17 +40,18 @@ def test_set_data_shape(): def test_set_data_shape2(): depth = 3 - channels = [1] + [randint(2, 4) for _ in range(depth-1)] - strides = [1] * (depth-1) + layer_sizes = [[1, randint(45, 50), randint(45, 50)], + [randint(2, 4), randint(12, 15), randint(12, 15)], + [randint(2, 4), randint(2, 4), randint(2, 4)]] + dilations = [1] * (depth-1) paddings = [randint(1, 3) for _ in range(depth-1)] - dilations = [randint(1, 3) for _ in range(depth-1)] - kernel_sizes = [[randint(4, 6), randint(4, 6)]] + [randint(2, 3) for _ in range(depth-2)] + strides = [randint(1, 3) for _ in range(depth-1)] def test_func(input_shape_): input_data = torch.rand(input_shape_) - cnnf = CNN2D('forward', channels, kernel_sizes, - strides, paddings, dilations) + cnnf = CNN2D(layer_sizes, 'forward', + strides, paddings, dilations) cnnf.set_data_shape(input_data.shape) cnnf.print_data_shape() @@ -125,8 +65,8 @@ def test_func(input_shape_): output_data = cnnf(input_data) - cnnb = CNN2D('backward', channels[::-1], kernel_sizes[::-1], - strides[::-1], paddings[::-1], dilations[::-1]) + cnnb = CNN2D(layer_sizes[::-1], 'backward', + strides[::-1], paddings[::-1], dilations[::-1]) cnnb.set_data_shape(input_data.shape) cnnb.print_data_shape() @@ -135,13 +75,13 @@ def test_func(input_shape_): input_data0 = cnnb(output_data) assert(input_data0.shape == input_data.shape) - input_shape = [randint(50, 60), randint(50, 60)] + input_shape = layer_sizes[0][1:] test_func(input_shape) - input_shape = [randint(3, 5), randint(10, 20), randint(50, 60), randint(50, 60)] + input_shape = [randint(3, 5), randint(10, 20)] + layer_sizes[0][1:] test_func(input_shape) - input_shape = [randint(3, 5), randint(10, 20), channels[0], randint(50, 60), randint(50, 60)] + input_shape = [randint(3, 5), randint(10, 20)] + layer_sizes[0] test_func(input_shape) return \ No newline at end of file From c0a00c3645c84bb5e9ecc08f53a304912b60be2c Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Thu, 21 Nov 2024 15:12:42 -0800 Subject: [PATCH 12/15] documenting MLP. --- docs/source/conf.py | 1 + src/lasdi/networks.py | 98 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 93 insertions(+), 6 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 29afdaa..111cc6a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -25,6 +25,7 @@ ] autoapi_dirs = ['../../src'] +autoapi_python_class_content = 'both' napoleon_google_docstring = False napoleon_use_param = False diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py index adf03e4..3d5d7c4 100644 --- a/src/lasdi/networks.py +++ b/src/lasdi/networks.py @@ -28,43 +28,126 @@ } class MultiLayerPerceptron(torch.nn.Module): + """Vanilla multi-layer perceptron neural networks module. 
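+
+    A minimal usage sketch (the sizes below are illustrative only)::
+
+        mlp = MultiLayerPerceptron([16, 64, 3], act_type='tanh')
+        y = mlp(torch.rand([10, 16]))   # y.shape == torch.Size([10, 3])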
+ """ def __init__(self, layer_sizes, act_type='sigmoid', reshape_index=None, reshape_shape=None, threshold=0.1, value=0.0): + """ + Parameters + ---------- + layer_sizes : :obj:`list(int)` + List of vector dimensions of layers. + act_type : :obj:`str`, optional + Type of activation functions. By default :obj:`'sigmoid'` is used. + See :obj:`act_dict` for available types. + reshape_index : :obj:`int`, optinal + Index of layer to reshape input/output data. Either 0 or -1 is allowed. + + - 0 : the first (input) layer + - -1 : the last (output) layer + + By default the index is :obj:`None`, and reshaping is not executed. + reshape_shape : :obj:`list(int)`, optional + Target shape from/to which input/output data is reshaped. + Reshaping behavior changes by :attr:`reshape_index`. + By default the index is :obj:`None`, and reshaping is not executed. + For details on reshaping action, see :attr:`reshape_shape`. + + Note + ---- + :obj:`numpy.prod(reshape_shape) == layer_sizes[reshape_index]` + + """ super(MultiLayerPerceptron, self).__init__() - - # including input, hidden, output layers + self.n_layers = len(layer_sizes) + """:obj:`int` : Depth of layers including input, hidden, output layers.""" + self.layer_sizes = layer_sizes + """:obj:`list(int)` : Vector dimensions corresponding to each layer.""" - # Linear features between layers self.fcs = [] + """:obj:`torch.nn.ModuleList` : linear features between layers.""" for k in range(self.n_layers-1): self.fcs += [torch.nn.Linear(layer_sizes[k], layer_sizes[k + 1])] self.fcs = torch.nn.ModuleList(self.fcs) + self.init_weight() # Reshape input or output layer - assert((reshape_index is None) or (reshape_index in [0, -1])) + assert(reshape_index in [0, -1, None]) assert((reshape_shape is None) or (np.prod(reshape_shape) == layer_sizes[reshape_index])) self.reshape_index = reshape_index + """:obj:`int` : Index of layer to reshape input/output data. + + - 0 : the first (input) layer + - -1 : the last (output) layer + - :obj:`None` : no reshaping + """ self.reshape_shape = reshape_shape + """:obj:`list(int)` : Target shape from/to which input/output data is reshaped. + For a reshape_shape :math:`[R_1, R_2, \ldots, R_n]`, + + - if :attr:`reshape_index` is 0, the input data shape is changed as + + .. math:: + [\ldots, R_1, R_2, \ldots, R_n] \\longrightarrow [\ldots, \prod_{i=1}^n R_i] + + - if :attr:`reshape_index` is -1, the output data shape is changed as + + .. math:: + [\ldots, \prod_{i=1}^n R_i] \\longrightarrow [\ldots, R_1, R_2, \ldots, R_n] + + - :obj:`None` : no reshaping + """ # Initalize activation function self.act_type = act_type + """:obj:`str` : Type of activation functions.""" + if act_type == "threshold": - self.act = act_dict[act_type](threshold, value) + act = act_dict[act_type](threshold, value) + """:obj:`torch.nn.Module` : Activation function used throughout the layers.""" elif act_type == "multihead": raise RuntimeError("MultiLayerPerceptron: MultiheadAttention requires a different architecture!") #all other activation functions initialized here else: - self.act = act_dict[act_type]() + act = act_dict[act_type]() + + self.act = act + """:obj:`torch.nn.Module` : Activation function used throughout the layers.""" return def forward(self, x): + """Evaluate through the module. + + Parameters + ---------- + x : :obj:`torch.Tensor` + Input data to pass into the module. + + Note + ---- + For :attr:`reshape_index` =0, + the last :math:`n` dimensions of :obj:`x` must match + :attr:`reshape_shape` :math:`=[R_1, R_2, \ldots, R_n]`. 
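+        For example, with :obj:`reshape_shape=[2, 3]` an input of shape
+        :math:`[10, 2, 3]` is viewed as :math:`[10, 6]` before the first layer.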
+ + Returns + ------- + :obj:`torch.Tensor` + Output tensor evaluated from the module. + + Note + ---- + For :attr:`reshape_index` =-1, + the last dimension of the output tensor will be reshaped as + :attr:`reshape_shape` :math:`=[R_1, R_2, \ldots, R_n]`. + + """ if (self.reshape_index == 0): # make sure the input has a proper shape assert(list(x.shape[-len(self.reshape_shape):]) == self.reshape_shape) @@ -86,12 +169,15 @@ def forward(self, x): return x def init_weight(self): + """Initialize weights of linear features according to Xavier uniform distribution.""" + # TODO(kevin): support other initializations? for fc in self.fcs: torch.nn.init.xavier_uniform_(fc.weight) return def print_architecture(self): + """Print out the architecture of the module.""" print(self.layer_sizes) class CNN2D(torch.nn.Module): From 29fde8ab3047bd15f67bf1baf084cdfc105ec405 Mon Sep 17 00:00:00 2001 From: Kevin Chung Date: Thu, 21 Nov 2024 16:46:18 -0800 Subject: [PATCH 13/15] documenting CNN2D. --- src/lasdi/networks.py | 240 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py index 3d5d7c4..dfd17e3 100644 --- a/src/lasdi/networks.py +++ b/src/lasdi/networks.py @@ -181,15 +181,60 @@ def print_architecture(self): print(self.layer_sizes) class CNN2D(torch.nn.Module): + """Two-dimensional convolutional neural networks.""" + from enum import Enum class Mode(Enum): + """Enumeration to specify direction of CNN.""" Forward = 1 + """Contracting direction""" Backward = -1 + """Expanding direction""" def __init__(self, layer_sizes, mode, strides, paddings, dilations, groups=1, bias=True, padding_mode='zeros', act_type='ReLU', data_shape=None): + """ + Parameters + ---------- + layer_sizes : :obj:`numpy.array` + 2d array of tensor dimension of each layer. + See :attr:`layer_sizes`. + mode : :obj:`str` + Direction of CNN + - `forward`: contracting direction + - `backward`: expanding direction + strides : :obj:`list` + List of strides corresponding to each layer. + Each stride is either integer or tuple. + paddings : :obj:`list` + List of paddings corresponding to each layer. + Each padding is either integer or tuple. + dilations : :obj:`list` + List of dilations corresponding to each layer. + Each dilation is either integer or tuple. + groups : :obj:`int`, optional + Groups that applies to all layers. By default 1 + bias : :obj:`bool`, optional + Bias that applies to all layers. By default :obj:`True` + padding_mode : :obj:`str`, optional + Padding_mode that applies to all layers. By default :obj:`'zeros'` + act_type : :obj:`str`, optional + Activation function applied between all layers. By default :obj:`'ReLU'`. + See :obj:`act_dict` for available types. + data_shape : :obj:`list(int)`, optional + Data shape to/from which output/input data is reshaped. + See :attr:`data_shape` for details. + + Note + ---- + :obj:`len(strides) == layer_sizes.shape[0] - 1` + + :obj:`len(paddings) == layer_sizes.shape[0] - 1` + + :obj:`len(dilations) == layer_sizes.shape[0] - 1` + """ super(CNN2D, self).__init__() if (mode == 'forward'): @@ -202,8 +247,24 @@ def __init__(self, layer_sizes, mode, raise RuntimeError('CNN2D: Unknown mode %s!' % mode) self.n_layers = len(layer_sizes) + """:obj:`int` : Depth of layers including input, hidden, output layers.""" + self.layer_sizes = layer_sizes + """:obj:`numpy.array` : 2d integer array of shape :math:`[n\_layers, 3]`, + indicating tensor dimension of each layer. 
+        For the :math:`k`-th layer, the tensor dimension is
+
+        .. math::
+            layer\\_sizes[k] = [channels, height, width]
+
+        """
+
         self.channels = [layer_sizes[k][0] for k in range(self.n_layers)]
+        """:obj:`list(int)` : List of channel sizes that
+        determine the architecture of each layer.
+        For details on how the architecture is determined,
+        see the `torch API documentation <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_.
+        """

         # assert(len(kernel_sizes) == self.n_layers - 1)
         assert(len(strides) == self.n_layers - 1)
@@ -211,20 +272,63 @@ def __init__(self, layer_sizes, mode,
         assert(len(dilations) == self.n_layers - 1)

         # self.kernel_sizes = kernel_sizes
         self.strides = strides
+        """:obj:`list` : List of strides that
+        determine the architecture of each layer.
+        Each stride can be either an integer or a tuple.
+        For details on how the architecture is determined,
+        see the `torch API documentation <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_.
+        """
         self.paddings = paddings
+        """:obj:`list` : List of paddings that
+        determine the architecture of each layer.
+        Each padding can be either an integer or a tuple.
+        For details on how the architecture is determined,
+        see the `torch API documentation <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_.
+        """
         self.dilations = dilations
+        """:obj:`list` : List of dilations that
+        determine the architecture of each layer.
+        Each dilation can be either an integer or a tuple.
+        For details on how the architecture is determined,
+        see the `torch API documentation <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_.
+        """
         self.groups = groups
+        """:obj:`int` : Groups setting that determines the architecture of all layers.
+        For details on how the architecture is determined,
+        see the `torch API documentation <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_.
+        """
         self.bias = bias
+        """:obj:`bool` : Bias flag that determines the architecture of all layers.
+        For details on how the architecture is determined,
+        see the `torch API documentation <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_.
+        """
         self.padding_mode = padding_mode
+        """:obj:`str` : Padding mode that determines the architecture of all layers.
+        For details on how the architecture is determined,
+        see the `torch API documentation <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_.
+        """

         from lasdi.networks import act_dict
         # TODO(kevin): do not use threshold activation for now.
         assert(act_type != 'threshold')
         self.act = act_dict[act_type]()
+        """:obj:`torch.nn.Module` : Activation function applied between all layers."""

         self.kernel_sizes = []
+        """:obj:`list` : List of kernel sizes that
+        determine the architecture of each layer.
+        Each kernel size can be either an integer or a tuple.
+        The kernel size is automatically determined so that the
+        output of the corresponding layer has the shape of the next layer.
+
+        For details on how the architecture is determined,
+        see the `torch API documentation <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_.
+        """
         self.fcs = []
+        """:obj:`torch.nn.ModuleList` : Module list of
+        :obj:`torch.nn.Conv2d` (forward) or :obj:`torch.nn.ConvTranspose2d` (backward)."""
+
         for k in range(self.n_layers - 1):
             kernel_size = self.compute_kernel_size(self.layer_sizes[k][1:], self.layer_sizes[k+1][1:],
                                                    self.strides[k], self.paddings[k], self.dilations[k], self.mode)
@@ -240,12 +344,58 @@ def __init__(self, layer_sizes, mode,
         self.fcs = torch.nn.ModuleList(self.fcs)
         self.init_weight()

+        self.data_shape = data_shape
+        """:obj:`list(int)` : Tensor dimension of the training data
+        that will be passed into/out of the module."""
+
+        self.batch_reshape = None
+        """:obj:`list(int)` : Tensor dimension to which input/output data is reshaped.
+
+        - Forward :attr:`mode`: shape of a 3d-/4d-array
+        - Backward :attr:`mode`: shape of an arbitrary nd-array
+
+        Determined by :meth:`set_data_shape`.
+ """ if (data_shape is not None): self.set_data_shape(data_shape) return def set_data_shape(self, data_shape : list): + """ + Set the batch reshape in order to reshape the input/output batches + based on given training data shape. + + Forward :attr:`mode`: + + For :obj:`data_shape` :math:`=[N_1,\ldots,N_m]` + and the first layer size of :math:`[C_1, H_1, W_1]`, + + .. math:: + batch\_reshape = [R_1, C_1, H_1, W_1], + + where :math:`\prod_{i=1}^m N_i = R_1\\times C_1\\times H_1\\times W_1`. + + If :math:`m=2` and :math:`C_1=1`, then + + .. math:: + batch\_reshape = [C_1, H_1, W_1]. + + Note + ---- + For forward mode, :obj:`data_shape[-2:]==self.layer_sizes[0, 1:]` must be true. + + + Backward :attr:`mode`: + + :attr:`batch_shape` is the same as :obj:`data_shape`. + Output tensor of the module is reshaped as :obj:`data_shape`. + + Parameters + ---------- + data_shape : :obj:`list(int)` + Shape of the input/output data tensor for forward/backward mode. + """ idx = 0 if (self.mode == CNN2D.Mode.Forward) else -1 if (self.mode == CNN2D.Mode.Forward): @@ -262,9 +412,13 @@ def set_data_shape(self, data_shape : list): self.batch_reshape += data_shape[-2:] else: self.batch_reshape = list(data_shape) + + self.data_shape = list(data_shape) return def print_data_shape(self): + """Print out the data shape and architecture of the module.""" + mode_str = "forward" if (self.mode == CNN2D.Mode.Forward) else "backward" print("mode: ", mode_str) print("batch reshape: ", self.batch_reshape) @@ -275,6 +429,25 @@ def print_data_shape(self): return def forward(self, x): + """Evaluate through the module. + + Parameters + ---------- + x : :obj:`torch.nn.Tensor` + Input tensor to pass into the module. + + - Forward mode: nd array of shape :attr:`data_shape` + - Backward mode: Same shape as the output tensor of forward mode + + Returns + ------- + :obj:`torch.nn.Tensor` + Output tensor evaluated from the module. + + - Forward mode: 3d array of shape :obj:`self.layer_sizes[-1]`, + or 4d array of shape :obj:`[self.batch_reshape[0]] + self.layer_sizes[-1]` + - Backward mode: nd array of shape :attr:`batch_reshape` + """ if ((self.batch_reshape is not None) and (self.mode == CNN2D.Mode.Forward)): x = x.view(self.batch_reshape) @@ -290,6 +463,28 @@ def forward(self, x): @classmethod def compute_kernel_size(cls, input_shape, output_shape, stride, padding, dilation, mode): + """Compute kernel size that produces desired output shape from given input shape. + + The formula is based on torch API documentation + for `Conv2d `_ + and `ConvTranspose2d `_. + + Parameters + ---------- + input_shape : :obj:`int` or :obj:`tuple(int)` + output_shape : :obj:`int` or :obj:`tuple(int)` + stride : :obj:`int` or :obj:`tuple(int)` + padding : :obj:`int` or :obj:`tuple(int)` + dilation : :obj:`int` or :obj:`tuple(int)` + mode : :class:`CNN2D.Mode` + Direction of CNN. Either :attr:`CNN2D.Mode.Forward` or :attr:`CNN2D.Mode.Backward` + + Returns + ------- + :obj:`list(int)` + List of two integers indicating height and width of kernel. + + """ assert(len(input_shape) == 2) assert(len(output_shape) == 2) if (type(stride) is int): @@ -322,6 +517,28 @@ def compute_kernel_size(cls, input_shape, output_shape, stride, padding, dilatio @classmethod def compute_input_layer_size(cls, output_shape, kernel_size, stride, padding, dilation, mode): + """Compute input layer size that produces desired output shape with given kernel size. + + The formula is based on torch API documentation + for `Conv2d `_ + and `ConvTranspose2d `_. 
+
+        Parameters
+        ----------
+        output_shape : :obj:`int` or :obj:`tuple(int)`
+        kernel_size : :obj:`int` or :obj:`tuple(int)`
+        stride : :obj:`int` or :obj:`tuple(int)`
+        padding : :obj:`int` or :obj:`tuple(int)`
+        dilation : :obj:`int` or :obj:`tuple(int)`
+        mode : :class:`CNN2D.Mode`
+            Direction of CNN. Either :attr:`CNN2D.Mode.Forward` or :attr:`CNN2D.Mode.Backward`.
+
+        Returns
+        -------
+        :obj:`list(int)`
+            List of two integers indicating height and width of the input layer.
+
+        """
         assert(len(output_shape) == 2)
         if (type(kernel_size) is int):
             kernel_size = [kernel_size, kernel_size]
@@ -345,6 +562,28 @@ def compute_input_layer_size(cls, output_shape, kernel_size, stride, padding, di

     @classmethod
     def compute_output_layer_size(cls, input_shape, kernel_size, stride, padding, dilation, mode):
+        """Compute the output layer size produced from the given input shape and kernel size.
+
+        The formula is based on the torch API documentation
+        for `Conv2d <https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html>`_
+        and `ConvTranspose2d <https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html>`_.
+
+        Parameters
+        ----------
+        input_shape : :obj:`int` or :obj:`tuple(int)`
+        kernel_size : :obj:`int` or :obj:`tuple(int)`
+        stride : :obj:`int` or :obj:`tuple(int)`
+        padding : :obj:`int` or :obj:`tuple(int)`
+        dilation : :obj:`int` or :obj:`tuple(int)`
+        mode : :class:`CNN2D.Mode`
+            Direction of CNN. Either :attr:`CNN2D.Mode.Forward` or :attr:`CNN2D.Mode.Backward`.
+
+        Returns
+        -------
+        :obj:`list(int)`
+            List of two integers indicating height and width of the output layer.
+
+        """
         assert(len(input_shape) == 2)
         if (type(kernel_size) is int):
             kernel_size = [kernel_size, kernel_size]
@@ -377,6 +616,7 @@ def compute_output_layer_size(cls, input_shape, kernel_size, stride, padding, di

         return [int(np.floor(Hout)), int(np.floor(Wout))]

     def init_weight(self):
+        """Initialize weights of the convolutional layers according to the Xavier uniform distribution."""
         # TODO(kevin): support other initializations?
         for fc in self.fcs:
             torch.nn.init.xavier_uniform_(fc.weight)
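Example (editor's sketch, not part of the patch series): `act_dict` is a plain name-to-class mapping, so a configuration string resolves to an activation in one lookup. The chosen keys below are arbitrary.

```python
import torch
from lasdi.networks import act_dict

act = act_dict['GELU']()               # look up a class by name, then instantiate
assert isinstance(act, torch.nn.GELU)

# 'threshold' is the one entry whose constructor needs arguments,
# which is why MultiLayerPerceptron special-cases it:
thr = act_dict['threshold'](0.1, 0.0)  # threshold=0.1, value=0.0
```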
From 58367fa726522e49d4cb871b1a31075608ab6b31 Mon Sep 17 00:00:00 2001
From: Kevin Chung
Date: Thu, 21 Nov 2024 16:54:18 -0800
Subject: [PATCH 14/15] documenting activation type

---
 src/lasdi/networks.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py
index dfd17e3..691f870 100644
--- a/src/lasdi/networks.py
+++ b/src/lasdi/networks.py
@@ -1,7 +1,6 @@
 import torch
 import numpy as np

-# activation dict
 act_dict = {'ELU': torch.nn.ELU,
             'hardshrink': torch.nn.Hardshrink,
             'hardsigmoid': torch.nn.Hardsigmoid,
             'hardtanh': torch.nn.Hardtanh,
             'hardswish': torch.nn.Hardswish,
             'leakyReLU': torch.nn.LeakyReLU,
             'logsigmoid': torch.nn.LogSigmoid,
             'multihead': torch.nn.MultiheadAttention,
             'PReLU': torch.nn.PReLU,
             'ReLU': torch.nn.ReLU,
             'ReLU6': torch.nn.ReLU6,
             'RReLU': torch.nn.RReLU,
             'SELU': torch.nn.SELU,
             'CELU': torch.nn.CELU,
             'GELU': torch.nn.GELU,
             'sigmoid': torch.nn.Sigmoid,
             'SiLU': torch.nn.SiLU,
             'mish': torch.nn.Mish,
             'softplus': torch.nn.Softplus,
             'softshrink': torch.nn.Softshrink,
             'tanh': torch.nn.Tanh,
             'tanhshrink': torch.nn.Tanhshrink,
             'threshold': torch.nn.Threshold,
             }
+""":obj:`dict` : Dictionary mapping names to activation functions.
+
+- :obj:`'ELU'`: :obj:`torch.nn.ELU`
+- :obj:`'hardshrink'`: :obj:`torch.nn.Hardshrink`
+- :obj:`'hardsigmoid'`: :obj:`torch.nn.Hardsigmoid`
+- :obj:`'hardtanh'`: :obj:`torch.nn.Hardtanh`
+- :obj:`'hardswish'`: :obj:`torch.nn.Hardswish`
+- :obj:`'leakyReLU'`: :obj:`torch.nn.LeakyReLU`
+- :obj:`'logsigmoid'`: :obj:`torch.nn.LogSigmoid`
+- :obj:`'multihead'`: :obj:`torch.nn.MultiheadAttention`
+- :obj:`'PReLU'`: :obj:`torch.nn.PReLU`
+- :obj:`'ReLU'`: :obj:`torch.nn.ReLU`
+- :obj:`'ReLU6'`: :obj:`torch.nn.ReLU6`
+- :obj:`'RReLU'`: :obj:`torch.nn.RReLU`
+- :obj:`'SELU'`: :obj:`torch.nn.SELU`
+- :obj:`'CELU'`: :obj:`torch.nn.CELU`
+- :obj:`'GELU'`: :obj:`torch.nn.GELU`
+- :obj:`'sigmoid'`: :obj:`torch.nn.Sigmoid`
+- :obj:`'SiLU'`: :obj:`torch.nn.SiLU`
+- :obj:`'mish'`: :obj:`torch.nn.Mish`
+- :obj:`'softplus'`: :obj:`torch.nn.Softplus`
+- :obj:`'softshrink'`: :obj:`torch.nn.Softshrink`
+- :obj:`'tanh'`: :obj:`torch.nn.Tanh`
+- :obj:`'tanhshrink'`: :obj:`torch.nn.Tanhshrink`
+- :obj:`'threshold'`: :obj:`torch.nn.Threshold`
+"""

 class MultiLayerPerceptron(torch.nn.Module):
     """Vanilla multi-layer perceptron neural networks module.

From c96881e2cf5aa92edf6646c82f93f92e396a20b2 Mon Sep 17 00:00:00 2001
From: Kevin Chung
Date: Thu, 21 Nov 2024 16:58:38 -0800
Subject: [PATCH 15/15] minor

---
 src/lasdi/networks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lasdi/networks.py b/src/lasdi/networks.py
index 691f870..f15195f 100644
--- a/src/lasdi/networks.py
+++ b/src/lasdi/networks.py
@@ -471,7 +471,7 @@ def forward(self, x):

             - Forward mode: 3d array of shape :obj:`self.layer_sizes[-1]`,
               or 4d array of shape :obj:`[self.batch_reshape[0]] + self.layer_sizes[-1]`
-            - Backward mode: nd array of shape :attr:`batch_reshape`
+            - Backward mode: nd array of shape :attr:`data_shape` (equal to :attr:`batch_reshape`)
         """
         if ((self.batch_reshape is not None) and (self.mode == CNN2D.Mode.Forward)):
             x = x.view(self.batch_reshape)
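Example (editor's sketch, not part of the patch series): putting patches 13-15 together, a forward/backward pair should round-trip the documented data shapes. This assumes the kernel sizes resolve to integers for the chosen shapes (here 4x4) and that a backward network takes the reversed `layer_sizes`; the shapes themselves are arbitrary.

```python
import numpy as np
import torch
from lasdi.networks import CNN2D

layer_sizes = np.array([[1, 28, 28],    # [channels, height, width] per layer
                        [4, 13, 13]])

enc = CNN2D(layer_sizes, 'forward', strides=[2], paddings=[0], dilations=[1],
            data_shape=[5, 28, 28])
dec = CNN2D(layer_sizes[::-1], 'backward', strides=[2], paddings=[0], dilations=[1],
            data_shape=[5, 28, 28])

x = torch.randn(5, 28, 28)   # batch of five 28x28 snapshots
z = enc(x)                   # batch_reshape [5, 1, 28, 28] -> output [5, 4, 13, 13]
y = dec(z)                   # reshaped back to data_shape [5, 28, 28]
assert y.shape == x.shape
```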