Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
36b6c8b
Data: Split lengths changed. File names changed.
rahulkfernandes Feb 26, 2026
c697b05
Feat: Early stopping implemented
rahulkfernandes Feb 27, 2026
253c394
Hyper: Best manual hparams for attentionLSTM.
rahulkfernandes Feb 27, 2026
6ca9d06
Hyper: best attention-lstm hparams for now
rahulkfernandes Feb 27, 2026
ad13d58
LR scheduler implemented
rahulkfernandes Feb 28, 2026
6f35e47
Fix: LR scheduler added to train one mode
rahulkfernandes Feb 28, 2026
5f5fbbe
Feat: Custom_loss_10, best for now, new hyper
rahulkfernandes Feb 28, 2026
142da63
Hyper: dropout increased
rahulkfernandes Feb 28, 2026
64b25b2
Feat: Saves summary of validation sharpes and returns, tqdm loop
rahulkfernandes Feb 28, 2026
5f32ecc
Hyper: Batch size reduced to 64
rahulkfernandes Feb 28, 2026
f0b52f6
Model: TFT implemented
rahulkfernandes Feb 28, 2026
0eed503
Hyper: Improved AttentionLSTM hparams, best for now
rahulkfernandes Mar 1, 2026
8551a45
Model: Better TFT implementation added, LiteTFT -> TemporalTransformerE…
rahulkfernandes Mar 1, 2026
09d1008
Model: LSTMTransformer implemented
rahulkfernandes Mar 1, 2026
15e4632
Fix: _train_one_helper bug fixed. global seed bug fixed
rahulkfernandes Mar 1, 2026
4f3b1aa
Refactor: layers directory separated
rahulkfernandes Mar 1, 2026
8d304f6
Model: Inverted AttentionLSTM implemented
rahulkfernandes Mar 1, 2026
b8d68df
Model: SOTA DeformTime added (copied code), needs refactor
rahulkfernandes Mar 1, 2026
cf2f495
Refactor: Seed added to hparams config
rahulkfernandes Mar 1, 2026
d812635
Refactor: DeformTime refactored
rahulkfernandes Mar 2, 2026
2ed2058
Fix: Imports Deform with __init__.py
rahulkfernandes Mar 2, 2026
d915e91
Hyper: LSTMTransformer tuned better, Model 2nd to AttentionLSTM
rahulkfernandes Mar 2, 2026
37ac80a
Fix DeformTime runtime configuration
Mar 2, 2026
59644fe
Fix: Deform uses CPU in grid
rahulkfernandes Mar 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 184 additions & 20 deletions financial_loss_functions/config/hparams.json
Original file line number Diff line number Diff line change
@@ -1,45 +1,205 @@
{
{
"seed": 50,
"rolling_windows": {
"in_size": 200,
"out_size": 50,
"in_size": 120,
"out_size": 60,
"stride": 1
},
"nn_models": {
"BaseLSTM": {
"model": {
"hidden_size": 256,
"num_layers": 2,
"hidden_size": 16,
"num_layers": 4,
"dropout": 0.2,
"equal_prior": true
"equal_prior": false
},
"optimizer": {
"lr": 1e-4,
"weight_decay": 1e-5
"weight_decay": 1e-2
},
"train" : {
"train_batch_size": 256,
"val_batch_size": 2,
"train_batch_size": 64,
"val_batch_size": 64,
"clip_grad_norm": 0.5,
"epochs": 100
"epochs": 200,
"early_stopping": true,
"early_stop_patience": 20,
"early_stop_min_delta": 1e-5
},
"scheduler": {
"factor": 0.5,
"patience": 10,
"min_lr": 1e-6
}
},
"AttentionLSTM": {
"model": {
"hidden_size": 128,
"num_layers": 3,
"hidden_size": 16,
"num_layers": 4,
"attention_heads": 2,
"dropout": 0.2,
"equal_prior": false
},
"optimizer": {
"lr": 1e-4,
"weight_decay": 1e-2
},
"train": {
"train_batch_size": 64,
"val_batch_size": 64,
"clip_grad_norm": 0.5,
"epochs": 200,
"early_stopping": true,
"early_stop_patience": 20,
"early_stop_min_delta": 1e-5
},
"scheduler": {
"factor": 0.5,
"patience": 10,
"min_lr": 1e-6
}
},
"TemporalTransformer": {
"model": {
"hidden_size": 16,
"num_layers": 2,
"attention_heads": 2,
"dropout": 0.2,
"equal_prior": true
"expansion_factor": 4,
"max_seq_len": 120
},
"optimizer": {
"lr": 1e-4,
"weight_decay": 1e-4
"weight_decay": 1e-2
},
"train": {
"train_batch_size": 256,
"val_batch_size": 2,
"train_batch_size": 64,
"val_batch_size": 64,
"clip_grad_norm": 0.1,
"epochs": 200,
"early_stopping": true,
"early_stop_patience": 20,
"early_stop_min_delta": 1e-5
},
"scheduler": {
"factor": 0.5,
"patience": 10,
"min_lr": 1e-6
}
},
"TFT": {
"model": {
"hidden_size": 16,
"num_layers": 1,
"attention_heads": 2,
"dropout": 0.4,
"expansion_factor": 2,
"max_seq_len": 120
},
"optimizer": {
"lr": 1e-4,
"weight_decay": 1e-2
},
"train": {
"train_batch_size": 64,
"val_batch_size": 64,
"clip_grad_norm": 0.5,
"epochs": 100
"epochs": 200,
"early_stopping": true,
"early_stop_patience": 20,
"early_stop_min_delta": 1e-5
},
"scheduler": {
"factor": 0.5,
"patience": 10,
"min_lr": 1e-6
}
},
"LSTMTransformer": {
"model": {
"hidden_size": 32,
"num_layers": 2,
"attention_heads": 2,
"dropout": 0.5,
"expansion_factor": 4,
"max_seq_len": 120
},
"optimizer": {
"lr": 1e-5,
"weight_decay": 1e-3
},
"train": {
"train_batch_size": 64,
"val_batch_size": 64,
"clip_grad_norm": 0.5,
"epochs": 200,
"early_stopping": true,
"early_stop_patience": 20,
"early_stop_min_delta": 1e-5
},
"scheduler": {
"factor": 0.5,
"patience": 10,
"min_lr": 1e-8
}
},
"InvertedAttentionLSTM": {
"model": {
"hidden_size": 16,
"num_layers": 4,
"attention_heads": 8,
"dropout": 0.5,
"max_seq_len": 120
},
"optimizer": {
"lr": 1e-4,
"weight_decay": 1e-3
},
"train": {
"train_batch_size": 64,
"val_batch_size": 64,
"clip_grad_norm": 0.5,
"epochs": 200,
"early_stopping": true,
"early_stop_patience": 20,
"early_stop_min_delta": 1e-5
},
"scheduler": {
"factor": 0.5,
"patience": 10,
"min_lr": 1e-6
}
},
"DeformTime": {
"model": {
"seq_len": 120,
"e_layers": 2,
"d_layers": 2,
"d_model": 16,
"attention_heads": 4,
"kernel_size": 4,
"dropout": 0.2,
"n_reshape": 12,
"patch_len": 6,
"stride": 6
},
"optimizer": {
"lr": 1e-4,
"weight_decay": 1e-3
},
"train": {
"train_batch_size": 64,
"val_batch_size": 64,
"clip_grad_norm": 0.5,
"epochs": 200,
"early_stopping": true,
"early_stop_patience": 20,
"early_stop_min_delta": 1e-5
},
"scheduler": {
"factor": 0.5,
"patience": 10,
"min_lr": 1e-6
}
}
},
Expand Down Expand Up @@ -68,13 +228,17 @@
"lambda2": 0.1
},
"custom_loss_8" : {
"lambda1": 0.01,
"lambda2": 0.01,
"lambda3": 0.1
"log_ret_lambda": 0.01,
"cvar_lambda": 0.1,
"risk_p_lambda": 0.1
},
"custom_loss_9" : {
"lambda1": 0.01,
"lambda2": 0.1
},
"custom_loss_10" : {
"cvar_lambda": 0.1,
"risk_p_lambda": 0.1
}
},
"trad_models": {
Expand Down
6 changes: 3 additions & 3 deletions financial_loss_functions/config/paths.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
"crsp_dir": ""
},
"raw_files": {
"train": "combined_predictors_train.csv",
"val": "combined_predictors_validation.csv",
"test": "combined_predictors_test.csv"
"train": "crsp_train_2019.csv",
"val": "crsp_val_2021.csv",
"test": "crsp_test_2023.csv"
},
"processed_paths": {
"returns_train": "data/processed/ret_train.csv",
Expand Down
5 changes: 4 additions & 1 deletion financial_loss_functions/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,7 @@ seaborn==0.13.0
statsmodels==0.14.5
torch==2.9.1
torchvision==0.24.1
optuna==4.6.0
optuna==4.6.0
tqdm==4.67.1
einops==0.8.2
timm==1.0.25
6 changes: 5 additions & 1 deletion financial_loss_functions/scripts/run_training_one.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import signal
import argparse
from src.utils.io import load_path_config, load_config

# @author: Atharva Vaidya - This fallback helps in allowing unsupported MPS ops to run through CPU when DeformTime triggers them.
os.environ.setdefault('PYTORCH_ENABLE_MPS_FALLBACK', '1')

from src.training.pipeline import run_training_one_model

_interrupted = False
Expand Down Expand Up @@ -123,4 +127,4 @@ def cleanup_on_interrupt():
print(f"\nPipeline failed with error: {e}")
import traceback
traceback.print_exc()
sys.exit(1)
sys.exit(1)
Loading