HariDaCoder · HariDaCoder · Sep 24, 2025 · Oct 1, 2025 · Oct 4, 2025 · Oct 4, 2025
diff --git a/environment.yml b/environment.yml
@@ -4,7 +4,7 @@ channels:
   - defaults
 dependencies:
   - pip=21.2.4
-  - python=3.8.12
+  - python=3.9  # pinned to the 3.9 series only; previously the exact release 3.8.12
   - pytorch=1.11.0
   - pip:
     - jupyter==1.0.0

diff --git a/src/conf/ar2.yaml b/src/conf/ar2.yaml
@@ -0,0 +1,20 @@
+# Config for the `ar2` data sampler, layered on base.yaml using the same
+# `inherit` mechanism and curriculum keys as the other configs in src/conf.
+inherit:
+    - base.yaml
+
+training:
+    data: ar2
+    data_kwargs:
+        rho1: 0.5
+        rho2: 0.3
+        noise_std: 0.1
+
+    curriculum:
+        points:
+            start: 5
+            end: 40
+            inc: 5
+            # NOTE(review): interval is copied from the sibling configs (2000);
+            # confirm the intended cadence for this experiment.
+            interval: 2000
diff --git a/src/conf/base.yaml b/src/conf/base.yaml
@@ -9,7 +9,7 @@ model:
 training:
     data: gaussian
     task_kwargs: {}
-    batch_size: 64
+    batch_size: 256  # shared base value, raised from 64
     learning_rate: 0.0001
     save_every_steps: 1000
     keep_every_steps: 100000

diff --git a/src/conf/case1_w_sparse_uniform_x.yaml b/src/conf/case1_w_sparse_uniform_x.yaml
@@ -0,0 +1,40 @@
+# Case 1 ("Ridge Trap"): sparse_regression_killer task on uniform inputs.
+inherit:
+    - base.yaml
+
+model:
+    n_dims: 20
+    n_positions: 101
+
+training:
+    task: sparse_regression_killer
+    task_kwargs:
+        k_sparse: 2
+        scale: 1.0
+    data: uniform
+    data_kwargs: {}
+    batch_size: 64
+    learning_rate: 0.0001
+    train_steps: 500001
+    # Curriculum schedules; field semantics are defined by the training code.
+    curriculum:
+        dims:
+            start: 5
+            end: 20
+            inc: 1
+            interval: 2000
+        points:
+            start: 11
+            end: 41
+            inc: 2
+            interval: 2000
+
+out_dir: ../models/sparse_regression_killer
+
+# Weights & Biases run metadata.
+wandb:
+    project: "in-context-training"
+    entity: "hai-trinh220970-ho-chi-minh-city-university-of-technology"
+    name: "case1_sparse_regression"
+    notes: "Case 1: Sparse Regression - only k=2 dims non-zero - Ridge Trap"
+    log_every_steps: 100
diff --git a/src/conf/case2.yaml b/src/conf/case2.yaml
@@ -0,0 +1,41 @@
+# Case 2 ("OLS Enemy"): heavy_tail_noise_killer task with t-student noise.
+inherit:
+    - base.yaml
+
+model:
+    n_dims: 20
+    n_positions: 101
+
+training:
+    task: heavy_tail_noise_killer
+    task_kwargs:
+        noise_type: "t-student"
+        df: 3.0
+        noise_scale: 0.5
+    data: gaussian
+    data_kwargs: {}
+    batch_size: 64
+    learning_rate: 0.0001
+    train_steps: 500001
+    # Curriculum schedules; field semantics are defined by the training code.
+    curriculum:
+        dims:
+            start: 5
+            end: 20
+            inc: 1
+            interval: 2000
+        points:
+            start: 11
+            end: 41
+            inc: 2
+            interval: 2000
+
+out_dir: ../models/heavy_tail_noise_killer
+
+# Weights & Biases run metadata.
+wandb:
+    project: "in-context-training"
+    entity: "hai-trinh220970-ho-chi-minh-city-university-of-technology"
+    name: "case2_heavy_tail_t_student"
+    notes: "Case 2: Heavy-tail noise (t-student df=3, scale=0.5) - OLS Enemy"
+    log_every_steps: 100
diff --git a/src/conf/case4.yaml b/src/conf/case4.yaml
@@ -0,0 +1,39 @@
+# Case 4 ("Averaging Death"): mixture_tasks_killer task on gaussian inputs.
+inherit:
+    - base.yaml
+
+model:
+    n_dims: 20
+    n_positions: 101
+
+training:
+    task: mixture_tasks_killer
+    task_kwargs:
+        scale: 1.0
+    data: gaussian
+    data_kwargs: {}
+    batch_size: 64
+    learning_rate: 0.0001
+    train_steps: 500001
+    # Curriculum schedules; field semantics are defined by the training code.
+    curriculum:
+        dims:
+            start: 5
+            end: 20
+            inc: 1
+            interval: 2000
+        points:
+            start: 11
+            end: 41
+            inc: 2
+            interval: 2000
+
+out_dir: ../models/mixture_tasks_killer
+
+# Weights & Biases run metadata.
+wandb:
+    project: "in-context-training"
+    entity: "hai-trinh220970-ho-chi-minh-city-university-of-technology"
+    name: "case4_mixture_tasks"
+    notes: "Case 4: Mixture of Tasks - 50% y=w^T x, 50% y=-w^T x - Averaging Death"
+    log_every_steps: 100
diff --git a/src/conf/case5.yaml b/src/conf/case5.yaml
@@ -0,0 +1,41 @@
+# Case 5 (transfer tradeoff): transfer_tradeoff_task with a mixture_gaussian prior.
+inherit:
+    - base.yaml
+
+model:
+    n_dims: 20
+    n_positions: 101
+
+training:
+    task: transfer_tradeoff_task
+    task_kwargs:
+        prior_type: "mixture_gaussian"
+        mixture_std: 2.0
+        scale: 1.0
+    data: gaussian
+    data_kwargs: {}
+    batch_size: 64
+    learning_rate: 0.0001
+    train_steps: 500001
+    # Curriculum schedules; note dims has start == end (20), unlike points.
+    curriculum:
+        dims:
+            start: 20
+            end: 20
+            inc: 1
+            interval: 2000
+        points:
+            start: 5
+            end: 10
+            inc: 1
+            interval: 2000
+
+out_dir: ../models/transfer_tradeoff_task
+
+# Weights & Biases run metadata.
+wandb:
+    project: "in-context-training"
+    entity: "hai-trinh220970-ho-chi-minh-city-university-of-technology"
+    name: "case5_transfer_tradeoff"
+    notes: "Case 5: Transfer Tradeoff - p×N experiment (Wakayama) - Mixture Gaussian prior"
+    log_every_steps: 100
diff --git a/src/conf/case_3.yaml b/src/conf/case_3.yaml
@@ -0,0 +1,47 @@
+# Case 3 ("Sign Constraint"): bounded_support_killer task.
+inherit:
+    - base.yaml
+
+model:
+    n_dims: 20
+    n_positions: 101
+
+training:
+    task: bounded_support_killer
+    task_kwargs:
+        rate: 1.0
+        scale: 1.0
+    # Positive-only inputs are intended here; the active sampler is uniform and
+    # the exponential alternative is kept below, commented out.
+    # NOTE(review): wandb.notes says x~Exp(1), which matches the disabled
+    # variant rather than the active `data: uniform` -- confirm which is meant.
+    data: uniform
+    data_kwargs: {}
+    # data: exponential
+    # data_kwargs:
+    #     rate: 1.0
+    batch_size: 64
+    learning_rate: 0.0001
+    train_steps: 500001
+    # Curriculum schedules; field semantics are defined by the training code.
+    curriculum:
+        dims:
+            start: 5
+            end: 20
+            inc: 1
+            interval: 2000
+        points:
+            start: 11
+            end: 41
+            inc: 2
+            interval: 2000
+
+out_dir: ../models/bounded_support_killer
+
+# Weights & Biases run metadata.
+wandb:
+    project: "in-context-training"
+    entity: "hai-trinh220970-ho-chi-minh-city-university-of-technology"
+    name: "case3_bounded_support"
+    notes: "Case 3: Bounded Support - w~Exp(1), x~Exp(1) - Sign Constraint"
+    log_every_steps: 100
diff --git a/src/conf/exponential_weighted_regression.yaml b/src/conf/exponential_weighted_regression.yaml
@@ -0,0 +1,46 @@
+# exponential_weighted_regression task on gaussian inputs, small gpt2 model.
+inherit:
+    - base.yaml
+
+model:
+    family: gpt2
+    n_dims: 20
+    n_embd: 128
+    n_head: 8
+    n_layer: 4
+    n_positions: 101
+
+training:
+    task: exponential_weighted_regression
+    task_kwargs:
+        rate: 1.0        # exponential distribution rate parameter
+        scale: 1.0
+    data: gaussian
+    data_kwargs: {}
+    curriculum:
+        dims:
+            start: 5
+            end: 20
+            inc: 1
+            interval: 2000
+        points:
+            start: 11
+            end: 41
+            inc: 2
+            interval: 2000
+    batch_size: 64
+    learning_rate: 0.0001
+    train_steps: 500001
+    save_every_steps: 100
+    keep_every_steps: 10000
+
+# Relative path, like the sibling configs; the machine-specific absolute
+# /content/... location should be supplied per environment, not committed.
+out_dir: ../models/exponential_weighted_regression
+
+wandb:
+    project: "in-context-training"
+    entity: "hai-trinh220970-ho-chi-minh-city-university-of-technology"
+    name: "exponential_weights_experiment"
+    notes: "Training with exponential-distributed weights (non-uniform on hypersphere)"
+    log_every_steps: 100
diff --git a/src/conf/laplace_weighted_regression.yaml b/src/conf/laplace_weighted_regression.yaml
@@ -0,0 +1,46 @@
+# laplace_weighted_regression task on gaussian inputs, small gpt2 model.
+inherit:
+    - base.yaml
+
+model:
+    family: gpt2
+    n_dims: 20
+    n_embd: 128
+    n_head: 8
+    n_layer: 4
+    n_positions: 101
+
+training:
+    task: laplace_weighted_regression
+    task_kwargs:
+        weight_scale: 1.0        # laplace distribution weight scale parameter
+        scale: 1.0
+    data: gaussian
+    data_kwargs: {}
+    curriculum:
+        dims:
+            start: 5
+            end: 20
+            inc: 1
+            interval: 2000
+        points:
+            start: 11
+            end: 41
+            inc: 2
+            interval: 2000
+    batch_size: 64
+    learning_rate: 0.0001
+    train_steps: 500001
+    save_every_steps: 100
+    keep_every_steps: 10000
+
+# Relative path, like the sibling configs; the machine-specific absolute
+# /content/... location should be supplied per environment, not committed.
+out_dir: ../models/laplace_weighted_regression
+
+wandb:
+    project: "in-context-training"
+    entity: "hai-trinh220970-ho-chi-minh-city-university-of-technology"
+    name: "laplace_weights_experiment"
+    notes: "Training with laplace-distributed weights (non-uniform on hypersphere)"
+    log_every_steps: 100
diff --git a/src/conf/linear_regression.yaml b/src/conf/linear_regression.yaml
@@ -10,7 +10,13 @@ training:
             inc: 2
             interval: 2000
 
 out_dir: ../models/linear_regression
 
 wandb:
-    name: "linear_regression_standard"
+    project: "in-context-training"
+    entity: "hai-trinh220970-ho-chi-minh-city-university-of-technology"
+    name: "noisy_linear_regression"
+    # NOTE(review): this notes text is identical to the one in
+    # laplace_weighted_regression.yaml; confirm it describes this run.
+    notes: "Training with laplace-distributed weights (non-uniform on hypersphere)"
+    log_every_steps: 100
diff --git a/src/conf/lr_wx.yaml b/src/conf/lr_wx.yaml
@@ -0,0 +1,38 @@
+# linear_regression task with a configurable weight distribution (gpt2 model).
+model:
+    family: gpt2
+    n_dims: 20
+    n_embd: 256
+    # n_head must divide n_embd evenly; the previous value 12 does not divide
+    # 256. 8 is the head count used by the other gpt2 configs in src/conf.
+    n_head: 8
+    n_layer: 8
+    n_positions: 101
+
+training:
+    batch_size: 64
+    curriculum:
+        dims:
+            start: 5
+            end: 20
+            inc: 1
+            interval: 2000
+        points:
+            start: 11
+            end: 41
+            inc: 2
+            interval: 2000
+    learning_rate: 0.0001
+    train_steps: 500001
+    # e.g. gaussian, uniform, laplace, tstudent, cauchy, poisson, rayleigh
+    data: tstudent
+    task: linear_regression
+    task_kwargs:
+        # e.g. gaussian, uniform, laplace, tstudent, cauchy, poisson, rayleigh
+        # NOTE(review): w_distribution is not defined anywhere in this file;
+        # confirm the config loader resolves this placeholder.
+        w_distribution: '${w_distribution}'
+
+wandb:
+    project: in-context-training
+    name: linear_regression_custom