dhruvsgarg · gdadlaney · Jan 21, 2026 · Jan 26, 2026 · Jan 26, 2026 · Jan 28, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,5 @@
+# Agent Guidelines
+
+- Do not delete comments or refactor code, unless it is objectively wrong.
+- Minimize the addition of excess conditionals, such as None checks, when they add no value to the codebase (i.e., no usage, or an incomplete or undesired result). When in doubt, confirm by asking yes/no questions.
+- Assume that the code runs only in a GPU environment. There is no need for torch.cuda.is_available() or similar checks.
diff --git a/lib/python/examples/fwdllm/README.md b/lib/python/examples/fwdllm/README.md
@@ -0,0 +1,95 @@
+# Aggregator Class Hierarchy in FwdLLM
+
+This document outlines the inheritance hierarchy of the Aggregator classes used in FwdLLM to help understand where specific methods are defined and why some might be redundant.
+
+## Hierarchy Overview
+```mermaid
+classDiagram
+    class SyncTopAgg ["flame/mode/horizontal/syncfl/top_aggregator.py:TopAggregator"] {
+        +internal_init()
+        +_aggregate_weights()
+        +_distribute_weights()
+    }
+
+    class AsyncTopAgg ["flame/mode/horizontal/asyncfl/top_aggregator.py:TopAggregator"] {
+        +oracular_trainer_avail_check(end: str) : bool
+        +hearbeat_trainer_avail_check(end: str) : bool
+        +_aggregate_weights(tag: str)
+    }
+
+    class FwdLLMAggregator ["flame/mode/horizontal/syncfl/fwdllm_aggregator.py:TopAggregator"] {
+        +oracular_trainer_avail_check(end: str) : bool
+        +hearbeat_trainer_avail_check(end: str) : bool
+        +_aggregate_grads_sync(tag: str)
+        +_aggregate_grads_async(tag: str)
+    }
+
+    class FedSGDAggregator ["examples/fwdllm/aggregator/FedSgdAggregator.py:FedSGDAggregator"] {
+        +aggregate(current_round)
+    }
+
+    SyncTopAgg <|-- AsyncTopAgg : Inherits
+    AsyncTopAgg <|-- FwdLLMAggregator : Inherits
+    FwdLLMAggregator <|-- FedSGDAggregator : Inherits
+```
+
+## Aggregation & Distribution Hierarchy (Sync & Async)
+
+Here is the function call hierarchy for both the aggregate and distribute methods across their async and sync workflows. Methods decorated with `@timer_decorator` are marked with a ⏱️.
+
+### 1. `_aggregate_weights(tag)`
+
+#### **Sync Flow** (`is_async == False`)
+```text
+_aggregate_weights
+└── ⏱️ _aggregate_grads_sync
+    ├── ⏱️ collect_and_accumulate_grads (Loops multiple times until agg goal is met)
+    │   └── ⏱️ _process_single_trainer_message
+    │       └── aggregate_grads_from_trainers
+    │
+    └── ⏱️ _process_aggregation_goal_met (Called if agg goal is reached)
+        ├── add_local_trained_result
+        ├── ⏱️ aggregate (Computes variance/model update)
+        ├── ⏱️ eval_model (Called if variance is good)
+        └── ⏱️ _force_cuda_memory_cleanup
+```
+
+#### **Async Flow** (`is_async == True`)
+```text
+_aggregate_weights
+└── ⏱️ _aggregate_grads_async
+    ├── ⏱️ _process_single_trainer_message (Processes precisely one received message)
+    │   └── aggregate_grads_from_trainers
+    │
+    └── ⏱️ _process_aggregation_goal_met (Called if agg goal is reached)
+        ├── add_local_trained_result
+        ├── ⏱️ aggregate (Computes variance/model update)
+        ├── ⏱️ eval_model (Called if variance is good)
+        └── ⏱️ _force_cuda_memory_cleanup
+```
+
+---
+
+### 2. `_distribute_weights(tag, task_to_perform)`
+
+#### **Sync Flow** (`is_async == False`)
+```text
+_distribute_weights
+└── ⏱️ _distribute_weights_sync
+    ├── (Waits for peers, then fetches ends with a blocking call)
+    ├── ⏱️ _prepare_distribution_payload (Called if variance is good)
+    │   ├── get_trainable_param_state_dict
+    │   └── aggregate_grad_pool
+    └── (Sends the pre-computed payload or the variance requests to the ends)
+```
+
+#### **Async Flow** (`is_async == True`)
+```text
+_distribute_weights
+└── ⏱️ _distribute_weights_async
+    ├── (Uses the selector, AsyncOortSelector, to fetch valid subsets of ends)
+    ├── ⏱️ _prepare_distribution_payload (Called if variance is good)
+    │   ├── get_trainable_param_state_dict
+    │   └── aggregate_grad_pool
+    └── (Sends the pre-computed payload or the variance requests to the ends)
+```
diff --git a/lib/python/examples/fwdllm/aggregator/FedSgdAggregator.py b/lib/python/examples/fwdllm/aggregator/FedSgdAggregator.py
@@ -53,7 +53,6 @@ def __init__(
 
         for idx in range(self.worker_num):
             self.flag_client_model_uploaded_dict[idx] = False
-        # ratio is one and the comm_round is 30 rn
         self.warmup_rounds = math.ceil(self.args.comm_round * self.args.warmup_ratio)
 
         # 之前的v不够，暂存在cached_v

diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/aggregator.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/aggregator.json
@@ -87,7 +87,7 @@
         },
         "aggGoal": 10,
         "rounds": 300,
-        "rejectStaleUpdates": true,
+        "rejectStaleUpdates": false,
         "inc_model_version_per_data_id": true
     },
     "baseModel": {
@@ -103,12 +103,17 @@
         "uri": ""
     },
     "selector": {
-        "sort": "random",
+        "sort": "async_oort",
         "kwargs": {
             "//": "c: concurrency level",
             "c": 30,
-            "minInitialTrainers": 10,
-            "k": 10
+            "aggGoal": 10,
+            "evalGoalFactor": 0.5,
+            "selectType": "default",
+            "roundNudgeType": "last_eval",
+            "minInitialTrainers": 5,
+            "k": 10,
+            "is_async": true
         }
     },
     "optimizer": {

diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_0.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_0.json
@@ -50,7 +50,7 @@
         "comm_round": 3000,
         "ci": 0,
         "dataset": "agnews",
-                "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5",
+        "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5",
         "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5",
         "partition_method": "uniform",
         "fl_algorithm": "FedFwd",
@@ -71,7 +71,7 @@
         "var_control": true,
         "perturbation_sampling": true,
         "evaluate_during_training_steps": 100,
-        "fp16": false,
+        "fp16": true,
         "output_dir": "/tmp/",
         "is_debug_mode": 0,
         "fedprox_mu": 1,
@@ -81,7 +81,7 @@
         "manual_seed": 42,
         "client_num_in_total": 0,
         "warmup_ratio": 1,
-        "training_delay_enabled": "True",
+        "training_delay_enabled": "False",
         "training_delay_s": "4.0",
         "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (319273, 'AVL_EVAL'), (335119, 'AVL_TRAIN'), (338559, 'AVL_EVAL'), (344104, 'UN_AVL'), (345589, 'AVL_TRAIN'), (345593, 'UN_AVL'), (345607, 'AVL_TRAIN'), (348282, 'AVL_EVAL'), (362502, 'UN_AVL'), (385856, 'AVL_TRAIN'), (388120, 'AVL_EVAL'), (390794, 'AVL_TRAIN'), (393354, 'AVL_EVAL'), (398433, 'AVL_TRAIN'), (400757, 'UN_AVL')]",
         "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (319273, 'AVL_EVAL'), (327767, 'UN_AVL'), (335119, 'AVL_TRAIN'), (338559, 'UN_AVL'), (345589, 'AVL_TRAIN'), (345593, 'UN_AVL'), (345607, 'AVL_TRAIN'), (348282, 'UN_AVL'), (385856, 'AVL_TRAIN'), (388120, 'UN_AVL'), (390794, 'AVL_TRAIN'), (393354, 'AVL_EVAL'), (398433, 'AVL_TRAIN'), (400757, 'UN_AVL')]",
@@ -94,7 +94,6 @@
         "avl_events_syn_0": "[(0, 'AVL_TRAIN')]",
         "avl_events_syn_20": "[(0, 'AVL_TRAIN'), (16200, 'UN_AVL'), (16800, 'AVL_TRAIN'), (18600, 'UN_AVL'), (19200, 'AVL_TRAIN'), (25800, 'UN_AVL'), (27000, 'AVL_TRAIN'), (30000, 'UN_AVL'), (30600, 'AVL_TRAIN'), (36000, 'UN_AVL'), (37200, 'AVL_TRAIN'), (46200, 'UN_AVL'), (46800, 'AVL_TRAIN'), (51000, 'UN_AVL'), (51600, 'AVL_TRAIN'), (67200, 'UN_AVL'), (67800, 'AVL_TRAIN'), (73800, 'UN_AVL'), (74400, 'AVL_TRAIN'), (85200, 'UN_AVL')]",
         "avl_events_syn_50": "[(0, 'AVL_TRAIN'), (1200, 'UN_AVL'), (2400, 'AVL_TRAIN'), (4200, 'UN_AVL'), (6600, 'AVL_TRAIN'), (7200, 'UN_AVL'), (8400, 'AVL_TRAIN'), (9000, 'UN_AVL'), (9600, 'AVL_TRAIN'), (10200, 'UN_AVL'), (10800, 'AVL_TRAIN'), (11400, 'UN_AVL'), (12000, 'AVL_TRAIN'), (13200, 'UN_AVL'), (13800, 'AVL_TRAIN'), (15000, 'UN_AVL'), (15600, 'AVL_TRAIN'), (16200, 'UN_AVL'), (19200, 'AVL_TRAIN'), (19800, 'UN_AVL'), (20400, 'AVL_TRAIN'), (21000, 'UN_AVL'), (22200, 'AVL_TRAIN'), (23400, 'UN_AVL'), (24000, 'AVL_TRAIN'), (24600, 'UN_AVL'), (27000, 'AVL_TRAIN'), (27600, 'UN_AVL'), (28200, 'AVL_TRAIN'), (28800, 'UN_AVL'), (29400, 'AVL_TRAIN'), (30000, 'UN_AVL'), (30600, 'AVL_TRAIN'), (31200, 'UN_AVL'), (31800, 'AVL_TRAIN'), (36000, 'UN_AVL'), (37200, 'AVL_TRAIN'), (39600, 'UN_AVL'), (40200, 'AVL_TRAIN'), (40800, 'UN_AVL'), (41400, 'AVL_TRAIN'), (43800, 'UN_AVL'), (44400, 'AVL_TRAIN'), (46200, 'UN_AVL'), (47400, 'AVL_TRAIN'), (48000, 'UN_AVL'), (48600, 'AVL_TRAIN'), (49200, 'UN_AVL'), (49800, 'AVL_TRAIN'), (51000, 'UN_AVL'), (52800, 'AVL_TRAIN'), (53400, 'UN_AVL'), (55200, 'AVL_TRAIN'), (55800, 'UN_AVL'), (57000, 'AVL_TRAIN'), (58800, 'UN_AVL'), (59400, 'AVL_TRAIN'), (62400, 'UN_AVL'), (63000, 'AVL_TRAIN'), (64800, 'UN_AVL'), (66600, 'AVL_TRAIN'), (67200, 'UN_AVL'), (67800, 'AVL_TRAIN'), (68400, 'UN_AVL'), (69000, 'AVL_TRAIN'), (69600, 'UN_AVL'), (70200, 'AVL_TRAIN'), (71400, 'UN_AVL'), (72000, 'AVL_TRAIN'), (73200, 'UN_AVL'), (75000, 'AVL_TRAIN'), (75600, 'UN_AVL'), (76200, 'AVL_TRAIN'), (78000, 'UN_AVL'), (78600, 'AVL_TRAIN'), (79800, 'UN_AVL'), (81000, 'AVL_TRAIN'), (81600, 'UN_AVL'), (84000, 'AVL_TRAIN'), (84600, 'UN_AVL')]"
-
     },
     "baseModel": {
         "name": "",

diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_1.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_1.json
@@ -50,7 +50,7 @@
         "comm_round": 3000,
         "ci": 0,
         "dataset": "agnews",
-                "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5",
+        "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5",
         "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5",
         "partition_method": "uniform",
         "fl_algorithm": "FedFwd",
@@ -71,7 +71,7 @@
         "var_control": true,
         "perturbation_sampling": true,
         "evaluate_during_training_steps": 100,
-        "fp16": false,
+        "fp16": true,
         "output_dir": "/tmp/",
         "is_debug_mode": 0,
         "fedprox_mu": 1,
@@ -81,7 +81,7 @@
         "manual_seed": 42,
         "client_num_in_total": 0,
         "warmup_ratio": 1,
-        "training_delay_enabled": "True",
+        "training_delay_enabled": "False",
         "training_delay_s": "16.0",
         "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (250, 'UN_AVL'), (1053, 'AVL_TRAIN'), (7162, 'AVL_EVAL'), (25536, 'UN_AVL'), (25536, 'AVL_TRAIN'), (28540, 'AVL_EVAL'), (41113, 'AVL_TRAIN'), (46728, 'AVL_EVAL'), (46729, 'AVL_TRAIN'), (49430, 'AVL_EVAL'), (70897, 'UN_AVL'), (82393, 'AVL_TRAIN'), (82416, 'UN_AVL'), (82418, 'AVL_TRAIN'), (82771, 'UN_AVL'), (82773, 'AVL_TRAIN'), (85462, 'UN_AVL'), (85464, 'AVL_TRAIN'), (87018, 'UN_AVL'), (87019, 'AVL_TRAIN'), (87422, 'UN_AVL'), (87424, 'AVL_TRAIN'), (87593, 'UN_AVL'), (87593, 'AVL_TRAIN'), (87603, 'UN_AVL'), (87881, 'AVL_TRAIN'), (87942, 'UN_AVL'), (87986, 'AVL_TRAIN'), (88540, 'UN_AVL'), (88678, 'AVL_TRAIN'), (88944, 'UN_AVL'), (90360, 'AVL_EVAL'), (90360, 'AVL_TRAIN'), (90381, 'AVL_EVAL'), (91048, 'UN_AVL'), (99745, 'AVL_TRAIN'), (100257, 'UN_AVL'), (100258, 'AVL_TRAIN'), (100557, 'UN_AVL'), (100558, 'AVL_TRAIN'), (104033, 'UN_AVL'), (104048, 'AVL_TRAIN'), (105450, 'UN_AVL'), (105741, 'AVL_TRAIN'), (107180, 'UN_AVL'), (109017, 'AVL_TRAIN'), (112236, 'UN_AVL'), (112236, 'AVL_TRAIN'), (136022, 'AVL_EVAL'), (160213, 'UN_AVL'), (164156, 'AVL_TRAIN'), (164594, 'UN_AVL'), (164651, 'AVL_TRAIN'), (166084, 'AVL_EVAL'), (173372, 'AVL_TRAIN'), (179115, 'AVL_EVAL'), (190676, 'AVL_TRAIN'), (214769, 'AVL_EVAL'), (214770, 'AVL_TRAIN'), (214773, 'AVL_EVAL'), (236404, 'UN_AVL'), (236404, 'AVL_TRAIN'), (238132, 'AVL_EVAL'), (240851, 'AVL_TRAIN'), (240936, 'AVL_EVAL'), (240975, 'AVL_TRAIN'), (247016, 'AVL_EVAL'), (261220, 'AVL_TRAIN'), (261795, 'AVL_EVAL'), (261795, 'AVL_TRAIN'), (264001, 'AVL_EVAL'), (264002, 'AVL_TRAIN'), (264003, 'AVL_EVAL'), (305427, 'AVL_TRAIN'), (310176, 'AVL_EVAL'), (310210, 'AVL_TRAIN'), (310213, 'AVL_EVAL'), (310765, 'AVL_TRAIN'), (312095, 'AVL_EVAL'), (312095, 'AVL_TRAIN'), (312105, 'AVL_EVAL'), (317629, 'AVL_TRAIN'), (317635, 'AVL_EVAL'), (317636, 'AVL_TRAIN'), (317637, 'AVL_EVAL'), (317637, 'AVL_TRAIN'), (317638, 'AVL_EVAL'), (317638, 'AVL_TRAIN'), (317639, 'AVL_EVAL'), (317639, 'AVL_TRAIN'), 
(317642, 'AVL_EVAL'), (317642, 'AVL_TRAIN'), (317645, 'AVL_EVAL'), (317647, 'AVL_TRAIN'), (317659, 'AVL_EVAL'), (317678, 'AVL_TRAIN'), (318323, 'AVL_EVAL'), (332409, 'UN_AVL'), (336461, 'AVL_TRAIN'), (341841, 'AVL_EVAL'), (341846, 'AVL_TRAIN'), (341992, 'AVL_EVAL'), (342694, 'AVL_TRAIN'), (344375, 'AVL_EVAL'), (346579, 'AVL_TRAIN'), (347263, 'AVL_EVAL'), (357902, 'UN_AVL'), (364184, 'AVL_TRAIN'), (364887, 'UN_AVL'), (386041, 'AVL_TRAIN'), (394420, 'AVL_EVAL'), (418953, 'UN_AVL'), (421325, 'AVL_TRAIN'), (425216, 'AVL_EVAL'), (425217, 'AVL_TRAIN'), (425541, 'AVL_EVAL'), (425542, 'AVL_TRAIN'), (425543, 'AVL_EVAL'), (434131, 'AVL_TRAIN'), (435716, 'AVL_EVAL'), (435720, 'AVL_TRAIN'), (438737, 'AVL_EVAL'), (449946, 'UN_AVL'), (472426, 'AVL_TRAIN'), (473276, 'UN_AVL'), (473287, 'AVL_TRAIN'), (474779, 'AVL_EVAL'), (475599, 'AVL_TRAIN'), (478630, 'AVL_EVAL'), (481838, 'AVL_TRAIN'), (481859, 'AVL_EVAL'), (483147, 'UN_AVL')]",
         "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1053, 'AVL_TRAIN'), (7162, 'AVL_EVAL'), (16664, 'UN_AVL'), (25536, 'AVL_TRAIN'), (28540, 'UN_AVL'), (41113, 'AVL_TRAIN'), (46728, 'AVL_EVAL'), (46729, 'AVL_TRAIN'), (49430, 'AVL_EVAL'), (62206, 'UN_AVL'), (82393, 'AVL_TRAIN'), (82416, 'UN_AVL'), (82418, 'AVL_TRAIN'), (82771, 'UN_AVL'), (82773, 'AVL_TRAIN'), (85462, 'UN_AVL'), (85464, 'AVL_TRAIN'), (87018, 'UN_AVL'), (87019, 'AVL_TRAIN'), (87422, 'UN_AVL'), (87424, 'AVL_TRAIN'), (87593, 'UN_AVL'), (87593, 'AVL_TRAIN'), (87603, 'UN_AVL'), (87881, 'AVL_TRAIN'), (87942, 'UN_AVL'), (87986, 'AVL_TRAIN'), (88540, 'UN_AVL'), (88678, 'AVL_TRAIN'), (88944, 'UN_AVL'), (90360, 'AVL_TRAIN'), (90381, 'UN_AVL'), (99745, 'AVL_TRAIN'), (100257, 'UN_AVL'), (100258, 'AVL_TRAIN'), (100557, 'UN_AVL'), (100558, 'AVL_TRAIN'), (104033, 'UN_AVL'), (104048, 'AVL_TRAIN'), (105450, 'UN_AVL'), (105741, 'AVL_TRAIN'), (107180, 'UN_AVL'), (109017, 'AVL_TRAIN'), (112236, 'UN_AVL'), (112236, 'AVL_TRAIN'), (136022, 'AVL_EVAL'), (148169, 'UN_AVL'), (164156, 'AVL_TRAIN'), (164594, 'UN_AVL'), (164651, 'AVL_TRAIN'), (166084, 'UN_AVL'), (173372, 'AVL_TRAIN'), (179115, 'AVL_EVAL'), (190555, 'UN_AVL'), (190676, 'AVL_TRAIN'), (214769, 'AVL_EVAL'), (214770, 'AVL_TRAIN'), (214773, 'AVL_EVAL'), (222241, 'UN_AVL'), (236404, 'AVL_TRAIN'), (238132, 'UN_AVL'), (240851, 'AVL_TRAIN'), (240936, 'UN_AVL'), (240975, 'AVL_TRAIN'), (247016, 'AVL_EVAL'), (261220, 'AVL_TRAIN'), (261795, 'AVL_EVAL'), (261795, 'AVL_TRAIN'), (264001, 'AVL_EVAL'), (264002, 'AVL_TRAIN'), (264003, 'AVL_EVAL'), (274444, 'UN_AVL'), (305427, 'AVL_TRAIN'), (310176, 'AVL_EVAL'), (310210, 'AVL_TRAIN'), (310213, 'AVL_EVAL'), (310765, 'AVL_TRAIN'), (312095, 'AVL_EVAL'), (312095, 'AVL_TRAIN'), (312105, 'AVL_EVAL'), (317629, 'AVL_TRAIN'), (317635, 'AVL_EVAL'), (317636, 'AVL_TRAIN'), (317637, 'AVL_EVAL'), (317637, 'AVL_TRAIN'), (317638, 'AVL_EVAL'), (317638, 'AVL_TRAIN'), (317639, 'AVL_EVAL'), (317639, 'AVL_TRAIN'), (317642, 
'AVL_EVAL'), (317642, 'AVL_TRAIN'), (317645, 'AVL_EVAL'), (317647, 'AVL_TRAIN'), (317659, 'AVL_EVAL'), (317678, 'AVL_TRAIN'), (318323, 'AVL_EVAL'), (332409, 'UN_AVL'), (336461, 'AVL_TRAIN'), (341841, 'AVL_EVAL'), (341846, 'AVL_TRAIN'), (341992, 'AVL_EVAL'), (342694, 'AVL_TRAIN'), (344375, 'AVL_EVAL'), (346579, 'AVL_TRAIN'), (347263, 'AVL_EVAL'), (352390, 'UN_AVL'), (364184, 'AVL_TRAIN'), (364887, 'UN_AVL'), (386041, 'AVL_TRAIN'), (394420, 'AVL_EVAL'), (417347, 'UN_AVL'), (421325, 'AVL_TRAIN'), (425216, 'AVL_EVAL'), (425217, 'AVL_TRAIN'), (425541, 'AVL_EVAL'), (425542, 'AVL_TRAIN'), (425543, 'AVL_EVAL'), (433136, 'UN_AVL'), (434131, 'AVL_TRAIN'), (435716, 'AVL_EVAL'), (435720, 'AVL_TRAIN'), (438737, 'AVL_EVAL'), (449946, 'UN_AVL'), (472426, 'AVL_TRAIN'), (473276, 'UN_AVL'), (473287, 'AVL_TRAIN'), (474779, 'UN_AVL'), (475599, 'AVL_TRAIN'), (478630, 'UN_AVL'), (481838, 'AVL_TRAIN'), (481859, 'UN_AVL')]",

diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_10.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_10.json
@@ -50,7 +50,7 @@
         "comm_round": 3000,
         "ci": 0,
         "dataset": "agnews",
-                "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5",
+        "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5",
         "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5",
         "partition_method": "uniform",
         "fl_algorithm": "FedFwd",
@@ -71,7 +71,7 @@
         "var_control": true,
         "perturbation_sampling": true,
         "evaluate_during_training_steps": 100,
-        "fp16": false,
+        "fp16": true,
         "output_dir": "/tmp/",
         "is_debug_mode": 0,
         "fedprox_mu": 1,
@@ -81,7 +81,7 @@
         "manual_seed": 42,
         "client_num_in_total": 0,
         "warmup_ratio": 1,
-        "training_delay_enabled": "True",
+        "training_delay_enabled": "False",
         "training_delay_s": "11.0",
         "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (12839, 'AVL_EVAL'), (62636, 'AVL_TRAIN'), (71903, 'AVL_EVAL'), (135083, 'UN_AVL'), (156109, 'AVL_TRAIN'), (164237, 'AVL_EVAL'), (189998, 'AVL_TRAIN'), (191541, 'AVL_EVAL'), (191547, 'AVL_TRAIN'), (191550, 'AVL_EVAL'), (191876, 'AVL_TRAIN'), (204944, 'AVL_EVAL'), (204947, 'AVL_TRAIN'), (205073, 'AVL_EVAL'), (205077, 'AVL_TRAIN'), (205079, 'AVL_EVAL'), (205376, 'AVL_TRAIN'), (205381, 'AVL_EVAL'), (205386, 'AVL_TRAIN'), (205425, 'AVL_EVAL'), (205428, 'AVL_TRAIN'), (205640, 'AVL_EVAL'), (205644, 'AVL_TRAIN'), (205651, 'AVL_EVAL'), (210583, 'AVL_TRAIN'), (223064, 'AVL_EVAL'), (276050, 'UN_AVL'), (326481, 'AVL_EVAL'), (331715, 'UN_AVL')]",
         "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (12839, 'AVL_EVAL'), (50151, 'UN_AVL'), (62636, 'AVL_TRAIN'), (71903, 'AVL_EVAL'), (102525, 'UN_AVL'), (156109, 'AVL_TRAIN'), (164237, 'AVL_EVAL'), (186714, 'UN_AVL'), (189998, 'AVL_TRAIN'), (191541, 'UN_AVL'), (191547, 'AVL_TRAIN'), (191550, 'UN_AVL'), (191876, 'AVL_TRAIN'), (204944, 'AVL_EVAL'), (204947, 'AVL_TRAIN'), (205073, 'AVL_EVAL'), (205077, 'AVL_TRAIN'), (205079, 'AVL_EVAL'), (205376, 'AVL_TRAIN'), (205381, 'AVL_EVAL'), (205386, 'AVL_TRAIN'), (205425, 'AVL_EVAL'), (205428, 'AVL_TRAIN'), (205640, 'AVL_EVAL'), (205644, 'AVL_TRAIN'), (205651, 'AVL_EVAL'), (210583, 'AVL_TRAIN'), (223064, 'AVL_EVAL'), (262901, 'UN_AVL'), (326481, 'AVL_EVAL'), (331715, 'UN_AVL')]",