diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..dfdfd6005 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,5 @@ +# Agent Guidelines + +- Do not delete comments or refactor code, unless it is objectively wrong. +- Minimize addition of excess conditionals like None checks if it isn't adding value to the codebase. No usage/ incomplete or undesired result. When in doubt, confirm with yes/no questions. +- Assume that code is being run only on a GPU env. No need to check for torch.cuda.is_available() or similar checks. diff --git a/lib/python/examples/fwdllm/README.md b/lib/python/examples/fwdllm/README.md new file mode 100644 index 000000000..fec2b726f --- /dev/null +++ b/lib/python/examples/fwdllm/README.md @@ -0,0 +1,95 @@ +# Aggregator Class Hierarchy in FwdLLM + +This document outlines the inheritance hierarchy of the Aggregator classes used in FwdLLM to help understand where specific methods are defined and why some might be redundant. + +## Hierarchy Overview +```mermaid +classDiagram + class SyncTopAgg ["flame/mode/horizontal/syncfl/top_aggregator.py:TopAggregator"] { + +internal_init() + +_aggregate_weights() + +_distribute_weights() + } + + class AsyncTopAgg ["flame/mode/horizontal/asyncfl/top_aggregator.py:TopAggregator"] { + +oracular_trainer_avail_check(end: str) : bool + +hearbeat_trainer_avail_check(end: str) : bool + +_aggregate_weights(tag: str) + } + + class FwdLLMAggregator ["flame/mode/horizontal/syncfl/fwdllm_aggregator.py:TopAggregator"] { + +oracular_trainer_avail_check(end: str) : bool + +hearbeat_trainer_avail_check(end: str) : bool + +_aggregate_grads_sync(tag: str) + +_aggregate_grads_async(tag: str) + } + + class FedSGDAggregator ["examples/fwdllm/aggregator/FedSgdAggregator.py:FedSGDAggregator"] { + +aggregate(current_round) + } + + SyncTopAgg <|-- AsyncTopAgg : Inherits + AsyncTopAgg <|-- FwdLLMAggregator : Inherits + FwdLLMAggregator <|-- FedSGDAggregator : Inherits +``` + +## Aggregation & Distribution Hierarchy (Sync & 
Async) + +Here is the function call hierarchy for both the aggregate and distribute methods across their async and sync workflows. Methods decorated with `@timer_decorator` are marked with a ⏱️. + +### 1. `_aggregate_weights(tag)` + +#### **Sync Flow** (`is_async == False`) +```text +_aggregate_weights +└── ⏱️ _aggregate_grads_sync + ├── ⏱️ collect_and_accumulate_grads (Loops multiple times until agg goal is met) + │ └── ⏱️ _process_single_trainer_message + │ └── aggregate_grads_from_trainers + │ + └── ⏱️ _process_aggregation_goal_met (Called if agg goal is reached) + ├── add_local_trained_result + ├── ⏱️ aggregate (Computes variance/model update) + ├── ⏱️ eval_model (Called if variance is good) + └── ⏱️ _force_cuda_memory_cleanup +``` + +#### **Async Flow** (`is_async == True`) +```text +_aggregate_weights +└── ⏱️ _aggregate_grads_async + ├── ⏱️ _process_single_trainer_message (Processes precisely one received message) + │ └── aggregate_grads_from_trainers + │ + └── ⏱️ _process_aggregation_goal_met (Called if agg goal is reached) + ├── add_local_trained_result + ├── ⏱️ aggregate (Computes variance/model update) + ├── ⏱️ eval_model (Called if variance is good) + └── ⏱️ _force_cuda_memory_cleanup +``` + +--- + +### 2. 
`_distribute_weights(tag, task_to_perform)` + +#### **Sync Flow** (`is_async == False`) +```text +_distribute_weights +└── ⏱️ _distribute_weights_sync + ├── (Waits for peers and fetches ends blockingly) + ├── ⏱️ _prepare_distribution_payload (Called if variance is good) + │ ├── get_trainable_param_state_dict + │ └── aggregate_grad_pool + └── (Sends the pre-computed payload or the variance requests to the ends) +``` + +#### **Async Flow** (`is_async == True`) +```text +_distribute_weights +└── ⏱️ _distribute_weights_async + ├── (Uses selector to fetch valid subsets of ends over AsyncOortSelector) + ├── ⏱️ _prepare_distribution_payload (Called if variance is good) + │ ├── get_trainable_param_state_dict + │ └── aggregate_grad_pool + └── (Sends the pre-computed payload or the variance requests to the ends) +``` diff --git a/lib/python/examples/fwdllm/aggregator/FedSgdAggregator.py b/lib/python/examples/fwdllm/aggregator/FedSgdAggregator.py index 945290c8f..c7cab9e00 100755 --- a/lib/python/examples/fwdllm/aggregator/FedSgdAggregator.py +++ b/lib/python/examples/fwdllm/aggregator/FedSgdAggregator.py @@ -53,7 +53,6 @@ def __init__( for idx in range(self.worker_num): self.flag_client_model_uploaded_dict[idx] = False - # ratio is one and the comm_round is 30 rn self.warmup_rounds = math.ceil(self.args.comm_round * self.args.warmup_ratio) # 之前的v不够,暂存在cached_v diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/aggregator.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/aggregator.json index 32b12a9ce..f4a0a4b95 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/aggregator.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/aggregator.json @@ -87,7 +87,7 @@ }, "aggGoal": 10, "rounds": 300, - "rejectStaleUpdates": true, + "rejectStaleUpdates": false, "inc_model_version_per_data_id": true }, "baseModel": { @@ -103,12 +103,17 @@ "uri": "" }, "selector": { - "sort": "random", + "sort": "async_oort", 
"kwargs": { "//": "c: concurrency level", "c": 30, - "minInitialTrainers": 10, - "k": 10 + "aggGoal": 10, + "evalGoalFactor": 0.5, + "selectType": "default", + "roundNudgeType": "last_eval", + "minInitialTrainers": 5, + "k": 10, + "is_async": true } }, "optimizer": { diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_0.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_0.json index cbd2760b8..3bb8f356c 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_0.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_0.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "4.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (319273, 'AVL_EVAL'), (335119, 'AVL_TRAIN'), (338559, 'AVL_EVAL'), (344104, 'UN_AVL'), (345589, 'AVL_TRAIN'), (345593, 'UN_AVL'), (345607, 'AVL_TRAIN'), (348282, 'AVL_EVAL'), (362502, 'UN_AVL'), (385856, 'AVL_TRAIN'), (388120, 'AVL_EVAL'), (390794, 'AVL_TRAIN'), (393354, 'AVL_EVAL'), (398433, 'AVL_TRAIN'), (400757, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (319273, 'AVL_EVAL'), (327767, 'UN_AVL'), (335119, 'AVL_TRAIN'), (338559, 'UN_AVL'), (345589, 'AVL_TRAIN'), (345593, 
'UN_AVL'), (345607, 'AVL_TRAIN'), (348282, 'UN_AVL'), (385856, 'AVL_TRAIN'), (388120, 'UN_AVL'), (390794, 'AVL_TRAIN'), (393354, 'AVL_EVAL'), (398433, 'AVL_TRAIN'), (400757, 'UN_AVL')]", @@ -94,7 +94,6 @@ "avl_events_syn_0": "[(0, 'AVL_TRAIN')]", "avl_events_syn_20": "[(0, 'AVL_TRAIN'), (16200, 'UN_AVL'), (16800, 'AVL_TRAIN'), (18600, 'UN_AVL'), (19200, 'AVL_TRAIN'), (25800, 'UN_AVL'), (27000, 'AVL_TRAIN'), (30000, 'UN_AVL'), (30600, 'AVL_TRAIN'), (36000, 'UN_AVL'), (37200, 'AVL_TRAIN'), (46200, 'UN_AVL'), (46800, 'AVL_TRAIN'), (51000, 'UN_AVL'), (51600, 'AVL_TRAIN'), (67200, 'UN_AVL'), (67800, 'AVL_TRAIN'), (73800, 'UN_AVL'), (74400, 'AVL_TRAIN'), (85200, 'UN_AVL')]", "avl_events_syn_50": "[(0, 'AVL_TRAIN'), (1200, 'UN_AVL'), (2400, 'AVL_TRAIN'), (4200, 'UN_AVL'), (6600, 'AVL_TRAIN'), (7200, 'UN_AVL'), (8400, 'AVL_TRAIN'), (9000, 'UN_AVL'), (9600, 'AVL_TRAIN'), (10200, 'UN_AVL'), (10800, 'AVL_TRAIN'), (11400, 'UN_AVL'), (12000, 'AVL_TRAIN'), (13200, 'UN_AVL'), (13800, 'AVL_TRAIN'), (15000, 'UN_AVL'), (15600, 'AVL_TRAIN'), (16200, 'UN_AVL'), (19200, 'AVL_TRAIN'), (19800, 'UN_AVL'), (20400, 'AVL_TRAIN'), (21000, 'UN_AVL'), (22200, 'AVL_TRAIN'), (23400, 'UN_AVL'), (24000, 'AVL_TRAIN'), (24600, 'UN_AVL'), (27000, 'AVL_TRAIN'), (27600, 'UN_AVL'), (28200, 'AVL_TRAIN'), (28800, 'UN_AVL'), (29400, 'AVL_TRAIN'), (30000, 'UN_AVL'), (30600, 'AVL_TRAIN'), (31200, 'UN_AVL'), (31800, 'AVL_TRAIN'), (36000, 'UN_AVL'), (37200, 'AVL_TRAIN'), (39600, 'UN_AVL'), (40200, 'AVL_TRAIN'), (40800, 'UN_AVL'), (41400, 'AVL_TRAIN'), (43800, 'UN_AVL'), (44400, 'AVL_TRAIN'), (46200, 'UN_AVL'), (47400, 'AVL_TRAIN'), (48000, 'UN_AVL'), (48600, 'AVL_TRAIN'), (49200, 'UN_AVL'), (49800, 'AVL_TRAIN'), (51000, 'UN_AVL'), (52800, 'AVL_TRAIN'), (53400, 'UN_AVL'), (55200, 'AVL_TRAIN'), (55800, 'UN_AVL'), (57000, 'AVL_TRAIN'), (58800, 'UN_AVL'), (59400, 'AVL_TRAIN'), (62400, 'UN_AVL'), (63000, 'AVL_TRAIN'), (64800, 'UN_AVL'), (66600, 'AVL_TRAIN'), (67200, 'UN_AVL'), (67800, 'AVL_TRAIN'), (68400, 
'UN_AVL'), (69000, 'AVL_TRAIN'), (69600, 'UN_AVL'), (70200, 'AVL_TRAIN'), (71400, 'UN_AVL'), (72000, 'AVL_TRAIN'), (73200, 'UN_AVL'), (75000, 'AVL_TRAIN'), (75600, 'UN_AVL'), (76200, 'AVL_TRAIN'), (78000, 'UN_AVL'), (78600, 'AVL_TRAIN'), (79800, 'UN_AVL'), (81000, 'AVL_TRAIN'), (81600, 'UN_AVL'), (84000, 'AVL_TRAIN'), (84600, 'UN_AVL')]" - }, "baseModel": { "name": "", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_1.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_1.json index 82c21a936..ec9d75a70 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_1.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_1.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "16.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (250, 'UN_AVL'), (1053, 'AVL_TRAIN'), (7162, 'AVL_EVAL'), (25536, 'UN_AVL'), (25536, 'AVL_TRAIN'), (28540, 'AVL_EVAL'), (41113, 'AVL_TRAIN'), (46728, 'AVL_EVAL'), (46729, 'AVL_TRAIN'), (49430, 'AVL_EVAL'), (70897, 'UN_AVL'), (82393, 'AVL_TRAIN'), (82416, 'UN_AVL'), (82418, 'AVL_TRAIN'), (82771, 'UN_AVL'), (82773, 'AVL_TRAIN'), (85462, 'UN_AVL'), (85464, 'AVL_TRAIN'), (87018, 'UN_AVL'), (87019, 'AVL_TRAIN'), (87422, 'UN_AVL'), 
(87424, 'AVL_TRAIN'), (87593, 'UN_AVL'), (87593, 'AVL_TRAIN'), (87603, 'UN_AVL'), (87881, 'AVL_TRAIN'), (87942, 'UN_AVL'), (87986, 'AVL_TRAIN'), (88540, 'UN_AVL'), (88678, 'AVL_TRAIN'), (88944, 'UN_AVL'), (90360, 'AVL_EVAL'), (90360, 'AVL_TRAIN'), (90381, 'AVL_EVAL'), (91048, 'UN_AVL'), (99745, 'AVL_TRAIN'), (100257, 'UN_AVL'), (100258, 'AVL_TRAIN'), (100557, 'UN_AVL'), (100558, 'AVL_TRAIN'), (104033, 'UN_AVL'), (104048, 'AVL_TRAIN'), (105450, 'UN_AVL'), (105741, 'AVL_TRAIN'), (107180, 'UN_AVL'), (109017, 'AVL_TRAIN'), (112236, 'UN_AVL'), (112236, 'AVL_TRAIN'), (136022, 'AVL_EVAL'), (160213, 'UN_AVL'), (164156, 'AVL_TRAIN'), (164594, 'UN_AVL'), (164651, 'AVL_TRAIN'), (166084, 'AVL_EVAL'), (173372, 'AVL_TRAIN'), (179115, 'AVL_EVAL'), (190676, 'AVL_TRAIN'), (214769, 'AVL_EVAL'), (214770, 'AVL_TRAIN'), (214773, 'AVL_EVAL'), (236404, 'UN_AVL'), (236404, 'AVL_TRAIN'), (238132, 'AVL_EVAL'), (240851, 'AVL_TRAIN'), (240936, 'AVL_EVAL'), (240975, 'AVL_TRAIN'), (247016, 'AVL_EVAL'), (261220, 'AVL_TRAIN'), (261795, 'AVL_EVAL'), (261795, 'AVL_TRAIN'), (264001, 'AVL_EVAL'), (264002, 'AVL_TRAIN'), (264003, 'AVL_EVAL'), (305427, 'AVL_TRAIN'), (310176, 'AVL_EVAL'), (310210, 'AVL_TRAIN'), (310213, 'AVL_EVAL'), (310765, 'AVL_TRAIN'), (312095, 'AVL_EVAL'), (312095, 'AVL_TRAIN'), (312105, 'AVL_EVAL'), (317629, 'AVL_TRAIN'), (317635, 'AVL_EVAL'), (317636, 'AVL_TRAIN'), (317637, 'AVL_EVAL'), (317637, 'AVL_TRAIN'), (317638, 'AVL_EVAL'), (317638, 'AVL_TRAIN'), (317639, 'AVL_EVAL'), (317639, 'AVL_TRAIN'), (317642, 'AVL_EVAL'), (317642, 'AVL_TRAIN'), (317645, 'AVL_EVAL'), (317647, 'AVL_TRAIN'), (317659, 'AVL_EVAL'), (317678, 'AVL_TRAIN'), (318323, 'AVL_EVAL'), (332409, 'UN_AVL'), (336461, 'AVL_TRAIN'), (341841, 'AVL_EVAL'), (341846, 'AVL_TRAIN'), (341992, 'AVL_EVAL'), (342694, 'AVL_TRAIN'), (344375, 'AVL_EVAL'), (346579, 'AVL_TRAIN'), (347263, 'AVL_EVAL'), (357902, 'UN_AVL'), (364184, 'AVL_TRAIN'), (364887, 'UN_AVL'), (386041, 'AVL_TRAIN'), (394420, 'AVL_EVAL'), (418953, 'UN_AVL'), (421325, 
'AVL_TRAIN'), (425216, 'AVL_EVAL'), (425217, 'AVL_TRAIN'), (425541, 'AVL_EVAL'), (425542, 'AVL_TRAIN'), (425543, 'AVL_EVAL'), (434131, 'AVL_TRAIN'), (435716, 'AVL_EVAL'), (435720, 'AVL_TRAIN'), (438737, 'AVL_EVAL'), (449946, 'UN_AVL'), (472426, 'AVL_TRAIN'), (473276, 'UN_AVL'), (473287, 'AVL_TRAIN'), (474779, 'AVL_EVAL'), (475599, 'AVL_TRAIN'), (478630, 'AVL_EVAL'), (481838, 'AVL_TRAIN'), (481859, 'AVL_EVAL'), (483147, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1053, 'AVL_TRAIN'), (7162, 'AVL_EVAL'), (16664, 'UN_AVL'), (25536, 'AVL_TRAIN'), (28540, 'UN_AVL'), (41113, 'AVL_TRAIN'), (46728, 'AVL_EVAL'), (46729, 'AVL_TRAIN'), (49430, 'AVL_EVAL'), (62206, 'UN_AVL'), (82393, 'AVL_TRAIN'), (82416, 'UN_AVL'), (82418, 'AVL_TRAIN'), (82771, 'UN_AVL'), (82773, 'AVL_TRAIN'), (85462, 'UN_AVL'), (85464, 'AVL_TRAIN'), (87018, 'UN_AVL'), (87019, 'AVL_TRAIN'), (87422, 'UN_AVL'), (87424, 'AVL_TRAIN'), (87593, 'UN_AVL'), (87593, 'AVL_TRAIN'), (87603, 'UN_AVL'), (87881, 'AVL_TRAIN'), (87942, 'UN_AVL'), (87986, 'AVL_TRAIN'), (88540, 'UN_AVL'), (88678, 'AVL_TRAIN'), (88944, 'UN_AVL'), (90360, 'AVL_TRAIN'), (90381, 'UN_AVL'), (99745, 'AVL_TRAIN'), (100257, 'UN_AVL'), (100258, 'AVL_TRAIN'), (100557, 'UN_AVL'), (100558, 'AVL_TRAIN'), (104033, 'UN_AVL'), (104048, 'AVL_TRAIN'), (105450, 'UN_AVL'), (105741, 'AVL_TRAIN'), (107180, 'UN_AVL'), (109017, 'AVL_TRAIN'), (112236, 'UN_AVL'), (112236, 'AVL_TRAIN'), (136022, 'AVL_EVAL'), (148169, 'UN_AVL'), (164156, 'AVL_TRAIN'), (164594, 'UN_AVL'), (164651, 'AVL_TRAIN'), (166084, 'UN_AVL'), (173372, 'AVL_TRAIN'), (179115, 'AVL_EVAL'), (190555, 'UN_AVL'), (190676, 'AVL_TRAIN'), (214769, 'AVL_EVAL'), (214770, 'AVL_TRAIN'), (214773, 'AVL_EVAL'), (222241, 'UN_AVL'), (236404, 'AVL_TRAIN'), (238132, 'UN_AVL'), (240851, 'AVL_TRAIN'), (240936, 'UN_AVL'), (240975, 'AVL_TRAIN'), (247016, 'AVL_EVAL'), (261220, 'AVL_TRAIN'), (261795, 'AVL_EVAL'), (261795, 'AVL_TRAIN'), (264001, 'AVL_EVAL'), (264002, 'AVL_TRAIN'), (264003, 
'AVL_EVAL'), (274444, 'UN_AVL'), (305427, 'AVL_TRAIN'), (310176, 'AVL_EVAL'), (310210, 'AVL_TRAIN'), (310213, 'AVL_EVAL'), (310765, 'AVL_TRAIN'), (312095, 'AVL_EVAL'), (312095, 'AVL_TRAIN'), (312105, 'AVL_EVAL'), (317629, 'AVL_TRAIN'), (317635, 'AVL_EVAL'), (317636, 'AVL_TRAIN'), (317637, 'AVL_EVAL'), (317637, 'AVL_TRAIN'), (317638, 'AVL_EVAL'), (317638, 'AVL_TRAIN'), (317639, 'AVL_EVAL'), (317639, 'AVL_TRAIN'), (317642, 'AVL_EVAL'), (317642, 'AVL_TRAIN'), (317645, 'AVL_EVAL'), (317647, 'AVL_TRAIN'), (317659, 'AVL_EVAL'), (317678, 'AVL_TRAIN'), (318323, 'AVL_EVAL'), (332409, 'UN_AVL'), (336461, 'AVL_TRAIN'), (341841, 'AVL_EVAL'), (341846, 'AVL_TRAIN'), (341992, 'AVL_EVAL'), (342694, 'AVL_TRAIN'), (344375, 'AVL_EVAL'), (346579, 'AVL_TRAIN'), (347263, 'AVL_EVAL'), (352390, 'UN_AVL'), (364184, 'AVL_TRAIN'), (364887, 'UN_AVL'), (386041, 'AVL_TRAIN'), (394420, 'AVL_EVAL'), (417347, 'UN_AVL'), (421325, 'AVL_TRAIN'), (425216, 'AVL_EVAL'), (425217, 'AVL_TRAIN'), (425541, 'AVL_EVAL'), (425542, 'AVL_TRAIN'), (425543, 'AVL_EVAL'), (433136, 'UN_AVL'), (434131, 'AVL_TRAIN'), (435716, 'AVL_EVAL'), (435720, 'AVL_TRAIN'), (438737, 'AVL_EVAL'), (449946, 'UN_AVL'), (472426, 'AVL_TRAIN'), (473276, 'UN_AVL'), (473287, 'AVL_TRAIN'), (474779, 'UN_AVL'), (475599, 'AVL_TRAIN'), (478630, 'UN_AVL'), (481838, 'AVL_TRAIN'), (481859, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_10.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_10.json index 8f3260b66..ebe22610b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_10.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_10.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "11.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (12839, 'AVL_EVAL'), (62636, 'AVL_TRAIN'), (71903, 'AVL_EVAL'), (135083, 'UN_AVL'), (156109, 'AVL_TRAIN'), (164237, 'AVL_EVAL'), (189998, 'AVL_TRAIN'), (191541, 'AVL_EVAL'), (191547, 'AVL_TRAIN'), (191550, 'AVL_EVAL'), (191876, 'AVL_TRAIN'), (204944, 'AVL_EVAL'), (204947, 'AVL_TRAIN'), (205073, 'AVL_EVAL'), (205077, 'AVL_TRAIN'), (205079, 'AVL_EVAL'), (205376, 'AVL_TRAIN'), (205381, 'AVL_EVAL'), (205386, 'AVL_TRAIN'), (205425, 'AVL_EVAL'), (205428, 'AVL_TRAIN'), (205640, 'AVL_EVAL'), (205644, 'AVL_TRAIN'), (205651, 'AVL_EVAL'), (210583, 'AVL_TRAIN'), (223064, 'AVL_EVAL'), (276050, 'UN_AVL'), (326481, 'AVL_EVAL'), (331715, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (12839, 'AVL_EVAL'), (50151, 'UN_AVL'), (62636, 'AVL_TRAIN'), (71903, 'AVL_EVAL'), (102525, 'UN_AVL'), (156109, 'AVL_TRAIN'), (164237, 'AVL_EVAL'), (186714, 'UN_AVL'), (189998, 'AVL_TRAIN'), (191541, 'UN_AVL'), (191547, 'AVL_TRAIN'), (191550, 'UN_AVL'), (191876, 'AVL_TRAIN'), (204944, 'AVL_EVAL'), (204947, 'AVL_TRAIN'), (205073, 'AVL_EVAL'), (205077, 'AVL_TRAIN'), (205079, 'AVL_EVAL'), (205376, 'AVL_TRAIN'), (205381, 'AVL_EVAL'), (205386, 'AVL_TRAIN'), (205425, 'AVL_EVAL'), (205428, 'AVL_TRAIN'), (205640, 'AVL_EVAL'), (205644, 'AVL_TRAIN'), (205651, 'AVL_EVAL'), (210583, 'AVL_TRAIN'), (223064, 'AVL_EVAL'), (262901, 'UN_AVL'), (326481, 'AVL_EVAL'), (331715, 'UN_AVL')]", diff --git 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_100.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_100.json index 905e8974b..1ecca8993 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_100.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_100.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "4.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), 
(79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_101.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_101.json index 37b0f2abc..4b9d6a132 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_101.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_101.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "20.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 
'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_102.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_102.json index 2b3048d23..12c97e2d5 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_102.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_102.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "3.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 
'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_103.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_103.json index 9a609f0c8..83339950b 100644 --- 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_103.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_103.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "17.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 
'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_104.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_104.json index 4df4dc603..0908841ec 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_104.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_104.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "4.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), 
(170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_105.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_105.json index 0fb06d1da..c1888401f 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_105.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_105.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "26.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 
'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_106.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_106.json index c1dbdf562..cf169c8d7 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_106.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_106.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - 
"fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "11.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 
'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_107.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_107.json index b0f86f0d1..6d55c24df 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_107.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_107.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 
'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_108.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_108.json index 075426e5d..852990a76 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_108.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_108.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "29.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 
'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_109.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_109.json index 32249f140..f4a98e146 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_109.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_109.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": 
"12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_11.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_11.json 
index 056b56b66..283b69160 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_11.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_11.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "19.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (28302, 'AVL_TRAIN'), (29195, 'UN_AVL'), (36894, 'AVL_TRAIN'), (63583, 'AVL_EVAL'), (100060, 'UN_AVL'), (112501, 'AVL_TRAIN'), (116318, 'UN_AVL'), (116350, 'AVL_TRAIN'), (118390, 'UN_AVL'), (148083, 'AVL_TRAIN'), (162644, 'AVL_EVAL'), (201373, 'UN_AVL'), (208438, 'AVL_TRAIN'), (208622, 'UN_AVL'), (208636, 'AVL_TRAIN'), (243721, 'AVL_EVAL'), (280584, 'UN_AVL'), (298120, 'AVL_TRAIN'), (318895, 'AVL_EVAL'), (318951, 'AVL_TRAIN'), (319530, 'AVL_EVAL'), (364681, 'UN_AVL'), (379241, 'AVL_TRAIN'), (381391, 'UN_AVL'), (381417, 'AVL_TRAIN'), (414188, 'AVL_EVAL'), (436582, 'UN_AVL'), (443187, 'AVL_TRAIN'), (445392, 'UN_AVL'), (445782, 'AVL_TRAIN'), (451835, 'UN_AVL'), (458984, 'AVL_TRAIN'), (459293, 'UN_AVL'), (463604, 'AVL_TRAIN'), (464329, 'UN_AVL'), (464335, 'AVL_TRAIN'), (464729, 'UN_AVL'), (464762, 'AVL_TRAIN'), (475119, 'UN_AVL'), (496411, 'AVL_TRAIN'), (506831, 'AVL_EVAL'), (531066, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 
'AVL_TRAIN'), (300, 'UN_AVL'), (28302, 'AVL_TRAIN'), (29195, 'UN_AVL'), (36894, 'AVL_TRAIN'), (63583, 'AVL_EVAL'), (80500, 'UN_AVL'), (112501, 'AVL_TRAIN'), (116318, 'UN_AVL'), (116350, 'AVL_TRAIN'), (118390, 'UN_AVL'), (148083, 'AVL_TRAIN'), (162644, 'AVL_EVAL'), (183615, 'UN_AVL'), (208438, 'AVL_TRAIN'), (208622, 'UN_AVL'), (208636, 'AVL_TRAIN'), (243721, 'AVL_EVAL'), (262827, 'UN_AVL'), (298120, 'AVL_TRAIN'), (318895, 'AVL_EVAL'), (318951, 'AVL_TRAIN'), (319530, 'AVL_EVAL'), (337965, 'UN_AVL'), (379241, 'AVL_TRAIN'), (381391, 'UN_AVL'), (381417, 'AVL_TRAIN'), (414188, 'AVL_EVAL'), (424696, 'UN_AVL'), (443187, 'AVL_TRAIN'), (445392, 'UN_AVL'), (445782, 'AVL_TRAIN'), (451835, 'UN_AVL'), (458984, 'AVL_TRAIN'), (459293, 'UN_AVL'), (463604, 'AVL_TRAIN'), (464329, 'UN_AVL'), (464335, 'AVL_TRAIN'), (464729, 'UN_AVL'), (464762, 'AVL_TRAIN'), (475119, 'UN_AVL'), (496411, 'AVL_TRAIN'), (506831, 'AVL_EVAL'), (526457, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_110.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_110.json index e2252134b..0b58bd33b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_110.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_110.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "11.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 
'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_111.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_111.json index cd8852e9a..5eca25d95 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_111.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_111.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - 
"fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "18.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 
'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_112.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_112.json index 7a2b86661..e8983fbc4 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_112.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_112.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "21.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 
'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_113.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_113.json index 8f31da885..9c2301949 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_113.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_113.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 
'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_114.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_114.json index 4f65079d9..36bf9048b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_114.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_114.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": 
"6.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_115.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_115.json 
index 7396f906b..efdb009d3 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_115.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_115.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "16.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 
'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_116.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_116.json index df2f1601b..f6282af8e 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_116.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_116.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "8.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), 
(168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_117.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_117.json index 459393622..475cb31ce 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_117.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_117.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 
'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_118.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_118.json index 8ee39087f..da5e6992f 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_118.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_118.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, 
"evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 
'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_119.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_119.json index bb5eefe2b..729de7a07 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_119.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_119.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 
'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_12.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_12.json index b3a6b0868..b87e13984 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_12.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_12.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (44572, 'UN_AVL'), (89085, 'AVL_TRAIN'), (99966, 
'AVL_EVAL'), (153474, 'UN_AVL'), (178052, 'AVL_TRAIN'), (194784, 'AVL_EVAL'), (304870, 'UN_AVL'), (361337, 'AVL_TRAIN'), (383532, 'AVL_EVAL'), (450806, 'UN_AVL'), (450873, 'AVL_TRAIN'), (474438, 'AVL_EVAL'), (496013, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (89085, 'AVL_TRAIN'), (99966, 'AVL_EVAL'), (131714, 'UN_AVL'), (178052, 'AVL_TRAIN'), (194784, 'AVL_EVAL'), (251414, 'UN_AVL'), (361337, 'AVL_TRAIN'), (383532, 'AVL_EVAL'), (415526, 'UN_AVL'), (450873, 'AVL_TRAIN'), (474438, 'AVL_EVAL'), (496013, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_120.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_120.json index 552bc0f03..c97933269 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_120.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_120.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "9.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), 
(89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_121.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_121.json index 2c84fc30d..49455788b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_121.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_121.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "17.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), 
(15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_122.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_122.json index 3e0ff45a2..1a562a764 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_122.json 
+++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_122.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), 
(85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_123.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_123.json index 73c07e9bf..56d357002 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_123.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_123.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", 
"avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_124.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_124.json index 137ab215e..34a0346f0 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_124.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_124.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "9.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 
'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_125.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_125.json index fb5cbfb42..520153c81 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_125.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_125.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", 
"is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), 
(171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_126.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_126.json index 3a8793972..ccb4200e3 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_126.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_126.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 
'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_127.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_127.json index 1c4a3f1b6..6567dc2c0 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_127.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_127.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 
'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_128.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_128.json index f8fc1b135..e55e90a07 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_128.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_128.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "22.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 
'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_129.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_129.json index 88542ff6a..5a402bfd8 100644 --- 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_129.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_129.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "18.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 
'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_13.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_13.json index 0ea110801..cbdfa68dc 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_13.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_13.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "3.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (244912, 'AVL_TRAIN'), (245929, 'UN_AVL'), (246546, 'AVL_TRAIN'), (251386, 'AVL_EVAL'), (266250, 'UN_AVL'), (269540, 'AVL_TRAIN'), (274003, 'AVL_EVAL'), (276200, 'UN_AVL'), (288068, 'AVL_TRAIN'), (292699, 'AVL_EVAL'), (310101, 'UN_AVL'), (310428, 'AVL_TRAIN'), (316236, 'AVL_EVAL'), (360109, 'UN_AVL'), (360149, 'AVL_TRAIN'), (366301, 'AVL_EVAL'), (423844, 'UN_AVL'), (427556, 'AVL_TRAIN'), (431311, 'AVL_EVAL'), 
(432665, 'AVL_TRAIN'), (433852, 'AVL_EVAL'), (443170, 'UN_AVL'), (443616, 'AVL_TRAIN'), (477502, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (244912, 'AVL_TRAIN'), (245929, 'UN_AVL'), (246546, 'AVL_TRAIN'), (251386, 'UN_AVL'), (269540, 'AVL_TRAIN'), (274003, 'UN_AVL'), (288068, 'AVL_TRAIN'), (292699, 'UN_AVL'), (310428, 'AVL_TRAIN'), (316236, 'AVL_EVAL'), (355778, 'UN_AVL'), (360149, 'AVL_TRAIN'), (366301, 'AVL_EVAL'), (418927, 'UN_AVL'), (427556, 'AVL_TRAIN'), (431311, 'UN_AVL'), (432665, 'AVL_TRAIN'), (433852, 'UN_AVL'), (443616, 'AVL_TRAIN'), (477502, 'AVL_EVAL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_130.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_130.json index 54803b749..93df5ec2f 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_130.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_130.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "13.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 
'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_131.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_131.json index 0c8ecbff8..b3b230cdd 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_131.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_131.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 
'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_132.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_132.json index ea2f125dc..6dd68c8f3 100644 --- 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_132.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_132.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 
'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_133.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_133.json index 103219eb1..6d2f764f0 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_133.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_133.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 
'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_134.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_134.json index 1a201d8a6..65faf562c 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_134.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_134.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), 
(78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_135.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_135.json index 4e83cc954..d7a0d47fd 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_135.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_135.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 
100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "15.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), 
(168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_136.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_136.json index 2ec8456c0..5f38bc530 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_136.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_136.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "22.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), 
(78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_137.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_137.json index 307ed012b..268dd0409 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_137.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_137.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), 
(81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_138.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_138.json index 75ee506f2..c3323a99c 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_138.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_138.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", 
"training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_139.json 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_139.json index 68db711aa..b95882448 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_139.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_139.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), 
(79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_14.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_14.json index 9f37b60ec..dd6ddd991 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_14.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_14.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "6.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (24725, 'AVL_EVAL'), (40876, 'AVL_TRAIN'), (43307, 'AVL_EVAL'), (98187, 'UN_AVL'), (98792, 'AVL_TRAIN'), (99888, 'AVL_EVAL'), (101704, 'UN_AVL'), (102200, 'AVL_TRAIN'), (106252, 'AVL_EVAL'), (109247, 'AVL_TRAIN'), (109361, 'AVL_EVAL'), (127445, 'AVL_TRAIN'), (129934, 'AVL_EVAL'), (182620, 'UN_AVL'), 
(182639, 'AVL_TRAIN'), (186618, 'AVL_EVAL'), (186618, 'AVL_TRAIN'), (186643, 'AVL_EVAL'), (198934, 'UN_AVL'), (225988, 'AVL_TRAIN'), (232715, 'AVL_EVAL'), (265339, 'UN_AVL'), (283946, 'AVL_TRAIN'), (286819, 'AVL_EVAL'), (288121, 'AVL_TRAIN'), (293395, 'AVL_EVAL'), (349055, 'UN_AVL'), (356443, 'AVL_TRAIN'), (360551, 'AVL_EVAL'), (360724, 'AVL_TRAIN'), (363760, 'AVL_EVAL'), (430047, 'UN_AVL'), (447070, 'AVL_EVAL'), (452577, 'AVL_TRAIN'), (455553, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (24725, 'AVL_EVAL'), (25064, 'UN_AVL'), (40876, 'AVL_TRAIN'), (43307, 'AVL_EVAL'), (79959, 'UN_AVL'), (98792, 'AVL_TRAIN'), (99888, 'UN_AVL'), (102200, 'AVL_TRAIN'), (106252, 'AVL_EVAL'), (109247, 'AVL_TRAIN'), (109361, 'AVL_EVAL'), (127445, 'AVL_TRAIN'), (129934, 'AVL_EVAL'), (182620, 'UN_AVL'), (182639, 'AVL_TRAIN'), (186618, 'AVL_EVAL'), (186618, 'AVL_TRAIN'), (186643, 'AVL_EVAL'), (192481, 'UN_AVL'), (225988, 'AVL_TRAIN'), (232715, 'AVL_EVAL'), (247296, 'UN_AVL'), (283946, 'AVL_TRAIN'), (286819, 'UN_AVL'), (288121, 'AVL_TRAIN'), (293395, 'AVL_EVAL'), (339045, 'UN_AVL'), (356443, 'AVL_TRAIN'), (360551, 'UN_AVL'), (360724, 'AVL_TRAIN'), (363760, 'AVL_EVAL'), (400165, 'UN_AVL'), (447070, 'AVL_EVAL'), (450035, 'UN_AVL'), (452577, 'AVL_TRAIN'), (455553, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_140.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_140.json index 725fc0080..a5920c0cb 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_140.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_140.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + 
"training_delay_enabled": "False", "training_delay_s": "18.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_141.json 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_141.json index c51a67169..467f188e0 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_141.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_141.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "15.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 
'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_142.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_142.json index 25f1d20d6..f9330abff 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_142.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_142.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "16.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), 
(124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_143.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_143.json index 8eaedb7f4..051846a4b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_143.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_143.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "3.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 
'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_144.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_144.json index 0fa61fb2e..01b17684b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_144.json +++ 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_144.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "4.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 
'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_145.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_145.json index efa7939f1..684770b45 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_145.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_145.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "8.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", 
"avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_146.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_146.json index 854874e85..7360ff1d8 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_146.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_146.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "29.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 
'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_147.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_147.json index 45f94d2ae..33e329854 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_147.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_147.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", 
"is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), 
(171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_148.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_148.json index a239df4e5..01145cd99 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_148.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_148.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 
'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_149.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_149.json index 083692621..8a6ed140a 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_149.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_149.json @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "8.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 
'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_15.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_15.json index 80a9c15cc..b74577b7d 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_15.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_15.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, 
"perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "13.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8737, 'AVL_EVAL'), (8737, 'AVL_TRAIN'), (10357, 'AVL_EVAL'), (14357, 'AVL_TRAIN'), (15634, 'AVL_EVAL'), (25336, 'AVL_TRAIN'), (27196, 'AVL_EVAL'), (42670, 'AVL_TRAIN'), (42710, 'AVL_EVAL'), (99637, 'AVL_TRAIN'), (102235, 'AVL_EVAL'), (129391, 'AVL_TRAIN'), (130491, 'AVL_EVAL'), (174494, 'AVL_TRAIN'), (174534, 'AVL_EVAL'), (177832, 'AVL_TRAIN'), (178886, 'AVL_EVAL'), (181894, 'AVL_TRAIN'), (183171, 'AVL_EVAL'), (185229, 'AVL_TRAIN'), (187002, 'AVL_EVAL'), (201911, 'AVL_TRAIN'), (216131, 'AVL_EVAL'), (251549, 'AVL_TRAIN'), (254018, 'AVL_EVAL'), (267318, 'AVL_TRAIN'), (267542, 'AVL_EVAL'), (272642, 'AVL_TRAIN'), (274378, 'AVL_EVAL'), (278228, 'AVL_TRAIN'), (302582, 'AVL_EVAL'), (335211, 'AVL_TRAIN'), (336985, 'AVL_EVAL'), (372940, 'AVL_TRAIN'), (391688, 'AVL_EVAL'), (462304, 'AVL_TRAIN'), (474928, 'AVL_EVAL'), (524661, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8737, 'AVL_TRAIN'), (10357, 'UN_AVL'), (14357, 'AVL_TRAIN'), (15634, 'AVL_EVAL'), (25336, 'AVL_TRAIN'), (27196, 'AVL_EVAL'), (42670, 'AVL_TRAIN'), (42710, 'AVL_EVAL'), (92020, 'UN_AVL'), (99637, 'AVL_TRAIN'), (102235, 'AVL_EVAL'), (108124, 'UN_AVL'), (129391, 'AVL_TRAIN'), (130491, 'AVL_EVAL'), (149882, 'UN_AVL'), (174494, 'AVL_TRAIN'), (174534, 'UN_AVL'), (177832, 'AVL_TRAIN'), (178886, 'UN_AVL'), (181894, 'AVL_TRAIN'), (183171, 'AVL_EVAL'), (185229, 'AVL_TRAIN'), (187002, 'AVL_EVAL'), (201907, 'UN_AVL'), (201911, 'AVL_TRAIN'), (216131, 'AVL_EVAL'), (239232, 'UN_AVL'), (251549, 'AVL_TRAIN'), (254018, 'AVL_EVAL'), (266909, 'UN_AVL'), (267318, 'AVL_TRAIN'), (267542, 
'UN_AVL'), (272642, 'AVL_TRAIN'), (274378, 'AVL_EVAL'), (278215, 'UN_AVL'), (278228, 'AVL_TRAIN'), (302582, 'AVL_EVAL'), (320301, 'UN_AVL'), (335211, 'AVL_TRAIN'), (336985, 'AVL_EVAL'), (340822, 'UN_AVL'), (353879, 'AVL_EVAL'), (372940, 'AVL_TRAIN'), (391688, 'AVL_EVAL'), (450555, 'UN_AVL'), (462304, 'AVL_TRAIN'), (474928, 'AVL_EVAL'), (524661, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_16.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_16.json index 4298d3e4d..54a6eb037 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_16.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_16.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "14.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (281291, 'AVL_TRAIN'), (281431, 'UN_AVL'), (281465, 'AVL_TRAIN'), (281754, 'UN_AVL'), (289955, 'AVL_TRAIN'), (302790, 'AVL_EVAL'), (344351, 'UN_AVL'), (363641, 'AVL_TRAIN'), (385200, 'AVL_EVAL'), (436924, 'AVL_TRAIN'), (440237, 'AVL_EVAL'), (447244, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (281291, 'AVL_TRAIN'), (281431, 'UN_AVL'), (281465, 'AVL_TRAIN'), (281754, 'UN_AVL'), (289955, 
'AVL_TRAIN'), (302790, 'AVL_EVAL'), (327060, 'UN_AVL'), (363641, 'AVL_TRAIN'), (385200, 'AVL_EVAL'), (424575, 'UN_AVL'), (436924, 'AVL_TRAIN'), (440237, 'AVL_EVAL'), (442890, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_17.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_17.json index 4c046bb58..3147a8e26 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_17.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_17.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "9.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (6576, 'AVL_TRAIN'), (9540, 'UN_AVL'), (9541, 'AVL_TRAIN'), (12486, 'UN_AVL'), (12487, 'AVL_TRAIN'), (13229, 'UN_AVL'), (13231, 'AVL_TRAIN'), (13233, 'UN_AVL'), (13234, 'AVL_TRAIN'), (13236, 'UN_AVL'), (13237, 'AVL_TRAIN'), (13239, 'UN_AVL'), (14466, 'AVL_TRAIN'), (14688, 'UN_AVL'), (14690, 'AVL_TRAIN'), (15972, 'UN_AVL'), (15972, 'AVL_TRAIN'), (15976, 'UN_AVL'), (15977, 'AVL_TRAIN'), (15979, 'UN_AVL'), (15979, 'AVL_TRAIN'), (15981, 'UN_AVL'), (15985, 'AVL_TRAIN'), (16078, 'UN_AVL'), (16079, 'AVL_TRAIN'), (16081, 'UN_AVL'), (16082, 'AVL_TRAIN'), (16084, 'UN_AVL'), (16086, 'AVL_TRAIN'), 
(16088, 'UN_AVL'), (16089, 'AVL_TRAIN'), (16978, 'UN_AVL'), (20578, 'AVL_TRAIN'), (21191, 'UN_AVL'), (21192, 'AVL_TRAIN'), (21194, 'UN_AVL'), (21195, 'AVL_TRAIN'), (23527, 'UN_AVL'), (23528, 'AVL_TRAIN'), (25952, 'UN_AVL'), (25953, 'AVL_TRAIN'), (26047, 'UN_AVL'), (26047, 'AVL_TRAIN'), (26048, 'UN_AVL'), (26049, 'AVL_TRAIN'), (26051, 'UN_AVL'), (26053, 'AVL_TRAIN'), (26736, 'UN_AVL'), (26738, 'AVL_TRAIN'), (26761, 'UN_AVL'), (26761, 'AVL_TRAIN'), (26763, 'UN_AVL'), (26764, 'AVL_TRAIN'), (26766, 'UN_AVL'), (26768, 'AVL_TRAIN'), (26861, 'UN_AVL'), (26865, 'AVL_TRAIN'), (43489, 'AVL_EVAL'), (43492, 'AVL_TRAIN'), (43523, 'AVL_EVAL'), (43524, 'AVL_TRAIN'), (43557, 'AVL_EVAL'), (43558, 'AVL_TRAIN'), (43591, 'AVL_EVAL'), (43592, 'AVL_TRAIN'), (43624, 'AVL_EVAL'), (43625, 'AVL_TRAIN'), (43658, 'AVL_EVAL'), (43659, 'AVL_TRAIN'), (43691, 'AVL_EVAL'), (43693, 'AVL_TRAIN'), (43725, 'AVL_EVAL'), (43726, 'AVL_TRAIN'), (43793, 'AVL_EVAL'), (43794, 'AVL_TRAIN'), (43826, 'AVL_EVAL'), (43827, 'AVL_TRAIN'), (43860, 'AVL_EVAL'), (43861, 'AVL_TRAIN'), (43894, 'AVL_EVAL'), (43895, 'AVL_TRAIN'), (43927, 'AVL_EVAL'), (43929, 'AVL_TRAIN'), (43961, 'AVL_EVAL'), (43963, 'AVL_TRAIN'), (43995, 'AVL_EVAL'), (43996, 'AVL_TRAIN'), (44029, 'AVL_EVAL'), (44030, 'AVL_TRAIN'), (44062, 'AVL_EVAL'), (44064, 'AVL_TRAIN'), (44096, 'AVL_EVAL'), (44097, 'AVL_TRAIN'), (44130, 'AVL_EVAL'), (44131, 'AVL_TRAIN'), (44164, 'AVL_EVAL'), (44165, 'AVL_TRAIN'), (44197, 'AVL_EVAL'), (44232, 'AVL_TRAIN'), (44265, 'AVL_EVAL'), (44266, 'AVL_TRAIN'), (44298, 'AVL_EVAL'), (44299, 'AVL_TRAIN'), (44332, 'AVL_EVAL'), (44333, 'AVL_TRAIN'), (44366, 'AVL_EVAL'), (44367, 'AVL_TRAIN'), (44400, 'AVL_EVAL'), (44401, 'AVL_TRAIN'), (44433, 'AVL_EVAL'), (44434, 'AVL_TRAIN'), (44467, 'AVL_EVAL'), (44468, 'AVL_TRAIN'), (44501, 'AVL_EVAL'), (44502, 'AVL_TRAIN'), (44534, 'AVL_EVAL'), (44535, 'AVL_TRAIN'), (44568, 'AVL_EVAL'), (44569, 'AVL_TRAIN'), (44602, 'AVL_EVAL'), (44603, 'AVL_TRAIN'), (44635, 'AVL_EVAL'), (44637, 'AVL_TRAIN'), 
(44669, 'AVL_EVAL'), (44670, 'AVL_TRAIN'), (44703, 'AVL_EVAL'), (44704, 'AVL_TRAIN'), (44737, 'AVL_EVAL'), (44738, 'AVL_TRAIN'), (44770, 'AVL_EVAL'), (44771, 'AVL_TRAIN'), (44804, 'AVL_EVAL'), (44805, 'AVL_TRAIN'), (44838, 'AVL_EVAL'), (44839, 'AVL_TRAIN'), (44872, 'AVL_EVAL'), (44873, 'AVL_TRAIN'), (44905, 'AVL_EVAL'), (44909, 'AVL_TRAIN'), (44939, 'AVL_EVAL'), (44940, 'AVL_TRAIN'), (44973, 'AVL_EVAL'), (44974, 'AVL_TRAIN'), (45006, 'AVL_EVAL'), (45007, 'AVL_TRAIN'), (45040, 'AVL_EVAL'), (45041, 'AVL_TRAIN'), (45074, 'AVL_EVAL'), (45075, 'AVL_TRAIN'), (45108, 'AVL_EVAL'), (45109, 'AVL_TRAIN'), (45141, 'AVL_EVAL'), (45142, 'AVL_TRAIN'), (45175, 'AVL_EVAL'), (45176, 'AVL_TRAIN'), (45209, 'AVL_EVAL'), (45210, 'AVL_TRAIN'), (45242, 'AVL_EVAL'), (45244, 'AVL_TRAIN'), (45276, 'AVL_EVAL'), (45277, 'AVL_TRAIN'), (45310, 'AVL_EVAL'), (45311, 'AVL_TRAIN'), (45344, 'AVL_EVAL'), (45345, 'AVL_TRAIN'), (45377, 'AVL_EVAL'), (45378, 'AVL_TRAIN'), (45411, 'AVL_EVAL'), (45412, 'AVL_TRAIN'), (45445, 'AVL_EVAL'), (45446, 'AVL_TRAIN'), (45478, 'AVL_EVAL'), (45480, 'AVL_TRAIN'), (45512, 'AVL_EVAL'), (45513, 'AVL_TRAIN'), (45546, 'AVL_EVAL'), (45547, 'AVL_TRAIN'), (45580, 'AVL_EVAL'), (45581, 'AVL_TRAIN'), (45613, 'AVL_EVAL'), (45614, 'AVL_TRAIN'), (45647, 'AVL_EVAL'), (45648, 'AVL_TRAIN'), (45681, 'AVL_EVAL'), (45682, 'AVL_TRAIN'), (45715, 'AVL_EVAL'), (45716, 'AVL_TRAIN'), (45748, 'AVL_EVAL'), (45749, 'AVL_TRAIN'), (45782, 'AVL_EVAL'), (45783, 'AVL_TRAIN'), (45816, 'AVL_EVAL'), (45817, 'AVL_TRAIN'), (45849, 'AVL_EVAL'), (45850, 'AVL_TRAIN'), (45883, 'AVL_EVAL'), (45884, 'AVL_TRAIN'), (45917, 'AVL_EVAL'), (45918, 'AVL_TRAIN'), (45950, 'AVL_EVAL'), (45952, 'AVL_TRAIN'), (45984, 'AVL_EVAL'), (45985, 'AVL_TRAIN'), (46052, 'AVL_EVAL'), (46053, 'AVL_TRAIN'), (46085, 'AVL_EVAL'), (46086, 'AVL_TRAIN'), (46119, 'AVL_EVAL'), (46120, 'AVL_TRAIN'), (46153, 'AVL_EVAL'), (46154, 'AVL_TRAIN'), (46186, 'AVL_EVAL'), (46188, 'AVL_TRAIN'), (46220, 'AVL_EVAL'), (46221, 'AVL_TRAIN'), (46254, 'AVL_EVAL'), 
(46255, 'AVL_TRAIN'), (46288, 'AVL_EVAL'), (46289, 'AVL_TRAIN'), (46321, 'AVL_EVAL'), (46322, 'AVL_TRAIN'), (46355, 'AVL_EVAL'), (46356, 'AVL_TRAIN'), (46389, 'AVL_EVAL'), (46390, 'AVL_TRAIN'), (46423, 'AVL_EVAL'), (46424, 'AVL_TRAIN'), (46456, 'AVL_EVAL'), (46457, 'AVL_TRAIN'), (46490, 'AVL_EVAL'), (46491, 'AVL_TRAIN'), (46524, 'AVL_EVAL'), (46525, 'AVL_TRAIN'), (46557, 'AVL_EVAL'), (46559, 'AVL_TRAIN'), (46591, 'AVL_EVAL'), (46592, 'AVL_TRAIN'), (46625, 'AVL_EVAL'), (46626, 'AVL_TRAIN'), (46659, 'AVL_EVAL'), (46660, 'AVL_TRAIN'), (46692, 'AVL_EVAL'), (46694, 'AVL_TRAIN'), (46726, 'AVL_EVAL'), (46727, 'AVL_TRAIN'), (46760, 'AVL_EVAL'), (46761, 'AVL_TRAIN'), (46793, 'AVL_EVAL'), (46795, 'AVL_TRAIN'), (46823, 'AVL_EVAL'), (46826, 'AVL_TRAIN'), (46829, 'AVL_EVAL'), (46833, 'AVL_TRAIN'), (46835, 'AVL_EVAL'), (46836, 'AVL_TRAIN'), (46838, 'AVL_EVAL'), (46840, 'AVL_TRAIN'), (50773, 'AVL_EVAL'), (50774, 'AVL_TRAIN'), (50776, 'AVL_EVAL'), (50777, 'AVL_TRAIN'), (50779, 'AVL_EVAL'), (50780, 'AVL_TRAIN'), (53322, 'AVL_EVAL'), (53323, 'AVL_TRAIN'), (54579, 'AVL_EVAL'), (54580, 'AVL_TRAIN'), (54974, 'AVL_EVAL'), (54975, 'AVL_TRAIN'), (56090, 'AVL_EVAL'), (56091, 'AVL_TRAIN'), (56375, 'AVL_EVAL'), (56376, 'AVL_TRAIN'), (61071, 'AVL_EVAL'), (61072, 'AVL_TRAIN'), (61153, 'AVL_EVAL'), (80461, 'UN_AVL'), (86811, 'AVL_TRAIN'), (89816, 'UN_AVL'), (104377, 'AVL_TRAIN'), (104380, 'UN_AVL'), (104381, 'AVL_TRAIN'), (111192, 'UN_AVL'), (111193, 'AVL_TRAIN'), (111361, 'UN_AVL'), (111361, 'AVL_TRAIN'), (111374, 'UN_AVL'), (111374, 'AVL_TRAIN'), (111448, 'UN_AVL'), (111450, 'AVL_TRAIN'), (111454, 'UN_AVL'), (111456, 'AVL_TRAIN'), (111458, 'UN_AVL'), (111459, 'AVL_TRAIN'), (111461, 'UN_AVL'), (111463, 'AVL_TRAIN'), (111469, 'UN_AVL'), (111470, 'AVL_TRAIN'), (111474, 'UN_AVL'), (111476, 'AVL_TRAIN'), (111481, 'UN_AVL'), (111482, 'AVL_TRAIN'), (111484, 'UN_AVL'), (111489, 'AVL_TRAIN'), (111491, 'UN_AVL'), (111492, 'AVL_TRAIN'), (111494, 'UN_AVL'), (111495, 'AVL_TRAIN'), (111500, 'UN_AVL'), 
(111502, 'AVL_TRAIN'), (111504, 'UN_AVL'), (111505, 'AVL_TRAIN'), (111519, 'UN_AVL'), (111520, 'AVL_TRAIN'), (111525, 'UN_AVL'), (111527, 'AVL_TRAIN'), (111529, 'UN_AVL'), (111530, 'AVL_TRAIN'), (111532, 'UN_AVL'), (111533, 'AVL_TRAIN'), (111535, 'UN_AVL'), (111536, 'AVL_TRAIN'), (111540, 'UN_AVL'), (111542, 'AVL_TRAIN'), (112928, 'UN_AVL'), (112928, 'AVL_TRAIN'), (114430, 'UN_AVL'), (114431, 'AVL_TRAIN'), (114432, 'UN_AVL'), (114433, 'AVL_TRAIN'), (114804, 'UN_AVL'), (114805, 'AVL_TRAIN'), (114838, 'UN_AVL'), (114839, 'AVL_TRAIN'), (114871, 'UN_AVL'), (114872, 'AVL_TRAIN'), (115096, 'UN_AVL'), (115097, 'AVL_TRAIN'), (115129, 'UN_AVL'), (115130, 'AVL_TRAIN'), (115136, 'UN_AVL'), (115137, 'AVL_TRAIN'), (115915, 'UN_AVL'), (115921, 'AVL_TRAIN'), (115923, 'UN_AVL'), (115926, 'AVL_TRAIN'), (115927, 'UN_AVL'), (115932, 'AVL_TRAIN'), (115933, 'UN_AVL'), (115934, 'AVL_TRAIN'), (115937, 'UN_AVL'), (115938, 'AVL_TRAIN'), (149100, 'AVL_EVAL'), (172871, 'UN_AVL'), (189348, 'AVL_TRAIN'), (189350, 'UN_AVL'), (189351, 'AVL_TRAIN'), (189368, 'UN_AVL'), (189368, 'AVL_TRAIN'), (189603, 'UN_AVL'), (189608, 'AVL_TRAIN'), (190099, 'UN_AVL'), (190101, 'AVL_TRAIN'), (190102, 'UN_AVL'), (190103, 'AVL_TRAIN'), (190105, 'UN_AVL'), (190106, 'AVL_TRAIN'), (190202, 'UN_AVL'), (190203, 'AVL_TRAIN'), (190206, 'UN_AVL'), (190207, 'AVL_TRAIN'), (190209, 'UN_AVL'), (190213, 'AVL_TRAIN'), (191724, 'UN_AVL'), (191725, 'AVL_TRAIN'), (193126, 'UN_AVL'), (193140, 'AVL_TRAIN'), (193142, 'UN_AVL'), (193188, 'AVL_TRAIN'), (195742, 'UN_AVL'), (195743, 'AVL_TRAIN'), (198854, 'UN_AVL'), (198856, 'AVL_TRAIN'), (198858, 'UN_AVL'), (198859, 'AVL_TRAIN'), (199155, 'UN_AVL'), (199156, 'AVL_TRAIN'), (199158, 'UN_AVL'), (199159, 'AVL_TRAIN'), (199161, 'UN_AVL'), (199163, 'AVL_TRAIN'), (199178, 'UN_AVL'), (199179, 'AVL_TRAIN'), (199181, 'UN_AVL'), (199182, 'AVL_TRAIN'), (199185, 'UN_AVL'), (199187, 'AVL_TRAIN'), (199189, 'UN_AVL'), (199220, 'AVL_TRAIN'), (199221, 'UN_AVL'), (199228, 'AVL_TRAIN'), (199233, 'UN_AVL'), 
(199249, 'AVL_TRAIN'), (199314, 'UN_AVL'), (199316, 'AVL_TRAIN'), (199319, 'UN_AVL'), (199322, 'AVL_TRAIN'), (199323, 'UN_AVL'), (199324, 'AVL_TRAIN'), (199354, 'UN_AVL'), (200142, 'AVL_EVAL'), (200142, 'AVL_TRAIN'), (200177, 'AVL_EVAL'), (200180, 'AVL_TRAIN'), (200186, 'UN_AVL'), (232022, 'AVL_TRAIN'), (232027, 'UN_AVL'), (232029, 'AVL_TRAIN'), (232031, 'UN_AVL'), (232032, 'AVL_TRAIN'), (233237, 'UN_AVL'), (233238, 'AVL_TRAIN'), (233249, 'UN_AVL'), (233251, 'AVL_TRAIN'), (233254, 'UN_AVL'), (233271, 'AVL_TRAIN'), (233282, 'UN_AVL'), (233284, 'AVL_TRAIN'), (233287, 'UN_AVL'), (233289, 'AVL_TRAIN'), (233294, 'UN_AVL'), (233295, 'AVL_TRAIN'), (233388, 'UN_AVL'), (233389, 'AVL_TRAIN'), (236099, 'UN_AVL'), (236100, 'AVL_TRAIN'), (236101, 'UN_AVL'), (236102, 'AVL_TRAIN'), (236132, 'UN_AVL'), (236134, 'AVL_TRAIN'), (236145, 'UN_AVL'), (236146, 'AVL_TRAIN'), (236148, 'UN_AVL'), (236149, 'AVL_TRAIN'), (236165, 'UN_AVL'), (236167, 'AVL_TRAIN'), (237499, 'UN_AVL'), (237500, 'AVL_TRAIN'), (237501, 'UN_AVL'), (237501, 'AVL_TRAIN'), (237502, 'UN_AVL'), (237504, 'AVL_TRAIN'), (237514, 'UN_AVL'), (237518, 'AVL_TRAIN'), (237554, 'UN_AVL'), (237563, 'AVL_TRAIN'), (237576, 'UN_AVL'), (237585, 'AVL_TRAIN'), (237586, 'UN_AVL'), (237586, 'AVL_TRAIN'), (237587, 'UN_AVL'), (237587, 'AVL_TRAIN'), (237605, 'UN_AVL'), (237605, 'AVL_TRAIN'), (237606, 'UN_AVL'), (237607, 'AVL_TRAIN'), (237608, 'UN_AVL'), (237610, 'AVL_TRAIN'), (237614, 'UN_AVL'), (237616, 'AVL_TRAIN'), (237618, 'UN_AVL'), (237619, 'AVL_TRAIN'), (237622, 'UN_AVL'), (237630, 'AVL_TRAIN'), (237632, 'UN_AVL'), (237635, 'AVL_TRAIN'), (237636, 'UN_AVL'), (237637, 'AVL_TRAIN'), (237639, 'UN_AVL'), (237640, 'AVL_TRAIN'), (237733, 'UN_AVL'), (237735, 'AVL_TRAIN'), (237875, 'UN_AVL'), (237878, 'AVL_TRAIN'), (237879, 'UN_AVL'), (237879, 'AVL_TRAIN'), (237883, 'UN_AVL'), (237885, 'AVL_TRAIN'), (238558, 'UN_AVL'), (238621, 'AVL_TRAIN'), (238624, 'UN_AVL'), (238729, 'AVL_TRAIN'), (239419, 'UN_AVL'), (239420, 'AVL_TRAIN'), (245185, 
'AVL_EVAL'), (252630, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (6576, 'AVL_TRAIN'), (9540, 'UN_AVL'), (9541, 'AVL_TRAIN'), (12486, 'UN_AVL'), (12487, 'AVL_TRAIN'), (13229, 'UN_AVL'), (13231, 'AVL_TRAIN'), (13233, 'UN_AVL'), (13234, 'AVL_TRAIN'), (13236, 'UN_AVL'), (13237, 'AVL_TRAIN'), (13239, 'UN_AVL'), (14466, 'AVL_TRAIN'), (14688, 'UN_AVL'), (14690, 'AVL_TRAIN'), (15972, 'UN_AVL'), (15972, 'AVL_TRAIN'), (15976, 'UN_AVL'), (15977, 'AVL_TRAIN'), (15979, 'UN_AVL'), (15979, 'AVL_TRAIN'), (15981, 'UN_AVL'), (15985, 'AVL_TRAIN'), (16078, 'UN_AVL'), (16079, 'AVL_TRAIN'), (16081, 'UN_AVL'), (16082, 'AVL_TRAIN'), (16084, 'UN_AVL'), (16086, 'AVL_TRAIN'), (16088, 'UN_AVL'), (16089, 'AVL_TRAIN'), (16978, 'UN_AVL'), (20578, 'AVL_TRAIN'), (21191, 'UN_AVL'), (21192, 'AVL_TRAIN'), (21194, 'UN_AVL'), (21195, 'AVL_TRAIN'), (23527, 'UN_AVL'), (23528, 'AVL_TRAIN'), (25952, 'UN_AVL'), (25953, 'AVL_TRAIN'), (26047, 'UN_AVL'), (26047, 'AVL_TRAIN'), (26048, 'UN_AVL'), (26049, 'AVL_TRAIN'), (26051, 'UN_AVL'), (26053, 'AVL_TRAIN'), (26736, 'UN_AVL'), (26738, 'AVL_TRAIN'), (26761, 'UN_AVL'), (26761, 'AVL_TRAIN'), (26763, 'UN_AVL'), (26764, 'AVL_TRAIN'), (26766, 'UN_AVL'), (26768, 'AVL_TRAIN'), (26861, 'UN_AVL'), (26865, 'AVL_TRAIN'), (43489, 'AVL_EVAL'), (43492, 'AVL_TRAIN'), (43523, 'AVL_EVAL'), (43524, 'AVL_TRAIN'), (43557, 'AVL_EVAL'), (43558, 'AVL_TRAIN'), (43591, 'AVL_EVAL'), (43592, 'AVL_TRAIN'), (43624, 'AVL_EVAL'), (43625, 'AVL_TRAIN'), (43658, 'AVL_EVAL'), (43659, 'AVL_TRAIN'), (43691, 'AVL_EVAL'), (43693, 'AVL_TRAIN'), (43725, 'AVL_EVAL'), (43726, 'AVL_TRAIN'), (43793, 'AVL_EVAL'), (43794, 'AVL_TRAIN'), (43826, 'AVL_EVAL'), (43827, 'AVL_TRAIN'), (43860, 'AVL_EVAL'), (43861, 'AVL_TRAIN'), (43894, 'AVL_EVAL'), (43895, 'AVL_TRAIN'), (43927, 'AVL_EVAL'), (43929, 'AVL_TRAIN'), (43961, 'AVL_EVAL'), (43963, 'AVL_TRAIN'), (43995, 'AVL_EVAL'), (43996, 'AVL_TRAIN'), (44029, 'AVL_EVAL'), (44030, 'AVL_TRAIN'), (44062, 'AVL_EVAL'), (44064, 'AVL_TRAIN'), 
(44096, 'AVL_EVAL'), (44097, 'AVL_TRAIN'), (44130, 'AVL_EVAL'), (44131, 'AVL_TRAIN'), (44164, 'AVL_EVAL'), (44165, 'AVL_TRAIN'), (44197, 'AVL_EVAL'), (44232, 'AVL_TRAIN'), (44265, 'AVL_EVAL'), (44266, 'AVL_TRAIN'), (44298, 'AVL_EVAL'), (44299, 'AVL_TRAIN'), (44332, 'AVL_EVAL'), (44333, 'AVL_TRAIN'), (44366, 'AVL_EVAL'), (44367, 'AVL_TRAIN'), (44400, 'AVL_EVAL'), (44401, 'AVL_TRAIN'), (44433, 'AVL_EVAL'), (44434, 'AVL_TRAIN'), (44467, 'AVL_EVAL'), (44468, 'AVL_TRAIN'), (44501, 'AVL_EVAL'), (44502, 'AVL_TRAIN'), (44534, 'AVL_EVAL'), (44535, 'AVL_TRAIN'), (44568, 'AVL_EVAL'), (44569, 'AVL_TRAIN'), (44602, 'AVL_EVAL'), (44603, 'AVL_TRAIN'), (44635, 'AVL_EVAL'), (44637, 'AVL_TRAIN'), (44669, 'AVL_EVAL'), (44670, 'AVL_TRAIN'), (44703, 'AVL_EVAL'), (44704, 'AVL_TRAIN'), (44737, 'AVL_EVAL'), (44738, 'AVL_TRAIN'), (44770, 'AVL_EVAL'), (44771, 'AVL_TRAIN'), (44804, 'AVL_EVAL'), (44805, 'AVL_TRAIN'), (44838, 'AVL_EVAL'), (44839, 'AVL_TRAIN'), (44872, 'AVL_EVAL'), (44873, 'AVL_TRAIN'), (44905, 'AVL_EVAL'), (44909, 'AVL_TRAIN'), (44939, 'AVL_EVAL'), (44940, 'AVL_TRAIN'), (44973, 'AVL_EVAL'), (44974, 'AVL_TRAIN'), (45006, 'AVL_EVAL'), (45007, 'AVL_TRAIN'), (45040, 'AVL_EVAL'), (45041, 'AVL_TRAIN'), (45074, 'AVL_EVAL'), (45075, 'AVL_TRAIN'), (45108, 'AVL_EVAL'), (45109, 'AVL_TRAIN'), (45141, 'AVL_EVAL'), (45142, 'AVL_TRAIN'), (45175, 'AVL_EVAL'), (45176, 'AVL_TRAIN'), (45209, 'AVL_EVAL'), (45210, 'AVL_TRAIN'), (45242, 'AVL_EVAL'), (45244, 'AVL_TRAIN'), (45276, 'AVL_EVAL'), (45277, 'AVL_TRAIN'), (45310, 'AVL_EVAL'), (45311, 'AVL_TRAIN'), (45344, 'AVL_EVAL'), (45345, 'AVL_TRAIN'), (45377, 'AVL_EVAL'), (45378, 'AVL_TRAIN'), (45411, 'AVL_EVAL'), (45412, 'AVL_TRAIN'), (45445, 'AVL_EVAL'), (45446, 'AVL_TRAIN'), (45478, 'AVL_EVAL'), (45480, 'AVL_TRAIN'), (45512, 'AVL_EVAL'), (45513, 'AVL_TRAIN'), (45546, 'AVL_EVAL'), (45547, 'AVL_TRAIN'), (45580, 'AVL_EVAL'), (45581, 'AVL_TRAIN'), (45613, 'AVL_EVAL'), (45614, 'AVL_TRAIN'), (45647, 'AVL_EVAL'), (45648, 'AVL_TRAIN'), (45681, 'AVL_EVAL'), 
(45682, 'AVL_TRAIN'), (45715, 'AVL_EVAL'), (45716, 'AVL_TRAIN'), (45748, 'AVL_EVAL'), (45749, 'AVL_TRAIN'), (45782, 'AVL_EVAL'), (45783, 'AVL_TRAIN'), (45816, 'AVL_EVAL'), (45817, 'AVL_TRAIN'), (45849, 'AVL_EVAL'), (45850, 'AVL_TRAIN'), (45883, 'AVL_EVAL'), (45884, 'AVL_TRAIN'), (45917, 'AVL_EVAL'), (45918, 'AVL_TRAIN'), (45950, 'AVL_EVAL'), (45952, 'AVL_TRAIN'), (45984, 'AVL_EVAL'), (45985, 'AVL_TRAIN'), (46052, 'AVL_EVAL'), (46053, 'AVL_TRAIN'), (46085, 'AVL_EVAL'), (46086, 'AVL_TRAIN'), (46119, 'AVL_EVAL'), (46120, 'AVL_TRAIN'), (46153, 'AVL_EVAL'), (46154, 'AVL_TRAIN'), (46186, 'AVL_EVAL'), (46188, 'AVL_TRAIN'), (46220, 'AVL_EVAL'), (46221, 'AVL_TRAIN'), (46254, 'AVL_EVAL'), (46255, 'AVL_TRAIN'), (46288, 'AVL_EVAL'), (46289, 'AVL_TRAIN'), (46321, 'AVL_EVAL'), (46322, 'AVL_TRAIN'), (46355, 'AVL_EVAL'), (46356, 'AVL_TRAIN'), (46389, 'AVL_EVAL'), (46390, 'AVL_TRAIN'), (46423, 'AVL_EVAL'), (46424, 'AVL_TRAIN'), (46456, 'AVL_EVAL'), (46457, 'AVL_TRAIN'), (46490, 'AVL_EVAL'), (46491, 'AVL_TRAIN'), (46524, 'AVL_EVAL'), (46525, 'AVL_TRAIN'), (46557, 'AVL_EVAL'), (46559, 'AVL_TRAIN'), (46591, 'AVL_EVAL'), (46592, 'AVL_TRAIN'), (46625, 'AVL_EVAL'), (46626, 'AVL_TRAIN'), (46659, 'AVL_EVAL'), (46660, 'AVL_TRAIN'), (46692, 'AVL_EVAL'), (46694, 'AVL_TRAIN'), (46726, 'AVL_EVAL'), (46727, 'AVL_TRAIN'), (46760, 'AVL_EVAL'), (46761, 'AVL_TRAIN'), (46793, 'AVL_EVAL'), (46795, 'AVL_TRAIN'), (46823, 'AVL_EVAL'), (46826, 'AVL_TRAIN'), (46829, 'AVL_EVAL'), (46833, 'AVL_TRAIN'), (46835, 'AVL_EVAL'), (46836, 'AVL_TRAIN'), (46838, 'AVL_EVAL'), (46840, 'AVL_TRAIN'), (50773, 'AVL_EVAL'), (50774, 'AVL_TRAIN'), (50776, 'AVL_EVAL'), (50777, 'AVL_TRAIN'), (50779, 'AVL_EVAL'), (50780, 'AVL_TRAIN'), (53322, 'AVL_EVAL'), (53323, 'AVL_TRAIN'), (54579, 'AVL_EVAL'), (54580, 'AVL_TRAIN'), (54974, 'AVL_EVAL'), (54975, 'AVL_TRAIN'), (56090, 'AVL_EVAL'), (56091, 'AVL_TRAIN'), (56375, 'AVL_EVAL'), (56376, 'AVL_TRAIN'), (61071, 'AVL_EVAL'), (61072, 'AVL_TRAIN'), (61153, 'AVL_EVAL'), (70988, 'UN_AVL'), 
(86811, 'AVL_TRAIN'), (89816, 'UN_AVL'), (104377, 'AVL_TRAIN'), (104380, 'UN_AVL'), (104381, 'AVL_TRAIN'), (111192, 'UN_AVL'), (111193, 'AVL_TRAIN'), (111361, 'UN_AVL'), (111361, 'AVL_TRAIN'), (111374, 'UN_AVL'), (111374, 'AVL_TRAIN'), (111448, 'UN_AVL'), (111450, 'AVL_TRAIN'), (111454, 'UN_AVL'), (111456, 'AVL_TRAIN'), (111458, 'UN_AVL'), (111459, 'AVL_TRAIN'), (111461, 'UN_AVL'), (111463, 'AVL_TRAIN'), (111469, 'UN_AVL'), (111470, 'AVL_TRAIN'), (111474, 'UN_AVL'), (111476, 'AVL_TRAIN'), (111481, 'UN_AVL'), (111482, 'AVL_TRAIN'), (111484, 'UN_AVL'), (111489, 'AVL_TRAIN'), (111491, 'UN_AVL'), (111492, 'AVL_TRAIN'), (111494, 'UN_AVL'), (111495, 'AVL_TRAIN'), (111500, 'UN_AVL'), (111502, 'AVL_TRAIN'), (111504, 'UN_AVL'), (111505, 'AVL_TRAIN'), (111519, 'UN_AVL'), (111520, 'AVL_TRAIN'), (111525, 'UN_AVL'), (111527, 'AVL_TRAIN'), (111529, 'UN_AVL'), (111530, 'AVL_TRAIN'), (111532, 'UN_AVL'), (111533, 'AVL_TRAIN'), (111535, 'UN_AVL'), (111536, 'AVL_TRAIN'), (111540, 'UN_AVL'), (111542, 'AVL_TRAIN'), (112928, 'UN_AVL'), (112928, 'AVL_TRAIN'), (114430, 'UN_AVL'), (114431, 'AVL_TRAIN'), (114432, 'UN_AVL'), (114433, 'AVL_TRAIN'), (114804, 'UN_AVL'), (114805, 'AVL_TRAIN'), (114838, 'UN_AVL'), (114839, 'AVL_TRAIN'), (114871, 'UN_AVL'), (114872, 'AVL_TRAIN'), (115096, 'UN_AVL'), (115097, 'AVL_TRAIN'), (115129, 'UN_AVL'), (115130, 'AVL_TRAIN'), (115136, 'UN_AVL'), (115137, 'AVL_TRAIN'), (115915, 'UN_AVL'), (115921, 'AVL_TRAIN'), (115923, 'UN_AVL'), (115926, 'AVL_TRAIN'), (115927, 'UN_AVL'), (115932, 'AVL_TRAIN'), (115933, 'UN_AVL'), (115934, 'AVL_TRAIN'), (115937, 'UN_AVL'), (115938, 'AVL_TRAIN'), (149100, 'AVL_EVAL'), (165210, 'UN_AVL'), (189348, 'AVL_TRAIN'), (189350, 'UN_AVL'), (189351, 'AVL_TRAIN'), (189368, 'UN_AVL'), (189368, 'AVL_TRAIN'), (189603, 'UN_AVL'), (189608, 'AVL_TRAIN'), (190099, 'UN_AVL'), (190101, 'AVL_TRAIN'), (190102, 'UN_AVL'), (190103, 'AVL_TRAIN'), (190105, 'UN_AVL'), (190106, 'AVL_TRAIN'), (190202, 'UN_AVL'), (190203, 'AVL_TRAIN'), (190206, 'UN_AVL'), 
(190207, 'AVL_TRAIN'), (190209, 'UN_AVL'), (190213, 'AVL_TRAIN'), (191724, 'UN_AVL'), (191725, 'AVL_TRAIN'), (193126, 'UN_AVL'), (193140, 'AVL_TRAIN'), (193142, 'UN_AVL'), (193188, 'AVL_TRAIN'), (195742, 'UN_AVL'), (195743, 'AVL_TRAIN'), (198854, 'UN_AVL'), (198856, 'AVL_TRAIN'), (198858, 'UN_AVL'), (198859, 'AVL_TRAIN'), (199155, 'UN_AVL'), (199156, 'AVL_TRAIN'), (199158, 'UN_AVL'), (199159, 'AVL_TRAIN'), (199161, 'UN_AVL'), (199163, 'AVL_TRAIN'), (199178, 'UN_AVL'), (199179, 'AVL_TRAIN'), (199181, 'UN_AVL'), (199182, 'AVL_TRAIN'), (199185, 'UN_AVL'), (199187, 'AVL_TRAIN'), (199189, 'UN_AVL'), (199220, 'AVL_TRAIN'), (199221, 'UN_AVL'), (199228, 'AVL_TRAIN'), (199233, 'UN_AVL'), (199249, 'AVL_TRAIN'), (199314, 'UN_AVL'), (199316, 'AVL_TRAIN'), (199319, 'UN_AVL'), (199322, 'AVL_TRAIN'), (199323, 'UN_AVL'), (199324, 'AVL_TRAIN'), (199354, 'UN_AVL'), (200142, 'AVL_TRAIN'), (200177, 'UN_AVL'), (200180, 'AVL_TRAIN'), (200186, 'UN_AVL'), (232022, 'AVL_TRAIN'), (232027, 'UN_AVL'), (232029, 'AVL_TRAIN'), (232031, 'UN_AVL'), (232032, 'AVL_TRAIN'), (233237, 'UN_AVL'), (233238, 'AVL_TRAIN'), (233249, 'UN_AVL'), (233251, 'AVL_TRAIN'), (233254, 'UN_AVL'), (233271, 'AVL_TRAIN'), (233282, 'UN_AVL'), (233284, 'AVL_TRAIN'), (233287, 'UN_AVL'), (233289, 'AVL_TRAIN'), (233294, 'UN_AVL'), (233295, 'AVL_TRAIN'), (233388, 'UN_AVL'), (233389, 'AVL_TRAIN'), (236099, 'UN_AVL'), (236100, 'AVL_TRAIN'), (236101, 'UN_AVL'), (236102, 'AVL_TRAIN'), (236132, 'UN_AVL'), (236134, 'AVL_TRAIN'), (236145, 'UN_AVL'), (236146, 'AVL_TRAIN'), (236148, 'UN_AVL'), (236149, 'AVL_TRAIN'), (236165, 'UN_AVL'), (236167, 'AVL_TRAIN'), (237499, 'UN_AVL'), (237500, 'AVL_TRAIN'), (237501, 'UN_AVL'), (237501, 'AVL_TRAIN'), (237502, 'UN_AVL'), (237504, 'AVL_TRAIN'), (237514, 'UN_AVL'), (237518, 'AVL_TRAIN'), (237554, 'UN_AVL'), (237563, 'AVL_TRAIN'), (237576, 'UN_AVL'), (237585, 'AVL_TRAIN'), (237586, 'UN_AVL'), (237586, 'AVL_TRAIN'), (237587, 'UN_AVL'), (237587, 'AVL_TRAIN'), (237605, 'UN_AVL'), (237605, 
'AVL_TRAIN'), (237606, 'UN_AVL'), (237607, 'AVL_TRAIN'), (237608, 'UN_AVL'), (237610, 'AVL_TRAIN'), (237614, 'UN_AVL'), (237616, 'AVL_TRAIN'), (237618, 'UN_AVL'), (237619, 'AVL_TRAIN'), (237622, 'UN_AVL'), (237630, 'AVL_TRAIN'), (237632, 'UN_AVL'), (237635, 'AVL_TRAIN'), (237636, 'UN_AVL'), (237637, 'AVL_TRAIN'), (237639, 'UN_AVL'), (237640, 'AVL_TRAIN'), (237733, 'UN_AVL'), (237735, 'AVL_TRAIN'), (237875, 'UN_AVL'), (237878, 'AVL_TRAIN'), (237879, 'UN_AVL'), (237879, 'AVL_TRAIN'), (237883, 'UN_AVL'), (237885, 'AVL_TRAIN'), (238558, 'UN_AVL'), (238621, 'AVL_TRAIN'), (238624, 'UN_AVL'), (238729, 'AVL_TRAIN'), (239419, 'UN_AVL'), (239420, 'AVL_TRAIN'), (245185, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_18.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_18.json index 8666dfea6..33314d48d 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_18.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_18.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (26654, 'AVL_TRAIN'), (46189, 'AVL_EVAL'), (99957, 'UN_AVL'), (136656, 'AVL_EVAL'), (151128, 'UN_AVL'), 
(196350, 'AVL_TRAIN'), (202296, 'UN_AVL'), (229684, 'AVL_TRAIN'), (232670, 'AVL_EVAL'), (237324, 'UN_AVL'), (315306, 'AVL_TRAIN'), (320133, 'AVL_EVAL'), (321773, 'UN_AVL'), (345671, 'AVL_TRAIN'), (357090, 'AVL_EVAL'), (414921, 'UN_AVL'), (478397, 'AVL_TRAIN'), (482409, 'AVL_EVAL'), (488483, 'AVL_TRAIN'), (490407, 'AVL_EVAL'), (508615, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (26654, 'AVL_TRAIN'), (46189, 'AVL_EVAL'), (70035, 'UN_AVL'), (196350, 'AVL_TRAIN'), (202296, 'UN_AVL'), (229684, 'AVL_TRAIN'), (232670, 'UN_AVL'), (315306, 'AVL_TRAIN'), (320133, 'UN_AVL'), (345671, 'AVL_TRAIN'), (357090, 'AVL_EVAL'), (396783, 'UN_AVL'), (478397, 'AVL_TRAIN'), (482409, 'UN_AVL'), (488483, 'AVL_TRAIN'), (490407, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_19.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_19.json index 30eaa2393..4c07c309a 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_19.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_19.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "47.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (9203, 'AVL_EVAL'), 
(72571, 'UN_AVL'), (72571, 'AVL_TRAIN'), (75048, 'UN_AVL'), (84940, 'AVL_TRAIN'), (85051, 'UN_AVL'), (85065, 'AVL_TRAIN'), (87122, 'UN_AVL'), (87660, 'AVL_TRAIN'), (88289, 'UN_AVL'), (94131, 'AVL_TRAIN'), (94838, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (72571, 'AVL_TRAIN'), (75048, 'UN_AVL'), (84940, 'AVL_TRAIN'), (85051, 'UN_AVL'), (85065, 'AVL_TRAIN'), (87122, 'UN_AVL'), (87660, 'AVL_TRAIN'), (88289, 'UN_AVL'), (94131, 'AVL_TRAIN'), (94838, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_2.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_2.json index f0d15735f..dc9a001b5 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_2.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_2.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "13.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (391492, 'AVL_TRAIN'), (392455, 'UN_AVL'), (392456, 'AVL_TRAIN'), (397781, 'UN_AVL'), (397813, 'AVL_TRAIN'), (403293, 'UN_AVL'), (406742, 'AVL_TRAIN'), (407792, 'UN_AVL'), (408447, 'AVL_TRAIN'), (408449, 'UN_AVL'), (408449, 'AVL_TRAIN'), (408451, 'UN_AVL'), (408459, 'AVL_TRAIN'), 
(409921, 'UN_AVL'), (409923, 'AVL_TRAIN'), (409935, 'UN_AVL'), (413045, 'AVL_TRAIN'), (413146, 'UN_AVL'), (413147, 'AVL_TRAIN'), (413162, 'UN_AVL'), (413239, 'AVL_TRAIN'), (413248, 'UN_AVL'), (413287, 'AVL_TRAIN'), (413316, 'UN_AVL'), (413332, 'AVL_TRAIN'), (413376, 'UN_AVL'), (413385, 'AVL_TRAIN'), (413419, 'UN_AVL'), (413434, 'AVL_TRAIN'), (413488, 'UN_AVL'), (413512, 'AVL_TRAIN'), (414931, 'UN_AVL'), (414942, 'AVL_TRAIN'), (415602, 'UN_AVL'), (415614, 'AVL_TRAIN'), (415705, 'UN_AVL'), (415728, 'AVL_TRAIN'), (416902, 'UN_AVL'), (416915, 'AVL_TRAIN'), (419524, 'UN_AVL'), (419526, 'AVL_TRAIN'), (419575, 'UN_AVL'), (419575, 'AVL_TRAIN'), (420540, 'UN_AVL'), (420848, 'AVL_TRAIN'), (420987, 'UN_AVL'), (420989, 'AVL_TRAIN'), (421302, 'UN_AVL'), (421387, 'AVL_TRAIN'), (422656, 'UN_AVL'), (422658, 'AVL_TRAIN'), (422659, 'UN_AVL'), (422660, 'AVL_TRAIN'), (422727, 'UN_AVL'), (435710, 'AVL_EVAL'), (435710, 'AVL_TRAIN'), (438969, 'AVL_EVAL'), (454335, 'UN_AVL'), (462425, 'AVL_TRAIN'), (462830, 'UN_AVL'), (462830, 'AVL_TRAIN'), (467985, 'UN_AVL'), (468378, 'AVL_TRAIN'), (468426, 'UN_AVL'), (470475, 'AVL_TRAIN'), (471678, 'UN_AVL'), (471715, 'AVL_TRAIN'), (477047, 'UN_AVL'), (477048, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (391492, 'AVL_TRAIN'), (392455, 'UN_AVL'), (392456, 'AVL_TRAIN'), (397781, 'UN_AVL'), (397813, 'AVL_TRAIN'), (403293, 'UN_AVL'), (406742, 'AVL_TRAIN'), (407792, 'UN_AVL'), (408447, 'AVL_TRAIN'), (408449, 'UN_AVL'), (408449, 'AVL_TRAIN'), (408451, 'UN_AVL'), (408459, 'AVL_TRAIN'), (409921, 'UN_AVL'), (409923, 'AVL_TRAIN'), (409935, 'UN_AVL'), (413045, 'AVL_TRAIN'), (413146, 'UN_AVL'), (413147, 'AVL_TRAIN'), (413162, 'UN_AVL'), (413239, 'AVL_TRAIN'), (413248, 'UN_AVL'), (413287, 'AVL_TRAIN'), (413316, 'UN_AVL'), (413332, 'AVL_TRAIN'), (413376, 'UN_AVL'), (413385, 'AVL_TRAIN'), (413419, 'UN_AVL'), (413434, 'AVL_TRAIN'), (413488, 'UN_AVL'), (413512, 'AVL_TRAIN'), (414931, 'UN_AVL'), (414942, 'AVL_TRAIN'), (415602, 
'UN_AVL'), (415614, 'AVL_TRAIN'), (415705, 'UN_AVL'), (415728, 'AVL_TRAIN'), (416902, 'UN_AVL'), (416915, 'AVL_TRAIN'), (419524, 'UN_AVL'), (419526, 'AVL_TRAIN'), (419575, 'UN_AVL'), (419575, 'AVL_TRAIN'), (420540, 'UN_AVL'), (420848, 'AVL_TRAIN'), (420987, 'UN_AVL'), (420989, 'AVL_TRAIN'), (421302, 'UN_AVL'), (421387, 'AVL_TRAIN'), (422656, 'UN_AVL'), (422658, 'AVL_TRAIN'), (422659, 'UN_AVL'), (422660, 'AVL_TRAIN'), (422727, 'UN_AVL'), (435710, 'AVL_TRAIN'), (438969, 'AVL_EVAL'), (450043, 'UN_AVL'), (462425, 'AVL_TRAIN'), (462830, 'UN_AVL'), (462830, 'AVL_TRAIN'), (467985, 'UN_AVL'), (468378, 'AVL_TRAIN'), (468426, 'UN_AVL'), (470475, 'AVL_TRAIN'), (471678, 'UN_AVL'), (471715, 'AVL_TRAIN'), (477047, 'UN_AVL'), (477048, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_20.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_20.json index a196f6944..bc8b1fab4 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_20.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_20.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "4.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (313212, 'AVL_EVAL'), (334653, 
'UN_AVL'), (356385, 'AVL_TRAIN'), (356387, 'UN_AVL'), (356412, 'AVL_TRAIN'), (356413, 'UN_AVL'), (356416, 'AVL_TRAIN'), (356419, 'UN_AVL'), (356420, 'AVL_TRAIN'), (356428, 'UN_AVL'), (356429, 'AVL_TRAIN'), (356440, 'UN_AVL'), (356441, 'AVL_TRAIN'), (356444, 'UN_AVL'), (356444, 'AVL_TRAIN'), (356450, 'UN_AVL'), (356451, 'AVL_TRAIN'), (356454, 'UN_AVL'), (356459, 'AVL_TRAIN'), (356466, 'UN_AVL'), (356466, 'AVL_TRAIN'), (356478, 'UN_AVL'), (356479, 'AVL_TRAIN'), (356490, 'UN_AVL'), (356491, 'AVL_TRAIN'), (356495, 'UN_AVL'), (356502, 'AVL_TRAIN'), (356503, 'UN_AVL'), (356522, 'AVL_TRAIN'), (356523, 'UN_AVL'), (356524, 'AVL_TRAIN'), (356527, 'UN_AVL'), (356561, 'AVL_TRAIN'), (377020, 'AVL_EVAL'), (379578, 'AVL_TRAIN'), (379582, 'AVL_EVAL'), (379583, 'AVL_TRAIN'), (379585, 'AVL_EVAL'), (379586, 'AVL_TRAIN'), (379590, 'AVL_EVAL'), (379592, 'AVL_TRAIN'), (381039, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (313212, 'AVL_EVAL'), (325109, 'UN_AVL'), (356385, 'AVL_TRAIN'), (356387, 'UN_AVL'), (356412, 'AVL_TRAIN'), (356413, 'UN_AVL'), (356416, 'AVL_TRAIN'), (356419, 'UN_AVL'), (356420, 'AVL_TRAIN'), (356428, 'UN_AVL'), (356429, 'AVL_TRAIN'), (356440, 'UN_AVL'), (356441, 'AVL_TRAIN'), (356444, 'UN_AVL'), (356444, 'AVL_TRAIN'), (356450, 'UN_AVL'), (356451, 'AVL_TRAIN'), (356454, 'UN_AVL'), (356459, 'AVL_TRAIN'), (356466, 'UN_AVL'), (356466, 'AVL_TRAIN'), (356478, 'UN_AVL'), (356479, 'AVL_TRAIN'), (356490, 'UN_AVL'), (356491, 'AVL_TRAIN'), (356495, 'UN_AVL'), (356502, 'AVL_TRAIN'), (356503, 'UN_AVL'), (356522, 'AVL_TRAIN'), (356523, 'UN_AVL'), (356524, 'AVL_TRAIN'), (356527, 'UN_AVL'), (356561, 'AVL_TRAIN'), (377020, 'AVL_EVAL'), (379578, 'AVL_TRAIN'), (379582, 'AVL_EVAL'), (379583, 'AVL_TRAIN'), (379585, 'AVL_EVAL'), (379586, 'AVL_TRAIN'), (379590, 'AVL_EVAL'), (379592, 'AVL_TRAIN'), (381039, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_21.json 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_21.json index c2f40b27a..74b116d86 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_21.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_21.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "11.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (828, 'AVL_TRAIN'), (3072, 'UN_AVL'), (12583, 'AVL_TRAIN'), (12895, 'UN_AVL'), (12897, 'AVL_TRAIN'), (12917, 'UN_AVL'), (12943, 'AVL_TRAIN'), (13050, 'UN_AVL'), (13056, 'AVL_TRAIN'), (13057, 'UN_AVL'), (13080, 'AVL_TRAIN'), (13265, 'UN_AVL'), (13385, 'AVL_TRAIN'), (13390, 'UN_AVL'), (69177, 'AVL_EVAL'), (115212, 'UN_AVL'), (141110, 'AVL_EVAL'), (141110, 'AVL_TRAIN'), (144187, 'AVL_EVAL'), (192607, 'UN_AVL'), (241710, 'AVL_TRAIN'), (242453, 'UN_AVL'), (257108, 'AVL_TRAIN'), (262679, 'AVL_EVAL'), (292973, 'UN_AVL'), (377424, 'AVL_EVAL'), (399833, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (828, 'AVL_TRAIN'), (3072, 'UN_AVL'), (12583, 'AVL_TRAIN'), (12895, 'UN_AVL'), (12897, 'AVL_TRAIN'), (12917, 'UN_AVL'), (12943, 'AVL_TRAIN'), (13050, 'UN_AVL'), (13056, 'AVL_TRAIN'), (13057, 'UN_AVL'), (13080, 'AVL_TRAIN'), (13265, 
'UN_AVL'), (13385, 'AVL_TRAIN'), (13390, 'UN_AVL'), (69177, 'AVL_EVAL'), (89108, 'UN_AVL'), (141110, 'AVL_EVAL'), (141110, 'AVL_TRAIN'), (144187, 'AVL_EVAL'), (182611, 'UN_AVL'), (241710, 'AVL_TRAIN'), (242453, 'UN_AVL'), (257108, 'AVL_TRAIN'), (262679, 'AVL_EVAL'), (272391, 'UN_AVL'), (377424, 'AVL_EVAL'), (399833, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_22.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_22.json index ec3c7a3ca..9808c7d0d 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_22.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_22.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "21.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (2866, 'AVL_TRAIN'), (5183, 'AVL_EVAL'), (64497, 'UN_AVL'), (73502, 'AVL_EVAL'), (79246, 'UN_AVL'), (84598, 'AVL_TRAIN'), (137675, 'AVL_EVAL'), (155633, 'AVL_TRAIN'), (155946, 'AVL_EVAL'), (155946, 'AVL_TRAIN'), (155956, 'AVL_EVAL'), (171708, 'UN_AVL'), (188215, 'AVL_EVAL'), (233701, 'AVL_TRAIN'), (236332, 'AVL_EVAL'), (254099, 'UN_AVL'), (331378, 'AVL_EVAL'), (348460, 'UN_AVL'), (348488, 'AVL_TRAIN'), (352131, 'AVL_EVAL'), (413676, 'UN_AVL'), 
(443938, 'AVL_EVAL'), (459441, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (2866, 'AVL_TRAIN'), (5183, 'UN_AVL'), (50090, 'AVL_EVAL'), (59204, 'UN_AVL'), (84598, 'AVL_TRAIN'), (137675, 'AVL_EVAL'), (150458, 'UN_AVL'), (155633, 'AVL_TRAIN'), (155946, 'UN_AVL'), (155946, 'AVL_TRAIN'), (155956, 'UN_AVL'), (160644, 'AVL_EVAL'), (161386, 'UN_AVL'), (188215, 'AVL_EVAL'), (233701, 'UN_AVL'), (233701, 'AVL_TRAIN'), (236332, 'AVL_EVAL'), (245196, 'UN_AVL'), (331378, 'AVL_EVAL'), (338061, 'UN_AVL'), (348488, 'AVL_TRAIN'), (352131, 'AVL_EVAL'), (356504, 'UN_AVL'), (403009, 'AVL_EVAL'), (408689, 'UN_AVL'), (443938, 'AVL_EVAL'), (454764, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_23.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_23.json index d69d00ce7..348df7a00 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_23.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_23.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (44154, 'UN_AVL'), (80604, 'AVL_TRAIN'), (86390, 'AVL_EVAL'), (138512, 'UN_AVL'), (153815, 
'AVL_TRAIN'), (159369, 'AVL_EVAL'), (163051, 'AVL_TRAIN'), (163162, 'AVL_EVAL'), (185967, 'UN_AVL'), (232745, 'AVL_TRAIN'), (241563, 'AVL_EVAL'), (266498, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (4167, 'UN_AVL'), (80604, 'AVL_TRAIN'), (86390, 'AVL_EVAL'), (96018, 'UN_AVL'), (153815, 'AVL_TRAIN'), (159369, 'AVL_EVAL'), (163051, 'AVL_TRAIN'), (163162, 'AVL_EVAL'), (174377, 'UN_AVL'), (232745, 'AVL_TRAIN'), (241563, 'AVL_EVAL'), (259340, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_24.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_24.json index d763e6ebd..e7c5efd43 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_24.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_24.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "15.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1435, 'AVL_EVAL'), (39044, 'AVL_TRAIN'), (42386, 'AVL_EVAL'), (70496, 'UN_AVL'), (94915, 'AVL_TRAIN'), (95587, 'UN_AVL'), (97037, 'AVL_TRAIN'), (97213, 'UN_AVL'), (99256, 'AVL_TRAIN'), (100235, 'UN_AVL'), (100236, 'AVL_TRAIN'), (100256, 'UN_AVL'), (100256, 'AVL_TRAIN'), (100266, 
'UN_AVL'), (126438, 'AVL_TRAIN'), (127763, 'UN_AVL'), (127764, 'AVL_TRAIN'), (129273, 'UN_AVL'), (131869, 'AVL_TRAIN'), (135147, 'AVL_EVAL'), (148611, 'AVL_TRAIN'), (153451, 'AVL_EVAL'), (164152, 'UN_AVL'), (175954, 'AVL_TRAIN'), (176589, 'UN_AVL'), (219108, 'AVL_TRAIN'), (224999, 'AVL_EVAL'), (228056, 'UN_AVL'), (236840, 'AVL_TRAIN'), (237398, 'UN_AVL'), (244809, 'AVL_TRAIN'), (247758, 'UN_AVL'), (252048, 'AVL_TRAIN'), (256597, 'UN_AVL'), (299838, 'AVL_TRAIN'), (316249, 'AVL_EVAL'), (337094, 'UN_AVL'), (347628, 'AVL_TRAIN'), (348846, 'UN_AVL'), (349017, 'AVL_TRAIN'), (349084, 'UN_AVL'), (350300, 'AVL_TRAIN'), (350339, 'UN_AVL'), (350340, 'AVL_TRAIN'), (353682, 'UN_AVL'), (354932, 'AVL_TRAIN'), (354941, 'UN_AVL'), (354942, 'AVL_TRAIN'), (355114, 'UN_AVL'), (355115, 'AVL_TRAIN'), (355308, 'UN_AVL'), (355310, 'AVL_TRAIN'), (355313, 'UN_AVL'), (355315, 'AVL_TRAIN'), (355317, 'UN_AVL'), (355318, 'AVL_TRAIN'), (356801, 'UN_AVL'), (356801, 'AVL_TRAIN'), (357089, 'UN_AVL'), (357089, 'AVL_TRAIN'), (357311, 'UN_AVL'), (357312, 'AVL_TRAIN'), (357313, 'UN_AVL'), (358086, 'AVL_TRAIN'), (358844, 'UN_AVL'), (358846, 'AVL_TRAIN'), (358886, 'UN_AVL'), (366429, 'AVL_TRAIN'), (388336, 'AVL_EVAL'), (414433, 'AVL_TRAIN'), (421914, 'AVL_EVAL'), (448779, 'AVL_TRAIN'), (449766, 'AVL_EVAL'), (490033, 'UN_AVL'), (492093, 'AVL_TRAIN'), (496041, 'AVL_EVAL'), (496044, 'AVL_TRAIN'), (496052, 'AVL_EVAL'), (510524, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1435, 'AVL_EVAL'), (19222, 'UN_AVL'), (39044, 'AVL_TRAIN'), (42386, 'AVL_EVAL'), (54366, 'UN_AVL'), (94915, 'AVL_TRAIN'), (95587, 'UN_AVL'), (97037, 'AVL_TRAIN'), (97213, 'UN_AVL'), (99256, 'AVL_TRAIN'), (100235, 'UN_AVL'), (100236, 'AVL_TRAIN'), (100256, 'UN_AVL'), (100256, 'AVL_TRAIN'), (100266, 'UN_AVL'), (126438, 'AVL_TRAIN'), (127763, 'UN_AVL'), (127764, 'AVL_TRAIN'), (129273, 'UN_AVL'), (131869, 'AVL_TRAIN'), (135147, 'UN_AVL'), (148611, 'AVL_TRAIN'), (153451, 'UN_AVL'), (175954, 'AVL_TRAIN'), 
(176589, 'UN_AVL'), (219108, 'AVL_TRAIN'), (224999, 'UN_AVL'), (236840, 'AVL_TRAIN'), (237398, 'UN_AVL'), (244809, 'AVL_TRAIN'), (247758, 'UN_AVL'), (252048, 'AVL_TRAIN'), (256597, 'UN_AVL'), (299838, 'AVL_TRAIN'), (316249, 'AVL_EVAL'), (337094, 'UN_AVL'), (347628, 'AVL_TRAIN'), (348846, 'UN_AVL'), (349017, 'AVL_TRAIN'), (349084, 'UN_AVL'), (350300, 'AVL_TRAIN'), (350339, 'UN_AVL'), (350340, 'AVL_TRAIN'), (353682, 'UN_AVL'), (354932, 'AVL_TRAIN'), (354941, 'UN_AVL'), (354942, 'AVL_TRAIN'), (355114, 'UN_AVL'), (355115, 'AVL_TRAIN'), (355308, 'UN_AVL'), (355310, 'AVL_TRAIN'), (355313, 'UN_AVL'), (355315, 'AVL_TRAIN'), (355317, 'UN_AVL'), (355318, 'AVL_TRAIN'), (356801, 'UN_AVL'), (356801, 'AVL_TRAIN'), (357089, 'UN_AVL'), (357089, 'AVL_TRAIN'), (357311, 'UN_AVL'), (357312, 'AVL_TRAIN'), (357313, 'UN_AVL'), (358086, 'AVL_TRAIN'), (358844, 'UN_AVL'), (358846, 'AVL_TRAIN'), (358886, 'UN_AVL'), (366429, 'AVL_TRAIN'), (388336, 'AVL_EVAL'), (406981, 'UN_AVL'), (414433, 'AVL_TRAIN'), (421914, 'AVL_EVAL'), (442350, 'UN_AVL'), (448779, 'AVL_TRAIN'), (449766, 'AVL_EVAL'), (469807, 'UN_AVL'), (492093, 'AVL_TRAIN'), (496041, 'UN_AVL'), (496044, 'AVL_TRAIN'), (496052, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_25.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_25.json index 5f030c350..de5e0c0b9 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_25.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_25.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, 
"perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (186155, 'AVL_TRAIN'), (186873, 'UN_AVL'), (186876, 'AVL_TRAIN'), (187668, 'UN_AVL'), (188128, 'AVL_TRAIN'), (189551, 'UN_AVL'), (190013, 'AVL_TRAIN'), (190611, 'UN_AVL'), (226790, 'AVL_EVAL'), (264325, 'UN_AVL'), (314991, 'AVL_TRAIN'), (316045, 'UN_AVL'), (322000, 'AVL_TRAIN'), (322996, 'UN_AVL'), (322998, 'AVL_TRAIN'), (323006, 'UN_AVL'), (333884, 'AVL_TRAIN'), (334665, 'UN_AVL'), (355081, 'AVL_TRAIN'), (355087, 'UN_AVL'), (355087, 'AVL_TRAIN'), (355092, 'UN_AVL'), (359729, 'AVL_EVAL'), (426325, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (186155, 'AVL_TRAIN'), (186873, 'UN_AVL'), (186876, 'AVL_TRAIN'), (187668, 'UN_AVL'), (188128, 'AVL_TRAIN'), (189551, 'UN_AVL'), (190013, 'AVL_TRAIN'), (190611, 'UN_AVL'), (226790, 'AVL_EVAL'), (241384, 'UN_AVL'), (314991, 'AVL_TRAIN'), (316045, 'UN_AVL'), (322000, 'AVL_TRAIN'), (322996, 'UN_AVL'), (322998, 'AVL_TRAIN'), (323006, 'UN_AVL'), (333884, 'AVL_TRAIN'), (334665, 'UN_AVL'), (355081, 'AVL_TRAIN'), (355087, 'UN_AVL'), (355087, 'AVL_TRAIN'), (355092, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_26.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_26.json index ecdd3ee39..fd62b9f08 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_26.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_26.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + 
"data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "13.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8428, 'AVL_TRAIN'), (28464, 'AVL_EVAL'), (47374, 'UN_AVL'), (78938, 'AVL_TRAIN'), (98544, 'AVL_EVAL'), (116497, 'UN_AVL'), (126713, 'AVL_TRAIN'), (134789, 'AVL_EVAL'), (170734, 'UN_AVL'), (192337, 'AVL_TRAIN'), (203506, 'AVL_EVAL'), (237632, 'UN_AVL'), (244107, 'AVL_TRAIN'), (246582, 'UN_AVL'), (249472, 'AVL_TRAIN'), (259857, 'AVL_EVAL'), (312106, 'UN_AVL'), (312106, 'AVL_TRAIN'), (328650, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8428, 'AVL_TRAIN'), (28464, 'AVL_EVAL'), (35903, 'UN_AVL'), (78938, 'AVL_TRAIN'), (98544, 'AVL_EVAL'), (109777, 'UN_AVL'), (126713, 'AVL_TRAIN'), (134789, 'AVL_EVAL'), (160415, 'UN_AVL'), (192337, 'AVL_TRAIN'), (203506, 'AVL_EVAL'), (213767, 'UN_AVL'), (244107, 'AVL_TRAIN'), (246582, 'UN_AVL'), (249472, 'AVL_TRAIN'), (259857, 'AVL_EVAL'), (271897, 'UN_AVL'), (312106, 'AVL_TRAIN'), (328650, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_27.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_27.json index 862a9bd32..44ee2dfa0 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_27.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_27.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": 
"agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "11.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (12984, 'UN_AVL'), (13124, 'AVL_TRAIN'), (16085, 'UN_AVL'), (16085, 'AVL_TRAIN'), (16102, 'UN_AVL'), (16689, 'AVL_TRAIN'), (52609, 'AVL_EVAL'), (59381, 'UN_AVL'), (137700, 'AVL_EVAL'), (141364, 'UN_AVL'), (144363, 'AVL_TRAIN'), (144366, 'UN_AVL'), (144370, 'AVL_TRAIN'), (144915, 'UN_AVL'), (144915, 'AVL_TRAIN'), (154153, 'AVL_EVAL'), (168218, 'UN_AVL'), (171748, 'AVL_TRAIN'), (171752, 'UN_AVL'), (171755, 'AVL_TRAIN'), (171758, 'UN_AVL'), (171766, 'AVL_TRAIN'), (171770, 'UN_AVL'), (171770, 'AVL_TRAIN'), (177927, 'UN_AVL'), (177928, 'AVL_TRAIN'), (178050, 'UN_AVL'), (178050, 'AVL_TRAIN'), (178058, 'UN_AVL'), (178058, 'AVL_TRAIN'), (178059, 'UN_AVL'), (178059, 'AVL_TRAIN'), (178060, 'UN_AVL'), (178060, 'AVL_TRAIN'), (178064, 'UN_AVL'), (178064, 'AVL_TRAIN'), (178083, 'UN_AVL'), (178083, 'AVL_TRAIN'), (178084, 'UN_AVL'), (178084, 'AVL_TRAIN'), (178087, 'UN_AVL'), (178087, 'AVL_TRAIN'), (178088, 'UN_AVL'), (178088, 'AVL_TRAIN'), (178090, 'UN_AVL'), (178090, 'AVL_TRAIN'), (178091, 'UN_AVL'), (178092, 'AVL_TRAIN'), (178093, 'UN_AVL'), (178093, 'AVL_TRAIN'), (178106, 'UN_AVL'), (178106, 'AVL_TRAIN'), (178116, 'UN_AVL'), (178117, 'AVL_TRAIN'), (178118, 'UN_AVL'), (178118, 'AVL_TRAIN'), 
(178120, 'UN_AVL'), (178120, 'AVL_TRAIN'), (178121, 'UN_AVL'), (178121, 'AVL_TRAIN'), (178138, 'UN_AVL'), (178138, 'AVL_TRAIN'), (178139, 'UN_AVL'), (178139, 'AVL_TRAIN'), (178140, 'UN_AVL'), (178140, 'AVL_TRAIN'), (178141, 'UN_AVL'), (178141, 'AVL_TRAIN'), (178142, 'UN_AVL'), (178142, 'AVL_TRAIN'), (178143, 'UN_AVL'), (178143, 'AVL_TRAIN'), (178144, 'UN_AVL'), (178144, 'AVL_TRAIN'), (178145, 'UN_AVL'), (178145, 'AVL_TRAIN'), (178146, 'UN_AVL'), (178147, 'AVL_TRAIN'), (178155, 'UN_AVL'), (178155, 'AVL_TRAIN'), (178157, 'UN_AVL'), (178158, 'AVL_TRAIN'), (178159, 'UN_AVL'), (178159, 'AVL_TRAIN'), (178160, 'UN_AVL'), (178160, 'AVL_TRAIN'), (178161, 'UN_AVL'), (178162, 'AVL_TRAIN'), (178163, 'UN_AVL'), (178163, 'AVL_TRAIN'), (178164, 'UN_AVL'), (178164, 'AVL_TRAIN'), (178166, 'UN_AVL'), (178167, 'AVL_TRAIN'), (178168, 'UN_AVL'), (178168, 'AVL_TRAIN'), (178170, 'UN_AVL'), (178170, 'AVL_TRAIN'), (178171, 'UN_AVL'), (178171, 'AVL_TRAIN'), (178174, 'UN_AVL'), (178174, 'AVL_TRAIN'), (178175, 'UN_AVL'), (178176, 'AVL_TRAIN'), (178177, 'UN_AVL'), (178177, 'AVL_TRAIN'), (178179, 'UN_AVL'), (178180, 'AVL_TRAIN'), (178181, 'UN_AVL'), (178181, 'AVL_TRAIN'), (178185, 'UN_AVL'), (178185, 'AVL_TRAIN'), (178186, 'UN_AVL'), (178186, 'AVL_TRAIN'), (178188, 'UN_AVL'), (178188, 'AVL_TRAIN'), (178191, 'UN_AVL'), (178191, 'AVL_TRAIN'), (178192, 'UN_AVL'), (178193, 'AVL_TRAIN'), (178194, 'UN_AVL'), (178194, 'AVL_TRAIN'), (178196, 'UN_AVL'), (178196, 'AVL_TRAIN'), (178200, 'UN_AVL'), (178201, 'AVL_TRAIN'), (184920, 'AVL_EVAL'), (186238, 'UN_AVL'), (189880, 'AVL_TRAIN'), (209457, 'AVL_EVAL'), (209460, 'AVL_TRAIN'), (209463, 'AVL_EVAL'), (261757, 'UN_AVL'), (263299, 'AVL_TRAIN'), (272259, 'UN_AVL'), (272259, 'AVL_TRAIN'), (272261, 'UN_AVL'), (272262, 'AVL_TRAIN'), (272272, 'UN_AVL'), (272272, 'AVL_TRAIN'), (272277, 'UN_AVL'), (272277, 'AVL_TRAIN'), (272279, 'UN_AVL'), (272279, 'AVL_TRAIN'), (272564, 'UN_AVL'), (272565, 'AVL_TRAIN'), (273146, 'UN_AVL'), (273146, 'AVL_TRAIN'), (273179, 
'UN_AVL'), (273179, 'AVL_TRAIN'), (273181, 'UN_AVL'), (273181, 'AVL_TRAIN'), (273182, 'UN_AVL'), (273182, 'AVL_TRAIN'), (273183, 'UN_AVL'), (273183, 'AVL_TRAIN'), (273208, 'UN_AVL'), (273208, 'AVL_TRAIN'), (273212, 'UN_AVL'), (273212, 'AVL_TRAIN'), (273214, 'UN_AVL'), (273214, 'AVL_TRAIN'), (273309, 'UN_AVL'), (273309, 'AVL_TRAIN'), (273317, 'UN_AVL'), (273317, 'AVL_TRAIN'), (273318, 'UN_AVL'), (273318, 'AVL_TRAIN'), (273321, 'UN_AVL'), (273321, 'AVL_TRAIN'), (274613, 'UN_AVL'), (274614, 'AVL_TRAIN'), (274617, 'UN_AVL'), (274617, 'AVL_TRAIN'), (274619, 'UN_AVL'), (274620, 'AVL_TRAIN'), (274622, 'UN_AVL'), (274622, 'AVL_TRAIN'), (274629, 'UN_AVL'), (274629, 'AVL_TRAIN'), (274631, 'UN_AVL'), (274631, 'AVL_TRAIN'), (274635, 'UN_AVL'), (274635, 'AVL_TRAIN'), (274641, 'UN_AVL'), (274641, 'AVL_TRAIN'), (274645, 'UN_AVL'), (274645, 'AVL_TRAIN'), (274649, 'UN_AVL'), (274649, 'AVL_TRAIN'), (274651, 'UN_AVL'), (274651, 'AVL_TRAIN'), (274654, 'UN_AVL'), (274654, 'AVL_TRAIN'), (274656, 'UN_AVL'), (274657, 'AVL_TRAIN'), (274659, 'UN_AVL'), (274659, 'AVL_TRAIN'), (274660, 'UN_AVL'), (274660, 'AVL_TRAIN'), (274661, 'UN_AVL'), (274661, 'AVL_TRAIN'), (274663, 'UN_AVL'), (274664, 'AVL_TRAIN'), (276623, 'AVL_EVAL'), (276659, 'AVL_TRAIN'), (294032, 'AVL_EVAL'), (318786, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (12984, 'UN_AVL'), (13124, 'AVL_TRAIN'), (16085, 'UN_AVL'), (16085, 'AVL_TRAIN'), (16102, 'UN_AVL'), (16689, 'AVL_TRAIN'), (52609, 'AVL_EVAL'), (55879, 'UN_AVL'), (137700, 'AVL_EVAL'), (141364, 'UN_AVL'), (144363, 'AVL_TRAIN'), (144366, 'UN_AVL'), (144370, 'AVL_TRAIN'), (144915, 'UN_AVL'), (144915, 'AVL_TRAIN'), (154153, 'AVL_EVAL'), (163689, 'UN_AVL'), (171748, 'AVL_TRAIN'), (171752, 'UN_AVL'), (171755, 'AVL_TRAIN'), (171758, 'UN_AVL'), (171766, 'AVL_TRAIN'), (171770, 'UN_AVL'), (171770, 'AVL_TRAIN'), (177927, 'UN_AVL'), (177928, 'AVL_TRAIN'), (178050, 'UN_AVL'), (178050, 'AVL_TRAIN'), (178058, 'UN_AVL'), (178058, 'AVL_TRAIN'), (178059, 
'UN_AVL'), (178059, 'AVL_TRAIN'), (178060, 'UN_AVL'), (178060, 'AVL_TRAIN'), (178064, 'UN_AVL'), (178064, 'AVL_TRAIN'), (178083, 'UN_AVL'), (178083, 'AVL_TRAIN'), (178084, 'UN_AVL'), (178084, 'AVL_TRAIN'), (178087, 'UN_AVL'), (178087, 'AVL_TRAIN'), (178088, 'UN_AVL'), (178088, 'AVL_TRAIN'), (178090, 'UN_AVL'), (178090, 'AVL_TRAIN'), (178091, 'UN_AVL'), (178092, 'AVL_TRAIN'), (178093, 'UN_AVL'), (178093, 'AVL_TRAIN'), (178106, 'UN_AVL'), (178106, 'AVL_TRAIN'), (178116, 'UN_AVL'), (178117, 'AVL_TRAIN'), (178118, 'UN_AVL'), (178118, 'AVL_TRAIN'), (178120, 'UN_AVL'), (178120, 'AVL_TRAIN'), (178121, 'UN_AVL'), (178121, 'AVL_TRAIN'), (178138, 'UN_AVL'), (178138, 'AVL_TRAIN'), (178139, 'UN_AVL'), (178139, 'AVL_TRAIN'), (178140, 'UN_AVL'), (178140, 'AVL_TRAIN'), (178141, 'UN_AVL'), (178141, 'AVL_TRAIN'), (178142, 'UN_AVL'), (178142, 'AVL_TRAIN'), (178143, 'UN_AVL'), (178143, 'AVL_TRAIN'), (178144, 'UN_AVL'), (178144, 'AVL_TRAIN'), (178145, 'UN_AVL'), (178145, 'AVL_TRAIN'), (178146, 'UN_AVL'), (178147, 'AVL_TRAIN'), (178155, 'UN_AVL'), (178155, 'AVL_TRAIN'), (178157, 'UN_AVL'), (178158, 'AVL_TRAIN'), (178159, 'UN_AVL'), (178159, 'AVL_TRAIN'), (178160, 'UN_AVL'), (178160, 'AVL_TRAIN'), (178161, 'UN_AVL'), (178162, 'AVL_TRAIN'), (178163, 'UN_AVL'), (178163, 'AVL_TRAIN'), (178164, 'UN_AVL'), (178164, 'AVL_TRAIN'), (178166, 'UN_AVL'), (178167, 'AVL_TRAIN'), (178168, 'UN_AVL'), (178168, 'AVL_TRAIN'), (178170, 'UN_AVL'), (178170, 'AVL_TRAIN'), (178171, 'UN_AVL'), (178171, 'AVL_TRAIN'), (178174, 'UN_AVL'), (178174, 'AVL_TRAIN'), (178175, 'UN_AVL'), (178176, 'AVL_TRAIN'), (178177, 'UN_AVL'), (178177, 'AVL_TRAIN'), (178179, 'UN_AVL'), (178180, 'AVL_TRAIN'), (178181, 'UN_AVL'), (178181, 'AVL_TRAIN'), (178185, 'UN_AVL'), (178185, 'AVL_TRAIN'), (178186, 'UN_AVL'), (178186, 'AVL_TRAIN'), (178188, 'UN_AVL'), (178188, 'AVL_TRAIN'), (178191, 'UN_AVL'), (178191, 'AVL_TRAIN'), (178192, 'UN_AVL'), (178193, 'AVL_TRAIN'), (178194, 'UN_AVL'), (178194, 'AVL_TRAIN'), (178196, 'UN_AVL'), (178196, 
'AVL_TRAIN'), (178200, 'UN_AVL'), (178201, 'AVL_TRAIN'), (184920, 'UN_AVL'), (189880, 'AVL_TRAIN'), (209457, 'AVL_EVAL'), (209460, 'AVL_TRAIN'), (209463, 'AVL_EVAL'), (227276, 'UN_AVL'), (263299, 'AVL_TRAIN'), (272259, 'UN_AVL'), (272259, 'AVL_TRAIN'), (272261, 'UN_AVL'), (272262, 'AVL_TRAIN'), (272272, 'UN_AVL'), (272272, 'AVL_TRAIN'), (272277, 'UN_AVL'), (272277, 'AVL_TRAIN'), (272279, 'UN_AVL'), (272279, 'AVL_TRAIN'), (272564, 'UN_AVL'), (272565, 'AVL_TRAIN'), (273146, 'UN_AVL'), (273146, 'AVL_TRAIN'), (273179, 'UN_AVL'), (273179, 'AVL_TRAIN'), (273181, 'UN_AVL'), (273181, 'AVL_TRAIN'), (273182, 'UN_AVL'), (273182, 'AVL_TRAIN'), (273183, 'UN_AVL'), (273183, 'AVL_TRAIN'), (273208, 'UN_AVL'), (273208, 'AVL_TRAIN'), (273212, 'UN_AVL'), (273212, 'AVL_TRAIN'), (273214, 'UN_AVL'), (273214, 'AVL_TRAIN'), (273309, 'UN_AVL'), (273309, 'AVL_TRAIN'), (273317, 'UN_AVL'), (273317, 'AVL_TRAIN'), (273318, 'UN_AVL'), (273318, 'AVL_TRAIN'), (273321, 'UN_AVL'), (273321, 'AVL_TRAIN'), (274613, 'UN_AVL'), (274614, 'AVL_TRAIN'), (274617, 'UN_AVL'), (274617, 'AVL_TRAIN'), (274619, 'UN_AVL'), (274620, 'AVL_TRAIN'), (274622, 'UN_AVL'), (274622, 'AVL_TRAIN'), (274629, 'UN_AVL'), (274629, 'AVL_TRAIN'), (274631, 'UN_AVL'), (274631, 'AVL_TRAIN'), (274635, 'UN_AVL'), (274635, 'AVL_TRAIN'), (274641, 'UN_AVL'), (274641, 'AVL_TRAIN'), (274645, 'UN_AVL'), (274645, 'AVL_TRAIN'), (274649, 'UN_AVL'), (274649, 'AVL_TRAIN'), (274651, 'UN_AVL'), (274651, 'AVL_TRAIN'), (274654, 'UN_AVL'), (274654, 'AVL_TRAIN'), (274656, 'UN_AVL'), (274657, 'AVL_TRAIN'), (274659, 'UN_AVL'), (274659, 'AVL_TRAIN'), (274660, 'UN_AVL'), (274660, 'AVL_TRAIN'), (274661, 'UN_AVL'), (274661, 'AVL_TRAIN'), (274663, 'UN_AVL'), (274664, 'AVL_TRAIN'), (276623, 'UN_AVL'), (276659, 'AVL_TRAIN'), (294032, 'AVL_EVAL'), (316371, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_28.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_28.json index 514cfeb28..61d571330 
100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_28.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_28.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "8.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (17912, 'UN_AVL'), (49715, 'AVL_TRAIN'), (59780, 'AVL_EVAL'), (62711, 'UN_AVL'), (73902, 'AVL_TRAIN'), (88800, 'AVL_EVAL'), (146634, 'UN_AVL'), (274738, 'AVL_TRAIN'), (313232, 'AVL_EVAL'), (360068, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (49715, 'AVL_TRAIN'), (59780, 'UN_AVL'), (73902, 'AVL_TRAIN'), (88800, 'AVL_EVAL'), (100118, 'UN_AVL'), (274738, 'AVL_TRAIN'), (313232, 'AVL_EVAL'), (326127, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_29.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_29.json index e82daf674..57ef451ee 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_29.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_29.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + 
"data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "15.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (13988, 'AVL_TRAIN'), (16565, 'AVL_EVAL'), (16566, 'AVL_TRAIN'), (16568, 'AVL_EVAL'), (16573, 'AVL_TRAIN'), (16574, 'AVL_EVAL'), (16575, 'AVL_TRAIN'), (17226, 'AVL_EVAL'), (17479, 'AVL_TRAIN'), (25539, 'AVL_EVAL'), (45036, 'AVL_TRAIN'), (51708, 'AVL_EVAL'), (91993, 'UN_AVL'), (94791, 'AVL_TRAIN'), (96644, 'UN_AVL'), (96797, 'AVL_TRAIN'), (96803, 'UN_AVL'), (96834, 'AVL_TRAIN'), (96836, 'UN_AVL'), (96841, 'AVL_TRAIN'), (96844, 'UN_AVL'), (96846, 'AVL_TRAIN'), (96847, 'UN_AVL'), (97137, 'AVL_TRAIN'), (106362, 'AVL_EVAL'), (131185, 'AVL_TRAIN'), (133446, 'AVL_EVAL'), (133677, 'AVL_TRAIN'), (133680, 'AVL_EVAL'), (133682, 'AVL_TRAIN'), (133705, 'AVL_EVAL'), (133706, 'AVL_TRAIN'), (133708, 'AVL_EVAL'), (133709, 'AVL_TRAIN'), (133712, 'AVL_EVAL'), (133716, 'AVL_TRAIN'), (134781, 'AVL_EVAL'), (141673, 'AVL_TRAIN'), (142359, 'AVL_EVAL'), (162181, 'AVL_TRAIN'), (163739, 'AVL_EVAL'), (178541, 'AVL_TRAIN'), (178547, 'AVL_EVAL'), (179975, 'AVL_TRAIN'), (179976, 'AVL_EVAL'), (186814, 'AVL_TRAIN'), (186822, 'AVL_EVAL'), (187001, 'AVL_TRAIN'), (187005, 'AVL_EVAL'), (187006, 'AVL_TRAIN'), (187479, 'AVL_EVAL'), (187480, 'AVL_TRAIN'), (188860, 'AVL_EVAL'), (189043, 'AVL_TRAIN'), (189044, 'AVL_EVAL'), (189055, 'AVL_TRAIN'), (189072, 'AVL_EVAL'), (189640, 'AVL_TRAIN'), (200370, 'AVL_EVAL'), (214023, 
'AVL_TRAIN'), (214027, 'AVL_EVAL'), (214575, 'AVL_TRAIN'), (214576, 'AVL_EVAL'), (214576, 'AVL_TRAIN'), (217873, 'AVL_EVAL'), (239732, 'AVL_TRAIN'), (241988, 'AVL_EVAL'), (242070, 'AVL_TRAIN'), (242424, 'AVL_EVAL'), (242425, 'AVL_TRAIN'), (244385, 'AVL_EVAL'), (244386, 'AVL_TRAIN'), (244388, 'AVL_EVAL'), (246282, 'AVL_TRAIN'), (246288, 'AVL_EVAL'), (263815, 'AVL_TRAIN'), (263816, 'AVL_EVAL'), (263826, 'AVL_TRAIN'), (263827, 'AVL_EVAL'), (263832, 'AVL_TRAIN'), (268716, 'AVL_EVAL'), (277017, 'AVL_TRAIN'), (277020, 'AVL_EVAL'), (277032, 'AVL_TRAIN'), (278587, 'AVL_EVAL'), (297476, 'AVL_TRAIN'), (298521, 'AVL_EVAL'), (298521, 'AVL_TRAIN'), (304378, 'AVL_EVAL'), (307421, 'AVL_TRAIN'), (308955, 'AVL_EVAL'), (343915, 'AVL_TRAIN'), (343937, 'AVL_EVAL'), (343937, 'AVL_TRAIN'), (347996, 'AVL_EVAL'), (348021, 'AVL_TRAIN'), (348110, 'AVL_EVAL'), (348111, 'AVL_TRAIN'), (348122, 'AVL_EVAL'), (349238, 'AVL_TRAIN'), (349255, 'AVL_EVAL'), (349255, 'AVL_TRAIN'), (351098, 'AVL_EVAL'), (368744, 'AVL_TRAIN'), (371567, 'AVL_EVAL'), (387148, 'AVL_TRAIN'), (388654, 'AVL_EVAL'), (388889, 'AVL_TRAIN'), (396683, 'AVL_EVAL'), (397094, 'AVL_TRAIN'), (397106, 'AVL_EVAL'), (397108, 'AVL_TRAIN'), (397117, 'AVL_EVAL'), (397120, 'AVL_TRAIN'), (397121, 'AVL_EVAL'), (397122, 'AVL_TRAIN'), (397291, 'AVL_EVAL'), (397333, 'AVL_TRAIN'), (397334, 'AVL_EVAL'), (397336, 'AVL_TRAIN'), (399025, 'AVL_EVAL'), (440692, 'AVL_TRAIN'), (440693, 'AVL_EVAL'), (440694, 'AVL_TRAIN'), (441314, 'AVL_EVAL'), (441315, 'AVL_TRAIN'), (441317, 'AVL_EVAL'), (441321, 'AVL_TRAIN'), (441377, 'AVL_EVAL'), (441479, 'AVL_TRAIN'), (444679, 'AVL_EVAL'), (444732, 'AVL_TRAIN'), (444765, 'AVL_EVAL'), (447248, 'AVL_TRAIN'), (447826, 'AVL_EVAL'), (447827, 'AVL_TRAIN'), (447843, 'AVL_EVAL'), (447845, 'AVL_TRAIN'), (449924, 'AVL_EVAL'), (449924, 'AVL_TRAIN'), (449990, 'AVL_EVAL'), (449991, 'AVL_TRAIN'), (450003, 'AVL_EVAL'), (450003, 'AVL_TRAIN'), (450033, 'AVL_EVAL'), (450034, 'AVL_TRAIN'), (450161, 'AVL_EVAL'), (450162, 'AVL_TRAIN'), 
(450232, 'AVL_EVAL'), (450232, 'AVL_TRAIN'), (450459, 'AVL_EVAL'), (450460, 'AVL_TRAIN'), (451183, 'AVL_EVAL'), (451440, 'AVL_TRAIN'), (451603, 'AVL_EVAL'), (451604, 'AVL_TRAIN'), (452272, 'AVL_EVAL'), (478858, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (13988, 'AVL_TRAIN'), (16565, 'UN_AVL'), (16566, 'AVL_EVAL'), (16566, 'AVL_TRAIN'), (16568, 'AVL_EVAL'), (16573, 'AVL_TRAIN'), (16574, 'AVL_EVAL'), (16575, 'AVL_TRAIN'), (17226, 'AVL_EVAL'), (17479, 'AVL_TRAIN'), (25539, 'AVL_EVAL'), (45036, 'AVL_TRAIN'), (51708, 'AVL_EVAL'), (70814, 'UN_AVL'), (94791, 'AVL_TRAIN'), (96644, 'UN_AVL'), (96797, 'AVL_TRAIN'), (96803, 'UN_AVL'), (96834, 'AVL_TRAIN'), (96836, 'UN_AVL'), (96841, 'AVL_TRAIN'), (96844, 'UN_AVL'), (96846, 'AVL_TRAIN'), (96847, 'UN_AVL'), (97137, 'AVL_TRAIN'), (106362, 'AVL_EVAL'), (131185, 'AVL_TRAIN'), (133446, 'AVL_EVAL'), (133677, 'AVL_TRAIN'), (133680, 'AVL_EVAL'), (133682, 'AVL_TRAIN'), (133705, 'AVL_EVAL'), (133706, 'AVL_TRAIN'), (133708, 'AVL_EVAL'), (133709, 'AVL_TRAIN'), (133712, 'AVL_EVAL'), (133716, 'AVL_TRAIN'), (134781, 'AVL_EVAL'), (141673, 'AVL_TRAIN'), (142359, 'AVL_EVAL'), (162181, 'AVL_TRAIN'), (163739, 'AVL_EVAL'), (178541, 'AVL_TRAIN'), (178547, 'AVL_EVAL'), (179975, 'AVL_TRAIN'), (179976, 'AVL_EVAL'), (184106, 'UN_AVL'), (186814, 'AVL_TRAIN'), (186822, 'UN_AVL'), (187001, 'AVL_TRAIN'), (187005, 'UN_AVL'), (187006, 'AVL_TRAIN'), (187479, 'UN_AVL'), (187480, 'AVL_TRAIN'), (188860, 'UN_AVL'), (189043, 'AVL_TRAIN'), (189044, 'UN_AVL'), (189055, 'AVL_TRAIN'), (189072, 'UN_AVL'), (189640, 'AVL_TRAIN'), (200370, 'AVL_EVAL'), (214023, 'AVL_TRAIN'), (214027, 'AVL_EVAL'), (214575, 'AVL_TRAIN'), (214576, 'AVL_EVAL'), (214576, 'AVL_TRAIN'), (217873, 'AVL_EVAL'), (237119, 'UN_AVL'), (239732, 'AVL_TRAIN'), (241988, 'AVL_EVAL'), (242070, 'AVL_TRAIN'), (242424, 'AVL_EVAL'), (242425, 'AVL_TRAIN'), (244385, 'AVL_EVAL'), (244386, 'AVL_TRAIN'), (244388, 'AVL_EVAL'), (246282, 'AVL_TRAIN'), (246288, 'AVL_EVAL'), (249961, 
'UN_AVL'), (263815, 'AVL_TRAIN'), (263816, 'UN_AVL'), (263826, 'AVL_TRAIN'), (263827, 'UN_AVL'), (263832, 'AVL_TRAIN'), (268716, 'AVL_EVAL'), (277017, 'UN_AVL'), (277017, 'AVL_TRAIN'), (277020, 'UN_AVL'), (277032, 'AVL_TRAIN'), (278587, 'AVL_EVAL'), (290802, 'UN_AVL'), (297476, 'AVL_TRAIN'), (298521, 'AVL_EVAL'), (298521, 'AVL_TRAIN'), (304378, 'AVL_EVAL'), (307421, 'AVL_TRAIN'), (308955, 'AVL_EVAL'), (333991, 'UN_AVL'), (343915, 'AVL_TRAIN'), (343937, 'UN_AVL'), (343937, 'AVL_TRAIN'), (347996, 'AVL_EVAL'), (348021, 'AVL_TRAIN'), (348110, 'AVL_EVAL'), (348111, 'AVL_TRAIN'), (348122, 'AVL_EVAL'), (349238, 'AVL_TRAIN'), (349255, 'AVL_EVAL'), (349255, 'AVL_TRAIN'), (351098, 'AVL_EVAL'), (365627, 'UN_AVL'), (368744, 'AVL_TRAIN'), (371567, 'AVL_EVAL'), (387148, 'UN_AVL'), (387148, 'AVL_TRAIN'), (388654, 'AVL_EVAL'), (388889, 'AVL_TRAIN'), (396683, 'AVL_EVAL'), (397094, 'AVL_TRAIN'), (397106, 'AVL_EVAL'), (397108, 'AVL_TRAIN'), (397117, 'AVL_EVAL'), (397120, 'AVL_TRAIN'), (397121, 'AVL_EVAL'), (397122, 'AVL_TRAIN'), (397291, 'AVL_EVAL'), (397333, 'AVL_TRAIN'), (397334, 'AVL_EVAL'), (397336, 'AVL_TRAIN'), (399025, 'AVL_EVAL'), (428555, 'UN_AVL'), (440692, 'AVL_TRAIN'), (440693, 'UN_AVL'), (440694, 'AVL_TRAIN'), (441314, 'UN_AVL'), (441315, 'AVL_TRAIN'), (441317, 'UN_AVL'), (441321, 'AVL_TRAIN'), (441377, 'UN_AVL'), (441479, 'AVL_TRAIN'), (444679, 'UN_AVL'), (444732, 'AVL_TRAIN'), (444765, 'UN_AVL'), (447248, 'AVL_TRAIN'), (447826, 'UN_AVL'), (447827, 'AVL_TRAIN'), (447843, 'UN_AVL'), (447845, 'AVL_TRAIN'), (449924, 'UN_AVL'), (449924, 'AVL_TRAIN'), (449990, 'UN_AVL'), (449991, 'AVL_TRAIN'), (450003, 'UN_AVL'), (450003, 'AVL_TRAIN'), (450033, 'UN_AVL'), (450034, 'AVL_TRAIN'), (450161, 'UN_AVL'), (450162, 'AVL_TRAIN'), (450232, 'UN_AVL'), (450232, 'AVL_TRAIN'), (450459, 'UN_AVL'), (450460, 'AVL_TRAIN'), (451183, 'AVL_EVAL'), (451440, 'UN_AVL'), (451440, 'AVL_TRAIN'), (451603, 'AVL_EVAL'), (451604, 'AVL_TRAIN'), (452272, 'AVL_EVAL'), (460426, 'UN_AVL')]", diff --git 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_3.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_3.json index 1a7730974..0b6cbae93 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_3.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_3.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (14439, 'AVL_TRAIN'), (17399, 'AVL_EVAL'), (17483, 'AVL_TRAIN'), (21062, 'AVL_EVAL'), (31364, 'AVL_TRAIN'), (47178, 'AVL_EVAL'), (61443, 'UN_AVL'), (61798, 'AVL_TRAIN'), (64816, 'AVL_EVAL'), (66568, 'AVL_TRAIN'), (70684, 'AVL_EVAL'), (80719, 'UN_AVL'), (80973, 'AVL_TRAIN'), (87020, 'AVL_EVAL'), (105873, 'UN_AVL'), (106148, 'AVL_TRAIN'), (107511, 'AVL_EVAL'), (107639, 'AVL_TRAIN'), (132526, 'AVL_EVAL'), (147091, 'AVL_TRAIN'), (150724, 'AVL_EVAL'), (165013, 'UN_AVL'), (165240, 'AVL_TRAIN'), (169391, 'AVL_EVAL'), (181036, 'UN_AVL'), (184714, 'AVL_TRAIN'), (192079, 'AVL_EVAL'), (208176, 'UN_AVL'), (208176, 'AVL_TRAIN'), (210806, 'UN_AVL'), (211538, 'AVL_TRAIN'), (258787, 'AVL_EVAL'), (264026, 'AVL_TRAIN'), (268127, 'AVL_EVAL'), (280246, 'AVL_TRAIN'), (281514, 'AVL_EVAL'), (289186, 'UN_AVL'), 
(289186, 'AVL_TRAIN'), (295332, 'AVL_EVAL'), (322605, 'UN_AVL'), (338307, 'AVL_TRAIN'), (342849, 'AVL_EVAL'), (357291, 'AVL_TRAIN'), (359952, 'AVL_EVAL'), (360162, 'AVL_TRAIN'), (362167, 'AVL_EVAL'), (373404, 'UN_AVL'), (373404, 'AVL_TRAIN'), (378438, 'AVL_EVAL'), (411783, 'UN_AVL'), (411783, 'AVL_TRAIN'), (415919, 'AVL_EVAL'), (444735, 'UN_AVL'), (444735, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (13781, 'UN_AVL'), (14439, 'AVL_TRAIN'), (17399, 'UN_AVL'), (17483, 'AVL_TRAIN'), (21062, 'AVL_EVAL'), (29785, 'UN_AVL'), (31364, 'AVL_TRAIN'), (47178, 'AVL_EVAL'), (56015, 'UN_AVL'), (61798, 'AVL_TRAIN'), (64816, 'UN_AVL'), (66568, 'AVL_TRAIN'), (70684, 'AVL_EVAL'), (77885, 'UN_AVL'), (80973, 'AVL_TRAIN'), (87020, 'AVL_EVAL'), (100618, 'UN_AVL'), (106148, 'AVL_TRAIN'), (107511, 'UN_AVL'), (107639, 'AVL_TRAIN'), (132526, 'AVL_EVAL'), (139717, 'UN_AVL'), (147091, 'AVL_TRAIN'), (150724, 'AVL_EVAL'), (156268, 'UN_AVL'), (165240, 'AVL_TRAIN'), (169391, 'UN_AVL'), (184714, 'AVL_TRAIN'), (192079, 'AVL_EVAL'), (197577, 'UN_AVL'), (208176, 'AVL_TRAIN'), (210806, 'UN_AVL'), (211538, 'AVL_TRAIN'), (258787, 'UN_AVL'), (264026, 'AVL_TRAIN'), (268127, 'AVL_EVAL'), (278313, 'UN_AVL'), (280246, 'AVL_TRAIN'), (281514, 'UN_AVL'), (289186, 'AVL_TRAIN'), (295332, 'AVL_EVAL'), (300079, 'UN_AVL'), (338307, 'AVL_TRAIN'), (342849, 'AVL_EVAL'), (352585, 'UN_AVL'), (357291, 'AVL_TRAIN'), (359952, 'AVL_EVAL'), (360162, 'AVL_TRAIN'), (362167, 'AVL_EVAL'), (368376, 'UN_AVL'), (373404, 'AVL_TRAIN'), (378438, 'AVL_EVAL'), (386179, 'UN_AVL'), (411783, 'AVL_TRAIN'), (415919, 'AVL_EVAL'), (440774, 'UN_AVL'), (444735, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_30.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_30.json index 56d8bb5d8..4c391e260 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_30.json +++ 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_30.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (2320, 'AVL_EVAL'), (18879, 'UN_AVL'), (18889, 'AVL_TRAIN'), (26154, 'AVL_EVAL'), (44257, 'AVL_TRAIN'), (45032, 'AVL_EVAL'), (52091, 'UN_AVL'), (55301, 'AVL_TRAIN'), (55584, 'AVL_EVAL'), (56125, 'UN_AVL'), (56398, 'AVL_TRAIN'), (56487, 'UN_AVL'), (57209, 'AVL_TRAIN'), (59873, 'AVL_EVAL'), (64468, 'AVL_TRAIN'), (64606, 'AVL_EVAL'), (68446, 'UN_AVL'), (70240, 'AVL_TRAIN'), (70609, 'UN_AVL'), (73999, 'AVL_TRAIN'), (74585, 'UN_AVL'), (75860, 'AVL_TRAIN'), (76083, 'UN_AVL'), (77370, 'AVL_TRAIN'), (79306, 'AVL_EVAL'), (79713, 'AVL_TRAIN'), (79814, 'AVL_EVAL'), (83091, 'AVL_TRAIN'), (84860, 'AVL_EVAL'), (86669, 'AVL_TRAIN'), (87251, 'AVL_EVAL'), (98297, 'AVL_TRAIN'), (103304, 'AVL_EVAL'), (134581, 'UN_AVL'), (143068, 'AVL_TRAIN'), (143739, 'UN_AVL'), (143900, 'AVL_TRAIN'), (143908, 'UN_AVL'), (145912, 'AVL_TRAIN'), (148037, 'AVL_EVAL'), (148669, 'AVL_TRAIN'), (149498, 'AVL_EVAL'), (157586, 'UN_AVL'), (159992, 'AVL_TRAIN'), (161907, 'AVL_EVAL'), (168177, 'UN_AVL'), (170639, 'AVL_TRAIN'), (170761, 'UN_AVL'), (174058, 'AVL_TRAIN'), (175241, 'UN_AVL'), (175325, 
'AVL_TRAIN'), (176149, 'AVL_EVAL'), (176149, 'AVL_TRAIN'), (176159, 'AVL_EVAL'), (176372, 'AVL_TRAIN'), (176659, 'AVL_EVAL'), (176765, 'AVL_TRAIN'), (177312, 'AVL_EVAL'), (177464, 'AVL_TRAIN'), (177980, 'AVL_EVAL'), (178704, 'AVL_TRAIN'), (179015, 'AVL_EVAL'), (183153, 'AVL_TRAIN'), (184046, 'AVL_EVAL'), (194892, 'UN_AVL'), (198048, 'AVL_EVAL'), (198048, 'AVL_TRAIN'), (204352, 'AVL_EVAL'), (264307, 'UN_AVL'), (267078, 'AVL_TRAIN'), (272977, 'AVL_EVAL'), (300535, 'UN_AVL'), (311402, 'AVL_TRAIN'), (312545, 'AVL_EVAL'), (314353, 'UN_AVL'), (314353, 'AVL_TRAIN'), (315995, 'AVL_EVAL'), (329875, 'UN_AVL'), (330152, 'AVL_TRAIN'), (337606, 'AVL_EVAL'), (374176, 'AVL_TRAIN'), (375460, 'AVL_EVAL'), (383427, 'AVL_TRAIN'), (388817, 'AVL_EVAL'), (404989, 'AVL_TRAIN'), (405471, 'AVL_EVAL'), (417883, 'AVL_TRAIN'), (417892, 'AVL_EVAL'), (417935, 'AVL_TRAIN'), (417997, 'AVL_EVAL'), (418172, 'AVL_TRAIN'), (420211, 'AVL_EVAL'), (424286, 'AVL_TRAIN'), (425948, 'AVL_EVAL'), (431247, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (18889, 'AVL_TRAIN'), (26154, 'AVL_EVAL'), (43260, 'UN_AVL'), (44257, 'AVL_TRAIN'), (45032, 'UN_AVL'), (55301, 'AVL_TRAIN'), (55584, 'UN_AVL'), (56398, 'AVL_TRAIN'), (56487, 'UN_AVL'), (57209, 'AVL_TRAIN'), (59873, 'UN_AVL'), (64468, 'AVL_TRAIN'), (64606, 'UN_AVL'), (70240, 'AVL_TRAIN'), (70609, 'UN_AVL'), (73999, 'AVL_TRAIN'), (74585, 'UN_AVL'), (75860, 'AVL_TRAIN'), (76083, 'UN_AVL'), (77370, 'AVL_TRAIN'), (79306, 'UN_AVL'), (79713, 'AVL_TRAIN'), (79814, 'UN_AVL'), (83091, 'AVL_TRAIN'), (84860, 'AVL_EVAL'), (86669, 'AVL_TRAIN'), (87251, 'AVL_EVAL'), (93316, 'UN_AVL'), (98297, 'AVL_TRAIN'), (103304, 'AVL_EVAL'), (126077, 'UN_AVL'), (143068, 'AVL_TRAIN'), (143739, 'UN_AVL'), (143900, 'AVL_TRAIN'), (143908, 'UN_AVL'), (145912, 'AVL_TRAIN'), (148037, 'UN_AVL'), (148669, 'AVL_TRAIN'), (149498, 'UN_AVL'), (159992, 'AVL_TRAIN'), (161907, 'UN_AVL'), (170639, 'AVL_TRAIN'), (170761, 'UN_AVL'), (174058, 'AVL_TRAIN'), (175241, 'UN_AVL'), 
(175325, 'AVL_TRAIN'), (176149, 'UN_AVL'), (176149, 'AVL_TRAIN'), (176159, 'UN_AVL'), (176372, 'AVL_TRAIN'), (176659, 'UN_AVL'), (176765, 'AVL_TRAIN'), (177312, 'UN_AVL'), (177464, 'AVL_TRAIN'), (177980, 'UN_AVL'), (178704, 'AVL_TRAIN'), (179015, 'UN_AVL'), (183153, 'AVL_TRAIN'), (184046, 'UN_AVL'), (198048, 'AVL_TRAIN'), (204352, 'AVL_EVAL'), (241586, 'UN_AVL'), (267078, 'AVL_TRAIN'), (272977, 'AVL_EVAL'), (278562, 'UN_AVL'), (311402, 'AVL_TRAIN'), (312545, 'UN_AVL'), (314353, 'AVL_TRAIN'), (315995, 'UN_AVL'), (330152, 'AVL_TRAIN'), (337606, 'AVL_EVAL'), (368181, 'UN_AVL'), (374176, 'AVL_TRAIN'), (375460, 'AVL_EVAL'), (378642, 'UN_AVL'), (383427, 'AVL_TRAIN'), (388817, 'AVL_EVAL'), (404989, 'AVL_TRAIN'), (405471, 'AVL_EVAL'), (417883, 'UN_AVL'), (417883, 'AVL_TRAIN'), (417892, 'UN_AVL'), (417935, 'AVL_TRAIN'), (417997, 'UN_AVL'), (418172, 'AVL_TRAIN'), (420211, 'AVL_EVAL'), (424286, 'AVL_TRAIN'), (425948, 'AVL_EVAL'), (431247, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_31.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_31.json index 99dc1f5d3..2148b733f 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_31.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_31.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 
1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (211, 'AVL_EVAL'), (212, 'AVL_TRAIN'), (310, 'AVL_EVAL'), (311, 'AVL_TRAIN'), (406, 'AVL_EVAL'), (406, 'AVL_TRAIN'), (458, 'AVL_EVAL'), (459, 'AVL_TRAIN'), (3037, 'AVL_EVAL'), (52730, 'AVL_TRAIN'), (53037, 'AVL_EVAL'), (53038, 'AVL_TRAIN'), (53071, 'AVL_EVAL'), (53071, 'AVL_TRAIN'), (53342, 'AVL_EVAL'), (53343, 'AVL_TRAIN'), (53473, 'AVL_EVAL'), (53473, 'AVL_TRAIN'), (53550, 'AVL_EVAL'), (53550, 'AVL_TRAIN'), (53562, 'AVL_EVAL'), (53572, 'AVL_TRAIN'), (53617, 'AVL_EVAL'), (85379, 'UN_AVL'), (96322, 'AVL_EVAL'), (132605, 'AVL_TRAIN'), (132651, 'AVL_EVAL'), (132652, 'AVL_TRAIN'), (138069, 'AVL_EVAL'), (159107, 'UN_AVL'), (162125, 'AVL_TRAIN'), (162261, 'UN_AVL'), (162293, 'AVL_TRAIN'), (162739, 'UN_AVL'), (169954, 'AVL_TRAIN'), (169978, 'UN_AVL'), (169979, 'AVL_TRAIN'), (169985, 'UN_AVL'), (169986, 'AVL_TRAIN'), (170102, 'UN_AVL'), (170102, 'AVL_TRAIN'), (170121, 'UN_AVL'), (170122, 'AVL_TRAIN'), (170160, 'UN_AVL'), (170161, 'AVL_TRAIN'), (170205, 'UN_AVL'), (170206, 'AVL_TRAIN'), (170252, 'UN_AVL'), (170253, 'AVL_TRAIN'), (170259, 'UN_AVL'), (170263, 'AVL_TRAIN'), (170317, 'UN_AVL'), (170318, 'AVL_TRAIN'), (170362, 'UN_AVL'), (170362, 'AVL_TRAIN'), (170399, 'UN_AVL'), (170400, 'AVL_TRAIN'), (170410, 'UN_AVL'), (170417, 'AVL_TRAIN'), (183755, 'UN_AVL'), (255858, 'AVL_TRAIN'), (256612, 'AVL_EVAL'), (256612, 'AVL_TRAIN'), (256855, 'AVL_EVAL'), (256856, 'AVL_TRAIN'), (256904, 'AVL_EVAL'), (256905, 'AVL_TRAIN'), (260803, 'AVL_EVAL'), (286732, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (211, 'AVL_EVAL'), (212, 'AVL_TRAIN'), (310, 'AVL_EVAL'), (311, 'AVL_TRAIN'), (406, 'AVL_EVAL'), (406, 'AVL_TRAIN'), (458, 'AVL_EVAL'), (459, 'AVL_TRAIN'), (3037, 'AVL_EVAL'), (18535, 'UN_AVL'), (52730, 'AVL_TRAIN'), (53037, 'UN_AVL'), (53038, 'AVL_TRAIN'), (53071, 'UN_AVL'), (53071, 'AVL_TRAIN'), (53342, 'UN_AVL'), 
(53343, 'AVL_TRAIN'), (53473, 'UN_AVL'), (53473, 'AVL_TRAIN'), (53550, 'UN_AVL'), (53550, 'AVL_TRAIN'), (53562, 'UN_AVL'), (53572, 'AVL_TRAIN'), (53617, 'UN_AVL'), (61848, 'AVL_EVAL'), (82951, 'UN_AVL'), (96322, 'AVL_EVAL'), (98686, 'UN_AVL'), (132605, 'AVL_TRAIN'), (132651, 'UN_AVL'), (132652, 'AVL_TRAIN'), (138069, 'AVL_EVAL'), (150938, 'UN_AVL'), (162125, 'AVL_TRAIN'), (162261, 'UN_AVL'), (162293, 'AVL_TRAIN'), (162739, 'UN_AVL'), (169954, 'AVL_TRAIN'), (169978, 'UN_AVL'), (169979, 'AVL_TRAIN'), (169985, 'UN_AVL'), (169986, 'AVL_TRAIN'), (170102, 'UN_AVL'), (170102, 'AVL_TRAIN'), (170121, 'UN_AVL'), (170122, 'AVL_TRAIN'), (170160, 'UN_AVL'), (170161, 'AVL_TRAIN'), (170205, 'UN_AVL'), (170206, 'AVL_TRAIN'), (170252, 'UN_AVL'), (170253, 'AVL_TRAIN'), (170259, 'UN_AVL'), (170263, 'AVL_TRAIN'), (170317, 'UN_AVL'), (170318, 'AVL_TRAIN'), (170362, 'UN_AVL'), (170362, 'AVL_TRAIN'), (170399, 'UN_AVL'), (170400, 'AVL_TRAIN'), (170410, 'UN_AVL'), (170417, 'AVL_TRAIN'), (183755, 'UN_AVL'), (255858, 'AVL_TRAIN'), (256612, 'UN_AVL'), (256612, 'AVL_TRAIN'), (256855, 'UN_AVL'), (256856, 'AVL_TRAIN'), (256904, 'UN_AVL'), (256905, 'AVL_TRAIN'), (260803, 'AVL_EVAL'), (286732, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_32.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_32.json index 3b0e7fa12..9a972291a 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_32.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_32.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, 
"perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "6.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (3583, 'UN_AVL'), (6643, 'AVL_TRAIN'), (7950, 'UN_AVL'), (7951, 'AVL_TRAIN'), (8791, 'UN_AVL'), (8792, 'AVL_TRAIN'), (15006, 'UN_AVL'), (59961, 'AVL_TRAIN'), (70959, 'UN_AVL'), (74209, 'AVL_TRAIN'), (78627, 'UN_AVL'), (90759, 'AVL_TRAIN'), (104413, 'AVL_EVAL'), (104414, 'AVL_TRAIN'), (104512, 'AVL_EVAL'), (104513, 'AVL_TRAIN'), (104590, 'AVL_EVAL'), (104590, 'AVL_TRAIN'), (104680, 'AVL_EVAL'), (104682, 'AVL_TRAIN'), (104748, 'AVL_EVAL'), (104749, 'AVL_TRAIN'), (104841, 'AVL_EVAL'), (104841, 'AVL_TRAIN'), (105170, 'AVL_EVAL'), (105171, 'AVL_TRAIN'), (105281, 'AVL_EVAL'), (105282, 'AVL_TRAIN'), (105287, 'AVL_EVAL'), (105287, 'AVL_TRAIN'), (105290, 'AVL_EVAL'), (105291, 'AVL_TRAIN'), (105554, 'AVL_EVAL'), (105558, 'AVL_TRAIN'), (106528, 'AVL_EVAL'), (106530, 'AVL_TRAIN'), (107035, 'AVL_EVAL'), (107050, 'AVL_TRAIN'), (107158, 'AVL_EVAL'), (108124, 'UN_AVL'), (177567, 'AVL_TRAIN'), (178003, 'UN_AVL'), (183665, 'AVL_TRAIN'), (185163, 'UN_AVL'), (185163, 'AVL_TRAIN'), (187242, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (3583, 'UN_AVL'), (6643, 'AVL_TRAIN'), (7950, 'UN_AVL'), (7951, 'AVL_TRAIN'), (8791, 'UN_AVL'), (8792, 'AVL_TRAIN'), (15006, 'UN_AVL'), (59961, 'AVL_TRAIN'), (70959, 'UN_AVL'), (74209, 'AVL_TRAIN'), (78627, 'UN_AVL'), (90759, 'AVL_TRAIN'), (104413, 'UN_AVL'), (104414, 'AVL_TRAIN'), (104512, 'UN_AVL'), (104513, 'AVL_TRAIN'), (104590, 'UN_AVL'), (104590, 'AVL_TRAIN'), (104680, 'UN_AVL'), (104682, 'AVL_TRAIN'), (104748, 'UN_AVL'), (104749, 'AVL_TRAIN'), (104841, 'UN_AVL'), (104841, 'AVL_TRAIN'), (105170, 
'UN_AVL'), (105171, 'AVL_TRAIN'), (105281, 'UN_AVL'), (105282, 'AVL_TRAIN'), (105287, 'UN_AVL'), (105287, 'AVL_TRAIN'), (105290, 'UN_AVL'), (105291, 'AVL_TRAIN'), (105554, 'UN_AVL'), (105558, 'AVL_TRAIN'), (106528, 'UN_AVL'), (106530, 'AVL_TRAIN'), (107035, 'UN_AVL'), (107050, 'AVL_TRAIN'), (107158, 'UN_AVL'), (177567, 'AVL_TRAIN'), (178003, 'UN_AVL'), (183665, 'AVL_TRAIN'), (185163, 'UN_AVL'), (185163, 'AVL_TRAIN'), (187242, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_33.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_33.json index c1f078f3a..6ae772710 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_33.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_33.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "21.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (14795, 'AVL_TRAIN'), (42901, 'AVL_EVAL'), (67749, 'AVL_TRAIN'), (69391, 'AVL_EVAL'), (69409, 'AVL_TRAIN'), (85971, 'AVL_EVAL'), (102045, 'UN_AVL'), (132769, 'AVL_TRAIN'), (136047, 'AVL_EVAL'), (137979, 'AVL_TRAIN'), (152741, 'AVL_EVAL'), (236326, 'UN_AVL'), (259294, 'AVL_TRAIN'), (263118, 'UN_AVL'), (263147, 'AVL_TRAIN'), (264034, 
'UN_AVL'), (268486, 'AVL_TRAIN'), (273571, 'UN_AVL'), (317634, 'AVL_EVAL'), (342373, 'UN_AVL'), (390709, 'AVL_TRAIN'), (397097, 'AVL_EVAL'), (430805, 'UN_AVL'), (439164, 'AVL_TRAIN'), (439555, 'UN_AVL'), (439556, 'AVL_TRAIN'), (439558, 'UN_AVL'), (439560, 'AVL_TRAIN'), (439561, 'UN_AVL'), (439564, 'AVL_TRAIN'), (439666, 'UN_AVL'), (439783, 'AVL_TRAIN'), (439800, 'UN_AVL'), (439806, 'AVL_TRAIN'), (447073, 'AVL_EVAL'), (447075, 'AVL_TRAIN'), (447611, 'AVL_EVAL'), (474997, 'UN_AVL'), (477430, 'AVL_TRAIN'), (514597, 'AVL_EVAL'), (525658, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (7694, 'UN_AVL'), (14795, 'AVL_TRAIN'), (42901, 'AVL_EVAL'), (67749, 'AVL_TRAIN'), (69391, 'AVL_EVAL'), (69409, 'AVL_TRAIN'), (85971, 'AVL_EVAL'), (97803, 'UN_AVL'), (132769, 'AVL_TRAIN'), (136047, 'AVL_EVAL'), (137979, 'AVL_TRAIN'), (152741, 'AVL_EVAL'), (232787, 'UN_AVL'), (259294, 'AVL_TRAIN'), (263118, 'UN_AVL'), (263147, 'AVL_TRAIN'), (264034, 'UN_AVL'), (268486, 'AVL_TRAIN'), (273571, 'UN_AVL'), (317634, 'AVL_EVAL'), (336712, 'UN_AVL'), (390709, 'AVL_TRAIN'), (397097, 'AVL_EVAL'), (429156, 'UN_AVL'), (439164, 'AVL_TRAIN'), (439555, 'UN_AVL'), (439556, 'AVL_TRAIN'), (439558, 'UN_AVL'), (439560, 'AVL_TRAIN'), (439561, 'UN_AVL'), (439564, 'AVL_TRAIN'), (439666, 'UN_AVL'), (439783, 'AVL_TRAIN'), (439800, 'UN_AVL'), (439806, 'AVL_TRAIN'), (447073, 'UN_AVL'), (447075, 'AVL_TRAIN'), (447611, 'UN_AVL'), (477430, 'AVL_TRAIN'), (514597, 'AVL_EVAL'), (525658, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_34.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_34.json index daa2afe49..cc712a612 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_34.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_34.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "8.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (70271, 'AVL_TRAIN'), (70272, 'UN_AVL'), (70272, 'AVL_TRAIN'), (104036, 'UN_AVL'), (134118, 'AVL_EVAL'), (135705, 'UN_AVL'), (140517, 'AVL_TRAIN'), (141025, 'UN_AVL'), (141026, 'AVL_TRAIN'), (147453, 'UN_AVL'), (147454, 'AVL_TRAIN'), (147481, 'UN_AVL'), (147489, 'AVL_TRAIN'), (150114, 'UN_AVL'), (150783, 'AVL_TRAIN'), (151246, 'UN_AVL'), (151247, 'AVL_TRAIN'), (184459, 'UN_AVL'), (189081, 'AVL_TRAIN'), (202754, 'AVL_EVAL'), (202755, 'AVL_TRAIN'), (220036, 'AVL_EVAL'), (220036, 'AVL_TRAIN'), (223485, 'AVL_EVAL'), (238504, 'UN_AVL'), (259208, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (70271, 'AVL_TRAIN'), (70272, 'UN_AVL'), (70272, 'AVL_TRAIN'), (104036, 'UN_AVL'), (140517, 'AVL_TRAIN'), (141025, 'UN_AVL'), (141026, 'AVL_TRAIN'), (147453, 'UN_AVL'), (147454, 'AVL_TRAIN'), (147481, 'UN_AVL'), (147489, 'AVL_TRAIN'), (150114, 'UN_AVL'), (150783, 'AVL_TRAIN'), (151246, 'UN_AVL'), (151247, 'AVL_TRAIN'), (184459, 'UN_AVL'), (189081, 'AVL_TRAIN'), (202754, 'AVL_EVAL'), (202755, 'AVL_TRAIN'), (220036, 'AVL_EVAL'), (220036, 'AVL_TRAIN'), (223485, 'AVL_EVAL'), (235006, 'UN_AVL'), (259208, 'AVL_TRAIN')]", diff --git 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_35.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_35.json index b772b4ca4..d01db1239 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_35.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_35.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (157092, 'AVL_TRAIN'), (157151, 'UN_AVL'), (159484, 'AVL_TRAIN'), (159489, 'UN_AVL'), (159494, 'AVL_TRAIN'), (162600, 'UN_AVL'), (167881, 'AVL_TRAIN'), (173569, 'AVL_EVAL'), (177173, 'UN_AVL'), (184282, 'AVL_TRAIN'), (188238, 'AVL_EVAL'), (189341, 'UN_AVL'), (219952, 'AVL_TRAIN'), (226242, 'AVL_EVAL'), (231559, 'UN_AVL'), (231559, 'AVL_TRAIN'), (237812, 'AVL_EVAL'), (271309, 'UN_AVL'), (325048, 'AVL_EVAL'), (326462, 'AVL_TRAIN'), (329766, 'AVL_EVAL'), (339363, 'UN_AVL'), (342100, 'AVL_TRAIN'), (344351, 'UN_AVL'), (348556, 'AVL_TRAIN'), (350795, 'UN_AVL'), (353753, 'AVL_TRAIN'), (360328, 'AVL_EVAL'), (362542, 'AVL_TRAIN'), (363167, 'AVL_EVAL'), (370474, 'UN_AVL'), (370484, 'AVL_TRAIN'), (374635, 'AVL_EVAL'), (377108, 'UN_AVL'), (405033, 'AVL_TRAIN'), (409621, 'UN_AVL')]", 
"avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (157092, 'AVL_TRAIN'), (157151, 'UN_AVL'), (159484, 'AVL_TRAIN'), (159489, 'UN_AVL'), (159494, 'AVL_TRAIN'), (162600, 'UN_AVL'), (167881, 'AVL_TRAIN'), (173569, 'UN_AVL'), (184282, 'AVL_TRAIN'), (188238, 'UN_AVL'), (219952, 'AVL_TRAIN'), (226242, 'UN_AVL'), (231559, 'AVL_TRAIN'), (237812, 'AVL_EVAL'), (252499, 'UN_AVL'), (325048, 'AVL_EVAL'), (326462, 'AVL_TRAIN'), (329766, 'AVL_EVAL'), (339363, 'UN_AVL'), (342100, 'AVL_TRAIN'), (344351, 'UN_AVL'), (348556, 'AVL_TRAIN'), (350795, 'UN_AVL'), (353753, 'AVL_TRAIN'), (360328, 'UN_AVL'), (362542, 'AVL_TRAIN'), (363167, 'UN_AVL'), (370484, 'AVL_TRAIN'), (374635, 'UN_AVL'), (405033, 'AVL_TRAIN'), (409621, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_36.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_36.json index 2449a60ca..730369d33 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_36.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_36.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "9.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (181690, 'AVL_EVAL'), (189338, 'UN_AVL'), 
(220622, 'AVL_TRAIN'), (220655, 'UN_AVL'), (220656, 'AVL_TRAIN'), (227231, 'AVL_EVAL'), (227241, 'AVL_TRAIN'), (247979, 'AVL_EVAL'), (249947, 'AVL_TRAIN'), (253244, 'AVL_EVAL'), (259489, 'UN_AVL'), (266932, 'AVL_TRAIN'), (278193, 'AVL_EVAL'), (310219, 'UN_AVL'), (339906, 'AVL_EVAL'), (347911, 'UN_AVL'), (362595, 'AVL_TRAIN'), (390071, 'AVL_EVAL'), (430436, 'UN_AVL'), (435756, 'AVL_TRAIN'), (437100, 'UN_AVL'), (437110, 'AVL_TRAIN'), (440847, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (181690, 'AVL_EVAL'), (182347, 'UN_AVL'), (220622, 'AVL_TRAIN'), (220655, 'UN_AVL'), (220656, 'AVL_TRAIN'), (227231, 'UN_AVL'), (227241, 'AVL_TRAIN'), (247979, 'AVL_EVAL'), (249947, 'AVL_TRAIN'), (253244, 'AVL_EVAL'), (256639, 'UN_AVL'), (266932, 'AVL_TRAIN'), (278193, 'AVL_EVAL'), (306406, 'UN_AVL'), (362595, 'AVL_TRAIN'), (390071, 'AVL_EVAL'), (419325, 'UN_AVL'), (435756, 'AVL_TRAIN'), (437100, 'UN_AVL'), (437110, 'AVL_TRAIN'), (440847, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_37.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_37.json index adc5e17a2..867c67d9f 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_37.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_37.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, 
"client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "8.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (3049, 'AVL_EVAL'), (38657, 'AVL_TRAIN'), (42930, 'AVL_EVAL'), (83335, 'AVL_TRAIN'), (89295, 'AVL_EVAL'), (132341, 'UN_AVL'), (140030, 'AVL_TRAIN'), (143759, 'UN_AVL'), (163839, 'AVL_TRAIN'), (171099, 'AVL_EVAL'), (172116, 'AVL_TRAIN'), (182003, 'AVL_EVAL'), (205065, 'UN_AVL'), (210342, 'AVL_TRAIN'), (255530, 'UN_AVL'), (255543, 'AVL_TRAIN'), (259029, 'UN_AVL'), (259047, 'AVL_TRAIN'), (267033, 'AVL_EVAL'), (303621, 'AVL_TRAIN'), (309577, 'AVL_EVAL'), (314937, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (3049, 'AVL_EVAL'), (35489, 'UN_AVL'), (38657, 'AVL_TRAIN'), (42930, 'AVL_EVAL'), (55765, 'UN_AVL'), (83335, 'AVL_TRAIN'), (89295, 'AVL_EVAL'), (97171, 'UN_AVL'), (140030, 'AVL_TRAIN'), (143759, 'UN_AVL'), (163839, 'AVL_TRAIN'), (171099, 'UN_AVL'), (172116, 'AVL_TRAIN'), (182003, 'AVL_EVAL'), (188445, 'UN_AVL'), (210342, 'AVL_TRAIN'), (255530, 'UN_AVL'), (255543, 'AVL_TRAIN'), (259029, 'UN_AVL'), (259047, 'AVL_TRAIN'), (267033, 'AVL_EVAL'), (270763, 'UN_AVL'), (303621, 'AVL_TRAIN'), (309577, 'AVL_EVAL'), (313905, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_38.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_38.json index c7ffa3571..df9e1aba0 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_38.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_38.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", 
"partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "18.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (11047, 'AVL_TRAIN'), (11642, 'UN_AVL'), (11747, 'AVL_TRAIN'), (13275, 'UN_AVL'), (25409, 'AVL_EVAL'), (47845, 'UN_AVL'), (71020, 'AVL_TRAIN'), (73744, 'UN_AVL'), (73767, 'AVL_TRAIN'), (74873, 'UN_AVL'), (79371, 'AVL_TRAIN'), (80765, 'UN_AVL'), (83531, 'AVL_TRAIN'), (84833, 'UN_AVL'), (94217, 'AVL_TRAIN'), (97395, 'AVL_EVAL'), (98723, 'UN_AVL'), (112834, 'AVL_TRAIN'), (112871, 'UN_AVL'), (112908, 'AVL_TRAIN'), (112965, 'UN_AVL'), (113073, 'AVL_TRAIN'), (115426, 'UN_AVL'), (117593, 'AVL_TRAIN'), (118930, 'UN_AVL'), (118950, 'AVL_TRAIN'), (137864, 'AVL_EVAL'), (159662, 'UN_AVL'), (167171, 'AVL_TRAIN'), (171052, 'AVL_EVAL'), (171058, 'AVL_TRAIN'), (171486, 'AVL_EVAL'), (183398, 'UN_AVL'), (190666, 'AVL_TRAIN'), (194339, 'AVL_EVAL'), (201702, 'UN_AVL'), (235372, 'AVL_TRAIN'), (237550, 'UN_AVL'), (237559, 'AVL_TRAIN'), (237765, 'UN_AVL'), (247173, 'AVL_TRAIN'), (247543, 'UN_AVL'), (248832, 'AVL_TRAIN'), (249602, 'UN_AVL'), (252388, 'AVL_TRAIN'), (253079, 'UN_AVL'), (257462, 'AVL_TRAIN'), (263566, 'AVL_EVAL'), (287685, 'UN_AVL'), (325656, 'AVL_TRAIN'), (327966, 'UN_AVL'), (328085, 'AVL_TRAIN'), (328102, 'UN_AVL'), (328115, 'AVL_TRAIN'), (328121, 'UN_AVL'), (340434, 'AVL_TRAIN'), (341294, 'UN_AVL'), (342887, 'AVL_TRAIN'), (343770, 'UN_AVL'), (356565, 'AVL_TRAIN'), (356582, 'UN_AVL'), (360239, 'AVL_TRAIN'), (362988, 'AVL_EVAL'), (368591, 'UN_AVL'), (413195, 'AVL_TRAIN'), (422190, 'AVL_EVAL'), (453990, 'UN_AVL'), (453990, 'AVL_TRAIN'), (460189, 'AVL_EVAL'), (488326, 
'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (11047, 'AVL_TRAIN'), (11642, 'UN_AVL'), (11747, 'AVL_TRAIN'), (13275, 'UN_AVL'), (71020, 'AVL_TRAIN'), (73744, 'UN_AVL'), (73767, 'AVL_TRAIN'), (74873, 'UN_AVL'), (79371, 'AVL_TRAIN'), (80765, 'UN_AVL'), (83531, 'AVL_TRAIN'), (84833, 'UN_AVL'), (94217, 'AVL_TRAIN'), (97395, 'UN_AVL'), (112834, 'AVL_TRAIN'), (112871, 'UN_AVL'), (112908, 'AVL_TRAIN'), (112965, 'UN_AVL'), (113073, 'AVL_TRAIN'), (115426, 'UN_AVL'), (117593, 'AVL_TRAIN'), (118930, 'UN_AVL'), (118950, 'AVL_TRAIN'), (137864, 'AVL_EVAL'), (155633, 'UN_AVL'), (167171, 'AVL_TRAIN'), (171052, 'UN_AVL'), (171058, 'AVL_TRAIN'), (171486, 'UN_AVL'), (190666, 'AVL_TRAIN'), (194339, 'AVL_EVAL'), (198294, 'UN_AVL'), (235372, 'AVL_TRAIN'), (237550, 'UN_AVL'), (237559, 'AVL_TRAIN'), (237765, 'UN_AVL'), (247173, 'AVL_TRAIN'), (247543, 'UN_AVL'), (248832, 'AVL_TRAIN'), (249602, 'UN_AVL'), (252388, 'AVL_TRAIN'), (253079, 'UN_AVL'), (257462, 'AVL_TRAIN'), (263566, 'AVL_EVAL'), (284803, 'UN_AVL'), (325656, 'AVL_TRAIN'), (327966, 'UN_AVL'), (328085, 'AVL_TRAIN'), (328102, 'UN_AVL'), (328115, 'AVL_TRAIN'), (328121, 'UN_AVL'), (340434, 'AVL_TRAIN'), (341294, 'UN_AVL'), (342887, 'AVL_TRAIN'), (343770, 'UN_AVL'), (356565, 'AVL_TRAIN'), (356582, 'UN_AVL'), (360239, 'AVL_TRAIN'), (362988, 'UN_AVL'), (413195, 'AVL_TRAIN'), (422190, 'AVL_EVAL'), (445117, 'UN_AVL'), (453990, 'AVL_TRAIN'), (460189, 'AVL_EVAL'), (488326, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_39.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_39.json index 8d230e598..a1edc470f 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_39.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_39.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + 
"data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (73547, 'AVL_EVAL'), (76574, 'UN_AVL'), (76633, 'AVL_TRAIN'), (76808, 'UN_AVL'), (76809, 'AVL_TRAIN'), (76850, 'UN_AVL'), (76850, 'AVL_TRAIN'), (77081, 'UN_AVL'), (77082, 'AVL_TRAIN'), (77084, 'UN_AVL'), (77088, 'AVL_TRAIN'), (77104, 'UN_AVL'), (77110, 'AVL_TRAIN'), (77127, 'UN_AVL'), (77127, 'AVL_TRAIN'), (77173, 'UN_AVL'), (77190, 'AVL_TRAIN'), (77191, 'UN_AVL'), (77193, 'AVL_TRAIN'), (77200, 'UN_AVL'), (77204, 'AVL_TRAIN'), (77206, 'UN_AVL'), (77210, 'AVL_TRAIN'), (77213, 'UN_AVL'), (77213, 'AVL_TRAIN'), (77214, 'UN_AVL'), (77215, 'AVL_TRAIN'), (77231, 'UN_AVL'), (77240, 'AVL_TRAIN'), (77241, 'UN_AVL'), (77254, 'AVL_TRAIN'), (77300, 'UN_AVL'), (77334, 'AVL_TRAIN'), (77335, 'UN_AVL'), (77340, 'AVL_TRAIN'), (77345, 'UN_AVL'), (77348, 'AVL_TRAIN'), (77350, 'UN_AVL'), (77352, 'AVL_TRAIN'), (77436, 'UN_AVL'), (77437, 'AVL_TRAIN'), (77500, 'UN_AVL'), (77502, 'AVL_TRAIN'), (77535, 'UN_AVL'), (77535, 'AVL_TRAIN'), (77536, 'UN_AVL'), (77538, 'AVL_TRAIN'), (77655, 'UN_AVL'), (77656, 'AVL_TRAIN'), (77659, 'UN_AVL'), (77660, 'AVL_TRAIN'), (77666, 'UN_AVL'), (77667, 'AVL_TRAIN'), (77784, 'UN_AVL'), (77785, 'AVL_TRAIN'), (77793, 'UN_AVL'), (77793, 'AVL_TRAIN'), (77917, 'UN_AVL'), (77917, 'AVL_TRAIN'), (77934, 'UN_AVL'), (77934, 'AVL_TRAIN'), (77960, 'UN_AVL'), (77961, 'AVL_TRAIN'), (77962, 
'UN_AVL'), (77964, 'AVL_TRAIN'), (77971, 'UN_AVL'), (77972, 'AVL_TRAIN'), (77974, 'UN_AVL'), (77974, 'AVL_TRAIN'), (77977, 'UN_AVL'), (77977, 'AVL_TRAIN'), (77991, 'UN_AVL'), (77992, 'AVL_TRAIN'), (78002, 'UN_AVL'), (78003, 'AVL_TRAIN'), (78004, 'UN_AVL'), (78006, 'AVL_TRAIN'), (78008, 'UN_AVL'), (78008, 'AVL_TRAIN'), (78043, 'UN_AVL'), (78044, 'AVL_TRAIN'), (78108, 'UN_AVL'), (78108, 'AVL_TRAIN'), (78112, 'UN_AVL'), (78112, 'AVL_TRAIN'), (78175, 'UN_AVL'), (78175, 'AVL_TRAIN'), (79611, 'UN_AVL'), (83140, 'AVL_TRAIN'), (83220, 'UN_AVL'), (83220, 'AVL_TRAIN'), (83475, 'UN_AVL'), (83476, 'AVL_TRAIN'), (83853, 'UN_AVL'), (83855, 'AVL_TRAIN'), (83876, 'UN_AVL'), (83876, 'AVL_TRAIN'), (83885, 'UN_AVL'), (83885, 'AVL_TRAIN'), (83902, 'UN_AVL'), (83904, 'AVL_TRAIN'), (83913, 'UN_AVL'), (83913, 'AVL_TRAIN'), (94598, 'AVL_EVAL'), (94598, 'AVL_TRAIN'), (94617, 'AVL_EVAL'), (97436, 'AVL_TRAIN'), (98168, 'AVL_EVAL'), (98755, 'UN_AVL'), (102213, 'AVL_TRAIN'), (102362, 'UN_AVL'), (102362, 'AVL_TRAIN'), (102374, 'UN_AVL'), (102379, 'AVL_TRAIN'), (102660, 'UN_AVL'), (102670, 'AVL_TRAIN'), (102674, 'UN_AVL'), (102674, 'AVL_TRAIN'), (102978, 'UN_AVL'), (102978, 'AVL_TRAIN'), (102980, 'UN_AVL'), (102981, 'AVL_TRAIN'), (102982, 'UN_AVL'), (102982, 'AVL_TRAIN'), (102985, 'UN_AVL'), (102985, 'AVL_TRAIN'), (103021, 'UN_AVL'), (103021, 'AVL_TRAIN'), (103651, 'UN_AVL'), (103651, 'AVL_TRAIN'), (103811, 'UN_AVL'), (103811, 'AVL_TRAIN'), (103842, 'UN_AVL'), (105769, 'AVL_TRAIN'), (105867, 'UN_AVL'), (105868, 'AVL_TRAIN'), (109167, 'UN_AVL'), (109167, 'AVL_TRAIN'), (109179, 'UN_AVL'), (109179, 'AVL_TRAIN'), (109190, 'UN_AVL'), (110853, 'AVL_TRAIN'), (111020, 'UN_AVL'), (111024, 'AVL_TRAIN'), (111746, 'UN_AVL'), (111773, 'AVL_TRAIN'), (131766, 'AVL_EVAL'), (161481, 'UN_AVL'), (190250, 'AVL_EVAL'), (190250, 'AVL_TRAIN'), (190322, 'AVL_EVAL'), (190322, 'AVL_TRAIN'), (190323, 'AVL_EVAL'), (190323, 'AVL_TRAIN'), (192697, 'AVL_EVAL'), (225117, 'UN_AVL'), (236840, 'AVL_EVAL'), (243126, 'UN_AVL'), 
(247103, 'AVL_TRAIN'), (248048, 'UN_AVL'), (248048, 'AVL_TRAIN'), (248068, 'UN_AVL'), (258565, 'AVL_TRAIN'), (264979, 'UN_AVL'), (266318, 'AVL_TRAIN'), (267410, 'UN_AVL'), (269641, 'AVL_TRAIN'), (270373, 'UN_AVL'), (275795, 'AVL_TRAIN'), (282218, 'AVL_EVAL'), (298476, 'UN_AVL'), (318647, 'AVL_TRAIN'), (318665, 'UN_AVL'), (318666, 'AVL_TRAIN'), (318667, 'UN_AVL'), (318668, 'AVL_TRAIN'), (320765, 'UN_AVL'), (320765, 'AVL_TRAIN'), (320766, 'UN_AVL'), (320766, 'AVL_TRAIN'), (320776, 'UN_AVL'), (320778, 'AVL_TRAIN'), (320851, 'UN_AVL'), (320851, 'AVL_TRAIN'), (320870, 'UN_AVL'), (320871, 'AVL_TRAIN'), (320872, 'UN_AVL'), (320875, 'AVL_TRAIN'), (320876, 'UN_AVL'), (320877, 'AVL_TRAIN'), (320880, 'UN_AVL'), (320883, 'AVL_TRAIN'), (320884, 'UN_AVL'), (320884, 'AVL_TRAIN'), (320922, 'UN_AVL'), (320922, 'AVL_TRAIN'), (322293, 'UN_AVL'), (322295, 'AVL_TRAIN'), (322296, 'UN_AVL'), (322296, 'AVL_TRAIN'), (322297, 'UN_AVL'), (322297, 'AVL_TRAIN'), (322298, 'UN_AVL'), (322449, 'AVL_TRAIN'), (322450, 'UN_AVL'), (322467, 'AVL_TRAIN'), (322470, 'UN_AVL'), (322471, 'AVL_TRAIN'), (323215, 'UN_AVL'), (326785, 'AVL_TRAIN'), (340656, 'AVL_EVAL'), (360524, 'UN_AVL'), (360524, 'AVL_TRAIN'), (364796, 'UN_AVL'), (364802, 'AVL_TRAIN'), (364964, 'UN_AVL'), (364990, 'AVL_TRAIN'), (365065, 'UN_AVL'), (365065, 'AVL_TRAIN'), (365068, 'UN_AVL'), (365069, 'AVL_TRAIN'), (365071, 'UN_AVL'), (365072, 'AVL_TRAIN'), (365077, 'UN_AVL'), (365077, 'AVL_TRAIN'), (366909, 'UN_AVL'), (366910, 'AVL_TRAIN'), (367088, 'UN_AVL'), (367088, 'AVL_TRAIN'), (367089, 'UN_AVL'), (367089, 'AVL_TRAIN'), (367318, 'UN_AVL'), (367320, 'AVL_TRAIN'), (367327, 'UN_AVL'), (367328, 'AVL_TRAIN'), (367329, 'UN_AVL'), (367329, 'AVL_TRAIN'), (367331, 'UN_AVL'), (367332, 'AVL_TRAIN'), (367546, 'UN_AVL'), (367547, 'AVL_TRAIN'), (367635, 'UN_AVL'), (367636, 'AVL_TRAIN'), (367937, 'UN_AVL'), (367937, 'AVL_TRAIN'), (368230, 'UN_AVL'), (368231, 'AVL_TRAIN'), (368269, 'UN_AVL'), (368269, 'AVL_TRAIN'), (368270, 'UN_AVL'), (368270, 
'AVL_TRAIN'), (368271, 'UN_AVL'), (368271, 'AVL_TRAIN'), (368272, 'UN_AVL'), (368272, 'AVL_TRAIN'), (368490, 'UN_AVL'), (368490, 'AVL_TRAIN'), (368491, 'UN_AVL'), (368491, 'AVL_TRAIN'), (368492, 'UN_AVL'), (368492, 'AVL_TRAIN'), (368673, 'UN_AVL'), (368673, 'AVL_TRAIN'), (371991, 'AVL_EVAL'), (371991, 'AVL_TRAIN'), (372152, 'AVL_EVAL'), (372152, 'AVL_TRAIN'), (379921, 'AVL_EVAL'), (379921, 'AVL_TRAIN'), (380554, 'AVL_EVAL'), (380554, 'AVL_TRAIN'), (380555, 'AVL_EVAL'), (416743, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (76633, 'AVL_TRAIN'), (76808, 'UN_AVL'), (76809, 'AVL_TRAIN'), (76850, 'UN_AVL'), (76850, 'AVL_TRAIN'), (77081, 'UN_AVL'), (77082, 'AVL_TRAIN'), (77084, 'UN_AVL'), (77088, 'AVL_TRAIN'), (77104, 'UN_AVL'), (77110, 'AVL_TRAIN'), (77127, 'UN_AVL'), (77127, 'AVL_TRAIN'), (77173, 'UN_AVL'), (77190, 'AVL_TRAIN'), (77191, 'UN_AVL'), (77193, 'AVL_TRAIN'), (77200, 'UN_AVL'), (77204, 'AVL_TRAIN'), (77206, 'UN_AVL'), (77210, 'AVL_TRAIN'), (77213, 'UN_AVL'), (77213, 'AVL_TRAIN'), (77214, 'UN_AVL'), (77215, 'AVL_TRAIN'), (77231, 'UN_AVL'), (77240, 'AVL_TRAIN'), (77241, 'UN_AVL'), (77254, 'AVL_TRAIN'), (77300, 'UN_AVL'), (77334, 'AVL_TRAIN'), (77335, 'UN_AVL'), (77340, 'AVL_TRAIN'), (77345, 'UN_AVL'), (77348, 'AVL_TRAIN'), (77350, 'UN_AVL'), (77352, 'AVL_TRAIN'), (77436, 'UN_AVL'), (77437, 'AVL_TRAIN'), (77500, 'UN_AVL'), (77502, 'AVL_TRAIN'), (77535, 'UN_AVL'), (77535, 'AVL_TRAIN'), (77536, 'UN_AVL'), (77538, 'AVL_TRAIN'), (77655, 'UN_AVL'), (77656, 'AVL_TRAIN'), (77659, 'UN_AVL'), (77660, 'AVL_TRAIN'), (77666, 'UN_AVL'), (77667, 'AVL_TRAIN'), (77784, 'UN_AVL'), (77785, 'AVL_TRAIN'), (77793, 'UN_AVL'), (77793, 'AVL_TRAIN'), (77917, 'UN_AVL'), (77917, 'AVL_TRAIN'), (77934, 'UN_AVL'), (77934, 'AVL_TRAIN'), (77960, 'UN_AVL'), (77961, 'AVL_TRAIN'), (77962, 'UN_AVL'), (77964, 'AVL_TRAIN'), (77971, 'UN_AVL'), (77972, 'AVL_TRAIN'), (77974, 'UN_AVL'), (77974, 'AVL_TRAIN'), (77977, 'UN_AVL'), (77977, 'AVL_TRAIN'), (77991, 'UN_AVL'), 
(77992, 'AVL_TRAIN'), (78002, 'UN_AVL'), (78003, 'AVL_TRAIN'), (78004, 'UN_AVL'), (78006, 'AVL_TRAIN'), (78008, 'UN_AVL'), (78008, 'AVL_TRAIN'), (78043, 'UN_AVL'), (78044, 'AVL_TRAIN'), (78108, 'UN_AVL'), (78108, 'AVL_TRAIN'), (78112, 'UN_AVL'), (78112, 'AVL_TRAIN'), (78175, 'UN_AVL'), (78175, 'AVL_TRAIN'), (79611, 'UN_AVL'), (83140, 'AVL_TRAIN'), (83220, 'UN_AVL'), (83220, 'AVL_TRAIN'), (83475, 'UN_AVL'), (83476, 'AVL_TRAIN'), (83853, 'UN_AVL'), (83855, 'AVL_TRAIN'), (83876, 'UN_AVL'), (83876, 'AVL_TRAIN'), (83885, 'UN_AVL'), (83885, 'AVL_TRAIN'), (83902, 'UN_AVL'), (83904, 'AVL_TRAIN'), (83913, 'UN_AVL'), (83913, 'AVL_TRAIN'), (94598, 'UN_AVL'), (94598, 'AVL_TRAIN'), (94617, 'UN_AVL'), (97436, 'AVL_TRAIN'), (98168, 'UN_AVL'), (102213, 'AVL_TRAIN'), (102362, 'UN_AVL'), (102362, 'AVL_TRAIN'), (102374, 'UN_AVL'), (102379, 'AVL_TRAIN'), (102660, 'UN_AVL'), (102670, 'AVL_TRAIN'), (102674, 'UN_AVL'), (102674, 'AVL_TRAIN'), (102978, 'UN_AVL'), (102978, 'AVL_TRAIN'), (102980, 'UN_AVL'), (102981, 'AVL_TRAIN'), (102982, 'UN_AVL'), (102982, 'AVL_TRAIN'), (102985, 'UN_AVL'), (102985, 'AVL_TRAIN'), (103021, 'UN_AVL'), (103021, 'AVL_TRAIN'), (103651, 'UN_AVL'), (103651, 'AVL_TRAIN'), (103811, 'UN_AVL'), (103811, 'AVL_TRAIN'), (103842, 'UN_AVL'), (105769, 'AVL_TRAIN'), (105867, 'UN_AVL'), (105868, 'AVL_TRAIN'), (109167, 'UN_AVL'), (109167, 'AVL_TRAIN'), (109179, 'UN_AVL'), (109179, 'AVL_TRAIN'), (109190, 'UN_AVL'), (110853, 'AVL_TRAIN'), (111020, 'UN_AVL'), (111024, 'AVL_TRAIN'), (111746, 'UN_AVL'), (111773, 'AVL_TRAIN'), (131766, 'AVL_EVAL'), (161481, 'UN_AVL'), (190250, 'AVL_TRAIN'), (190322, 'UN_AVL'), (190322, 'AVL_TRAIN'), (190323, 'UN_AVL'), (190323, 'AVL_TRAIN'), (192697, 'UN_AVL'), (247103, 'AVL_TRAIN'), (248048, 'UN_AVL'), (248048, 'AVL_TRAIN'), (248068, 'UN_AVL'), (258565, 'AVL_TRAIN'), (264979, 'UN_AVL'), (266318, 'AVL_TRAIN'), (267410, 'UN_AVL'), (269641, 'AVL_TRAIN'), (270373, 'UN_AVL'), (275795, 'AVL_TRAIN'), (282218, 'UN_AVL'), (318647, 'AVL_TRAIN'), (318665, 
'UN_AVL'), (318666, 'AVL_TRAIN'), (318667, 'UN_AVL'), (318668, 'AVL_TRAIN'), (320765, 'UN_AVL'), (320765, 'AVL_TRAIN'), (320766, 'UN_AVL'), (320766, 'AVL_TRAIN'), (320776, 'UN_AVL'), (320778, 'AVL_TRAIN'), (320851, 'UN_AVL'), (320851, 'AVL_TRAIN'), (320870, 'UN_AVL'), (320871, 'AVL_TRAIN'), (320872, 'UN_AVL'), (320875, 'AVL_TRAIN'), (320876, 'UN_AVL'), (320877, 'AVL_TRAIN'), (320880, 'UN_AVL'), (320883, 'AVL_TRAIN'), (320884, 'UN_AVL'), (320884, 'AVL_TRAIN'), (320922, 'UN_AVL'), (320922, 'AVL_TRAIN'), (322293, 'UN_AVL'), (322295, 'AVL_TRAIN'), (322296, 'UN_AVL'), (322296, 'AVL_TRAIN'), (322297, 'UN_AVL'), (322297, 'AVL_TRAIN'), (322298, 'UN_AVL'), (322449, 'AVL_TRAIN'), (322450, 'UN_AVL'), (322467, 'AVL_TRAIN'), (322470, 'UN_AVL'), (322471, 'AVL_TRAIN'), (323215, 'UN_AVL'), (326785, 'AVL_TRAIN'), (340656, 'AVL_EVAL'), (344916, 'UN_AVL'), (360524, 'AVL_TRAIN'), (364796, 'UN_AVL'), (364802, 'AVL_TRAIN'), (364964, 'UN_AVL'), (364990, 'AVL_TRAIN'), (365065, 'UN_AVL'), (365065, 'AVL_TRAIN'), (365068, 'UN_AVL'), (365069, 'AVL_TRAIN'), (365071, 'UN_AVL'), (365072, 'AVL_TRAIN'), (365077, 'UN_AVL'), (365077, 'AVL_TRAIN'), (366909, 'UN_AVL'), (366910, 'AVL_TRAIN'), (367088, 'UN_AVL'), (367088, 'AVL_TRAIN'), (367089, 'UN_AVL'), (367089, 'AVL_TRAIN'), (367318, 'UN_AVL'), (367320, 'AVL_TRAIN'), (367327, 'UN_AVL'), (367328, 'AVL_TRAIN'), (367329, 'UN_AVL'), (367329, 'AVL_TRAIN'), (367331, 'UN_AVL'), (367332, 'AVL_TRAIN'), (367546, 'UN_AVL'), (367547, 'AVL_TRAIN'), (367635, 'UN_AVL'), (367636, 'AVL_TRAIN'), (367937, 'UN_AVL'), (367937, 'AVL_TRAIN'), (368230, 'UN_AVL'), (368231, 'AVL_TRAIN'), (368269, 'UN_AVL'), (368269, 'AVL_TRAIN'), (368270, 'UN_AVL'), (368270, 'AVL_TRAIN'), (368271, 'UN_AVL'), (368271, 'AVL_TRAIN'), (368272, 'UN_AVL'), (368272, 'AVL_TRAIN'), (368490, 'UN_AVL'), (368490, 'AVL_TRAIN'), (368491, 'UN_AVL'), (368491, 'AVL_TRAIN'), (368492, 'UN_AVL'), (368492, 'AVL_TRAIN'), (368673, 'UN_AVL'), (368673, 'AVL_TRAIN'), (371991, 'UN_AVL'), (371991, 'AVL_TRAIN'), (372152, 
'UN_AVL'), (372152, 'AVL_TRAIN'), (379921, 'AVL_EVAL'), (379921, 'AVL_TRAIN'), (380554, 'AVL_EVAL'), (380554, 'AVL_TRAIN'), (380555, 'AVL_EVAL'), (411645, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_4.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_4.json index 4878a7a82..ef95ca245 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_4.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_4.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "18.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (10687, 'AVL_EVAL'), (74675, 'AVL_TRAIN'), (74990, 'AVL_EVAL'), (75607, 'AVL_TRAIN'), (83257, 'AVL_EVAL'), (92529, 'AVL_TRAIN'), (92860, 'AVL_EVAL'), (97479, 'UN_AVL'), (148456, 'AVL_EVAL'), (160606, 'UN_AVL'), (177609, 'AVL_EVAL'), (177609, 'AVL_TRAIN'), (232693, 'AVL_EVAL'), (245828, 'UN_AVL'), (255474, 'AVL_EVAL'), (259899, 'UN_AVL'), (269402, 'AVL_TRAIN'), (279325, 'AVL_EVAL'), (322502, 'UN_AVL'), (338370, 'AVL_EVAL'), (435431, 'UN_AVL'), (435431, 'AVL_TRAIN'), (437982, 'AVL_EVAL'), (520956, 'UN_AVL'), (522727, 'AVL_TRAIN'), (524615, 'UN_AVL'), (524625, 'AVL_TRAIN'), (524925, 'UN_AVL'), (526160, 
'AVL_EVAL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (74675, 'AVL_TRAIN'), (74990, 'UN_AVL'), (75607, 'AVL_TRAIN'), (83257, 'AVL_EVAL'), (87925, 'UN_AVL'), (92529, 'AVL_TRAIN'), (92860, 'UN_AVL'), (148456, 'AVL_EVAL'), (160606, 'UN_AVL'), (177609, 'AVL_TRAIN'), (232693, 'AVL_EVAL'), (242290, 'UN_AVL'), (269402, 'AVL_TRAIN'), (279325, 'AVL_EVAL'), (320377, 'UN_AVL'), (338370, 'AVL_EVAL'), (352789, 'UN_AVL'), (421266, 'AVL_EVAL'), (423273, 'UN_AVL'), (435431, 'AVL_TRAIN'), (437982, 'UN_AVL'), (511796, 'AVL_EVAL'), (517950, 'UN_AVL'), (522727, 'AVL_TRAIN'), (524615, 'UN_AVL'), (524625, 'AVL_TRAIN'), (524925, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_40.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_40.json index e88c9376d..5d79a64ce 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_40.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_40.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "21.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (13557, 'AVL_TRAIN'), (15700, 'UN_AVL'), (17839, 'AVL_TRAIN'), (21235, 'AVL_EVAL'), (23943, 'UN_AVL'), (23984, 'AVL_TRAIN'), 
(56604, 'AVL_EVAL'), (79918, 'UN_AVL'), (89126, 'AVL_TRAIN'), (102140, 'AVL_EVAL'), (108410, 'AVL_TRAIN'), (141518, 'AVL_EVAL'), (163057, 'UN_AVL'), (176376, 'AVL_TRAIN'), (182620, 'AVL_EVAL'), (193290, 'UN_AVL'), (197285, 'AVL_TRAIN'), (225386, 'AVL_EVAL'), (250972, 'UN_AVL'), (266434, 'AVL_TRAIN'), (267417, 'UN_AVL'), (268379, 'AVL_TRAIN'), (268911, 'UN_AVL'), (269138, 'AVL_TRAIN'), (274584, 'AVL_EVAL'), (287536, 'UN_AVL'), (320065, 'AVL_TRAIN'), (330945, 'AVL_EVAL'), (359916, 'UN_AVL'), (378184, 'AVL_TRAIN'), (404092, 'AVL_EVAL'), (440213, 'UN_AVL'), (440213, 'AVL_TRAIN'), (440264, 'UN_AVL'), (440295, 'AVL_TRAIN'), (441839, 'UN_AVL'), (441841, 'AVL_TRAIN'), (441994, 'UN_AVL'), (441994, 'AVL_TRAIN'), (442004, 'UN_AVL'), (448904, 'AVL_TRAIN'), (453134, 'AVL_EVAL'), (456922, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (13557, 'AVL_TRAIN'), (15700, 'UN_AVL'), (17839, 'AVL_TRAIN'), (21235, 'UN_AVL'), (23984, 'AVL_TRAIN'), (56604, 'AVL_EVAL'), (68352, 'UN_AVL'), (89126, 'AVL_TRAIN'), (102140, 'AVL_EVAL'), (108410, 'AVL_TRAIN'), (141518, 'AVL_EVAL'), (153120, 'UN_AVL'), (176376, 'AVL_TRAIN'), (182620, 'UN_AVL'), (197285, 'AVL_TRAIN'), (225386, 'AVL_EVAL'), (246190, 'UN_AVL'), (266434, 'AVL_TRAIN'), (267417, 'UN_AVL'), (268379, 'AVL_TRAIN'), (268911, 'UN_AVL'), (269138, 'AVL_TRAIN'), (274584, 'UN_AVL'), (320065, 'AVL_TRAIN'), (330945, 'AVL_EVAL'), (345312, 'UN_AVL'), (378184, 'AVL_TRAIN'), (404092, 'AVL_EVAL'), (418210, 'UN_AVL'), (440213, 'AVL_TRAIN'), (440264, 'UN_AVL'), (440295, 'AVL_TRAIN'), (441839, 'UN_AVL'), (441841, 'AVL_TRAIN'), (441994, 'UN_AVL'), (441994, 'AVL_TRAIN'), (442004, 'UN_AVL'), (448904, 'AVL_TRAIN'), (453134, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_41.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_41.json index 0a8963cae..782cefe78 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_41.json +++ 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_41.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "13.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (55523, 'AVL_TRAIN'), (69763, 'AVL_EVAL'), (89621, 'AVL_TRAIN'), (94189, 'AVL_EVAL'), (138502, 'AVL_TRAIN'), (140780, 'AVL_EVAL'), (159978, 'AVL_TRAIN'), (165139, 'AVL_EVAL'), (194198, 'AVL_TRAIN'), (194593, 'AVL_EVAL'), (218842, 'AVL_TRAIN'), (222740, 'AVL_EVAL'), (233334, 'AVL_TRAIN'), (235367, 'AVL_EVAL'), (258915, 'UN_AVL'), (261749, 'AVL_TRAIN'), (263878, 'AVL_EVAL'), (276338, 'AVL_TRAIN'), (280547, 'AVL_EVAL'), (349078, 'UN_AVL'), (349177, 'AVL_TRAIN'), (352799, 'AVL_EVAL'), (361574, 'AVL_TRAIN'), (365552, 'AVL_EVAL'), (394079, 'AVL_TRAIN'), (395652, 'AVL_EVAL'), (402844, 'AVL_TRAIN'), (404338, 'AVL_EVAL'), (428076, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (17308, 'UN_AVL'), (55523, 'AVL_TRAIN'), (69763, 'AVL_EVAL'), (89621, 'AVL_TRAIN'), (94189, 'AVL_EVAL'), (138496, 'UN_AVL'), (138502, 'AVL_TRAIN'), (140780, 'AVL_EVAL'), (156917, 'UN_AVL'), (159978, 'AVL_TRAIN'), (165139, 'AVL_EVAL'), (176788, 'UN_AVL'), (194198, 'AVL_TRAIN'), (194593, 'UN_AVL'), (218842, 'AVL_TRAIN'), (222740, 'AVL_EVAL'), (233334, 
'AVL_TRAIN'), (235367, 'AVL_EVAL'), (247221, 'UN_AVL'), (261749, 'AVL_TRAIN'), (263878, 'AVL_EVAL'), (270803, 'UN_AVL'), (276338, 'AVL_TRAIN'), (280547, 'AVL_EVAL'), (309771, 'UN_AVL'), (349177, 'AVL_TRAIN'), (352799, 'AVL_EVAL'), (361162, 'UN_AVL'), (361574, 'AVL_TRAIN'), (365552, 'AVL_EVAL'), (394079, 'AVL_TRAIN'), (395652, 'AVL_EVAL'), (402844, 'AVL_TRAIN'), (404338, 'AVL_EVAL'), (420847, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_42.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_42.json index 58e1b6a17..ffdd78b55 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_42.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_42.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (932, 'UN_AVL'), (5814, 'AVL_TRAIN'), (5816, 'UN_AVL'), (5817, 'AVL_TRAIN'), (5821, 'UN_AVL'), (5824, 'AVL_TRAIN'), (37829, 'AVL_EVAL'), (47572, 'UN_AVL'), (51748, 'AVL_TRAIN'), (51749, 'UN_AVL'), (54387, 'AVL_TRAIN'), (54400, 'UN_AVL'), (57843, 'AVL_TRAIN'), (57853, 'UN_AVL'), (58001, 'AVL_TRAIN'), (58025, 'UN_AVL'), (58164, 'AVL_TRAIN'), (58168, 'UN_AVL'), (58173, 
'AVL_TRAIN'), (64017, 'AVL_EVAL'), (87614, 'UN_AVL'), (97011, 'AVL_TRAIN'), (125046, 'AVL_EVAL'), (135170, 'AVL_TRAIN'), (139204, 'AVL_EVAL'), (139220, 'AVL_TRAIN'), (144104, 'AVL_EVAL'), (182419, 'UN_AVL'), (184272, 'AVL_TRAIN'), (184273, 'UN_AVL'), (184273, 'AVL_TRAIN'), (223531, 'AVL_EVAL'), (230088, 'AVL_TRAIN'), (232654, 'AVL_EVAL'), (232655, 'AVL_TRAIN'), (232660, 'AVL_EVAL'), (245831, 'AVL_TRAIN'), (246944, 'AVL_EVAL'), (315769, 'UN_AVL'), (332133, 'AVL_TRAIN'), (332136, 'UN_AVL'), (332156, 'AVL_TRAIN'), (335587, 'UN_AVL'), (335587, 'AVL_TRAIN'), (335596, 'UN_AVL'), (335597, 'AVL_TRAIN'), (340675, 'AVL_EVAL'), (349484, 'UN_AVL'), (397215, 'AVL_EVAL'), (419345, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (5814, 'AVL_TRAIN'), (5816, 'UN_AVL'), (5817, 'AVL_TRAIN'), (5821, 'UN_AVL'), (5824, 'AVL_TRAIN'), (37829, 'AVL_EVAL'), (47572, 'UN_AVL'), (51748, 'AVL_TRAIN'), (51749, 'UN_AVL'), (54387, 'AVL_TRAIN'), (54400, 'UN_AVL'), (57843, 'AVL_TRAIN'), (57853, 'UN_AVL'), (58001, 'AVL_TRAIN'), (58025, 'UN_AVL'), (58164, 'AVL_TRAIN'), (58168, 'UN_AVL'), (58173, 'AVL_TRAIN'), (64017, 'AVL_EVAL'), (73236, 'UN_AVL'), (97011, 'AVL_TRAIN'), (125046, 'AVL_EVAL'), (130209, 'UN_AVL'), (135170, 'AVL_TRAIN'), (139204, 'AVL_EVAL'), (139220, 'AVL_TRAIN'), (144104, 'AVL_EVAL'), (176517, 'UN_AVL'), (184272, 'AVL_TRAIN'), (184273, 'UN_AVL'), (184273, 'AVL_TRAIN'), (223531, 'AVL_EVAL'), (230088, 'AVL_TRAIN'), (232654, 'AVL_EVAL'), (232655, 'AVL_TRAIN'), (232660, 'AVL_EVAL'), (244352, 'UN_AVL'), (245831, 'AVL_TRAIN'), (246944, 'AVL_EVAL'), (270833, 'UN_AVL'), (332133, 'AVL_TRAIN'), (332136, 'UN_AVL'), (332156, 'AVL_TRAIN'), (335587, 'UN_AVL'), (335587, 'AVL_TRAIN'), (335596, 'UN_AVL'), (335597, 'AVL_TRAIN'), (340675, 'AVL_EVAL'), (344514, 'UN_AVL'), (397215, 'AVL_EVAL'), (419345, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_43.json 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_43.json index bd9670f12..4460ff32e 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_43.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_43.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (8288, 'AVL_TRAIN'), (8420, 'AVL_EVAL'), (43389, 'AVL_TRAIN'), (55741, 'AVL_EVAL'), (168004, 'AVL_TRAIN'), (168044, 'AVL_EVAL'), (168053, 'AVL_TRAIN'), (168450, 'AVL_EVAL'), (168450, 'AVL_TRAIN'), (168451, 'AVL_EVAL'), (168451, 'AVL_TRAIN'), (168453, 'AVL_EVAL'), (168454, 'AVL_TRAIN'), (168997, 'AVL_EVAL'), (168998, 'AVL_TRAIN'), (169006, 'AVL_EVAL'), (169008, 'AVL_TRAIN'), (169660, 'AVL_EVAL'), (169899, 'AVL_TRAIN'), (169910, 'AVL_EVAL'), (169914, 'AVL_TRAIN'), (170246, 'AVL_EVAL'), (244555, 'AVL_TRAIN'), (247523, 'AVL_EVAL'), (261454, 'AVL_TRAIN'), (262814, 'AVL_EVAL'), (269312, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (8288, 'AVL_TRAIN'), (8420, 'AVL_EVAL'), (9278, 'UN_AVL'), (43389, 'AVL_TRAIN'), (55741, 'AVL_EVAL'), (86911, 'UN_AVL'), (167999, 'AVL_EVAL'), (168004, 'AVL_TRAIN'), (168044, 'AVL_EVAL'), (168053, 
'AVL_TRAIN'), (168450, 'AVL_EVAL'), (168450, 'AVL_TRAIN'), (168451, 'AVL_EVAL'), (168451, 'AVL_TRAIN'), (168453, 'AVL_EVAL'), (168454, 'AVL_TRAIN'), (168997, 'AVL_EVAL'), (168998, 'AVL_TRAIN'), (169006, 'AVL_EVAL'), (169008, 'AVL_TRAIN'), (169660, 'AVL_EVAL'), (169899, 'AVL_TRAIN'), (169910, 'AVL_EVAL'), (169914, 'AVL_TRAIN'), (170246, 'AVL_EVAL'), (242885, 'UN_AVL'), (244555, 'AVL_TRAIN'), (247523, 'AVL_EVAL'), (250025, 'UN_AVL'), (261454, 'AVL_TRAIN'), (262814, 'AVL_EVAL'), (267531, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_44.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_44.json index 18eb4454c..eabf1516b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_44.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_44.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "15.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (4052, 'AVL_EVAL'), (13753, 'AVL_TRAIN'), (17876, 'AVL_EVAL'), (41529, 'AVL_TRAIN'), (45478, 'AVL_EVAL'), (53997, 'AVL_TRAIN'), (60043, 'AVL_EVAL'), (61878, 'AVL_TRAIN'), (62732, 'AVL_EVAL'), (66444, 'AVL_TRAIN'), (68015, 'AVL_EVAL'), (68685, 'AVL_TRAIN'), (69884, 'AVL_EVAL'), 
(71871, 'AVL_TRAIN'), (74958, 'AVL_EVAL'), (76782, 'AVL_TRAIN'), (76930, 'AVL_EVAL'), (77655, 'AVL_TRAIN'), (77984, 'AVL_EVAL'), (79290, 'AVL_TRAIN'), (81859, 'AVL_EVAL'), (85878, 'AVL_TRAIN'), (86708, 'AVL_EVAL'), (88806, 'AVL_TRAIN'), (89177, 'AVL_EVAL'), (89683, 'AVL_TRAIN'), (91936, 'AVL_EVAL'), (93411, 'AVL_TRAIN'), (93562, 'AVL_EVAL'), (93994, 'AVL_TRAIN'), (94445, 'AVL_EVAL'), (99116, 'AVL_TRAIN'), (107419, 'AVL_EVAL'), (125745, 'AVL_TRAIN'), (128042, 'AVL_EVAL'), (131979, 'AVL_TRAIN'), (133007, 'AVL_EVAL'), (140230, 'AVL_TRAIN'), (144034, 'AVL_EVAL'), (144211, 'AVL_TRAIN'), (146586, 'AVL_EVAL'), (147661, 'AVL_TRAIN'), (148770, 'AVL_EVAL'), (148770, 'AVL_TRAIN'), (148772, 'AVL_EVAL'), (156530, 'AVL_TRAIN'), (157342, 'AVL_EVAL'), (157441, 'AVL_TRAIN'), (158119, 'AVL_EVAL'), (161695, 'AVL_TRAIN'), (162600, 'AVL_EVAL'), (162600, 'AVL_TRAIN'), (162615, 'AVL_EVAL'), (165168, 'AVL_TRAIN'), (168116, 'AVL_EVAL'), (170187, 'AVL_TRAIN'), (170338, 'AVL_EVAL'), (176166, 'AVL_TRAIN'), (176873, 'AVL_EVAL'), (176873, 'AVL_TRAIN'), (176894, 'AVL_EVAL'), (180189, 'AVL_TRAIN'), (184247, 'AVL_EVAL'), (184247, 'AVL_TRAIN'), (184257, 'AVL_EVAL'), (193273, 'AVL_TRAIN'), (195027, 'AVL_EVAL'), (218141, 'AVL_TRAIN'), (224454, 'AVL_EVAL'), (224454, 'AVL_TRAIN'), (224456, 'AVL_EVAL'), (226097, 'AVL_TRAIN'), (229349, 'AVL_EVAL'), (234537, 'AVL_TRAIN'), (235251, 'AVL_EVAL'), (235251, 'AVL_TRAIN'), (236352, 'AVL_EVAL'), (238886, 'AVL_TRAIN'), (243026, 'AVL_EVAL'), (249891, 'AVL_TRAIN'), (253942, 'AVL_EVAL'), (256128, 'AVL_TRAIN'), (257709, 'AVL_EVAL'), (258291, 'AVL_TRAIN'), (258855, 'AVL_EVAL'), (258855, 'AVL_TRAIN'), (258865, 'AVL_EVAL'), (270663, 'AVL_TRAIN'), (278027, 'AVL_EVAL'), (282267, 'AVL_TRAIN'), (303596, 'AVL_EVAL'), (304184, 'AVL_TRAIN'), (309085, 'AVL_EVAL'), (309098, 'AVL_TRAIN'), (309324, 'AVL_EVAL'), (313815, 'AVL_TRAIN'), (313826, 'AVL_EVAL'), (313830, 'AVL_TRAIN'), (313842, 'AVL_EVAL'), (316147, 'AVL_TRAIN'), (316174, 'AVL_EVAL'), (316206, 'AVL_TRAIN'), (319014, 
'AVL_EVAL'), (337764, 'AVL_TRAIN'), (337774, 'AVL_EVAL'), (337775, 'AVL_TRAIN'), (337793, 'AVL_EVAL'), (337803, 'AVL_TRAIN'), (337804, 'AVL_EVAL'), (337805, 'AVL_TRAIN'), (337822, 'AVL_EVAL'), (337833, 'AVL_TRAIN'), (337834, 'AVL_EVAL'), (337850, 'AVL_TRAIN'), (337852, 'AVL_EVAL'), (337852, 'AVL_TRAIN'), (337853, 'AVL_EVAL'), (337862, 'AVL_TRAIN'), (337864, 'AVL_EVAL'), (337865, 'AVL_TRAIN'), (337875, 'AVL_EVAL'), (337886, 'AVL_TRAIN'), (337897, 'AVL_EVAL'), (337914, 'AVL_TRAIN'), (337917, 'AVL_EVAL'), (337918, 'AVL_TRAIN'), (337929, 'AVL_EVAL'), (337944, 'AVL_TRAIN'), (337945, 'AVL_EVAL'), (337955, 'AVL_TRAIN'), (337961, 'AVL_EVAL'), (337963, 'AVL_TRAIN'), (337973, 'AVL_EVAL'), (337974, 'AVL_TRAIN'), (337996, 'AVL_EVAL'), (338006, 'AVL_TRAIN'), (338037, 'AVL_EVAL'), (338047, 'AVL_TRAIN'), (338073, 'AVL_EVAL'), (338083, 'AVL_TRAIN'), (338114, 'AVL_EVAL'), (338124, 'AVL_TRAIN'), (338145, 'AVL_EVAL'), (338145, 'AVL_TRAIN'), (338146, 'AVL_EVAL'), (338155, 'AVL_TRAIN'), (338156, 'AVL_EVAL'), (338158, 'AVL_TRAIN'), (338179, 'AVL_EVAL'), (357109, 'AVL_TRAIN'), (357333, 'AVL_EVAL'), (359643, 'AVL_TRAIN'), (384777, 'AVL_EVAL'), (387282, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (4052, 'AVL_EVAL'), (13753, 'AVL_TRAIN'), (17876, 'AVL_EVAL'), (41529, 'AVL_TRAIN'), (45478, 'AVL_EVAL'), (53997, 'AVL_TRAIN'), (60043, 'AVL_EVAL'), (61878, 'AVL_TRAIN'), (62732, 'AVL_EVAL'), (66444, 'AVL_TRAIN'), (68015, 'AVL_EVAL'), (68685, 'AVL_TRAIN'), (69884, 'AVL_EVAL'), (71871, 'AVL_TRAIN'), (74958, 'AVL_EVAL'), (76782, 'AVL_TRAIN'), (76930, 'AVL_EVAL'), (77655, 'AVL_TRAIN'), (77984, 'AVL_EVAL'), (79290, 'AVL_TRAIN'), (81859, 'AVL_EVAL'), (85878, 'AVL_TRAIN'), (86708, 'AVL_EVAL'), (88806, 'AVL_TRAIN'), (89177, 'AVL_EVAL'), (89683, 'AVL_TRAIN'), (91936, 'AVL_EVAL'), (93411, 'AVL_TRAIN'), (93562, 'AVL_EVAL'), (93994, 'AVL_TRAIN'), (94445, 'AVL_EVAL'), (99116, 'AVL_TRAIN'), (107419, 'AVL_EVAL'), (125745, 'AVL_TRAIN'), (128042, 'AVL_EVAL'), (131979, 
'AVL_TRAIN'), (133007, 'AVL_EVAL'), (140230, 'AVL_TRAIN'), (144034, 'AVL_EVAL'), (144211, 'AVL_TRAIN'), (146586, 'AVL_EVAL'), (147661, 'AVL_TRAIN'), (148770, 'AVL_EVAL'), (148770, 'AVL_TRAIN'), (148772, 'AVL_EVAL'), (156530, 'AVL_TRAIN'), (157342, 'AVL_EVAL'), (157441, 'AVL_TRAIN'), (158119, 'AVL_EVAL'), (161695, 'AVL_TRAIN'), (162600, 'AVL_EVAL'), (162600, 'AVL_TRAIN'), (162615, 'AVL_EVAL'), (165168, 'AVL_TRAIN'), (168116, 'AVL_EVAL'), (170187, 'AVL_TRAIN'), (170338, 'AVL_EVAL'), (176166, 'AVL_TRAIN'), (176873, 'AVL_EVAL'), (176873, 'AVL_TRAIN'), (176894, 'AVL_EVAL'), (180189, 'AVL_TRAIN'), (184247, 'AVL_EVAL'), (184247, 'AVL_TRAIN'), (184257, 'AVL_EVAL'), (193273, 'AVL_TRAIN'), (195027, 'AVL_EVAL'), (218141, 'AVL_TRAIN'), (224454, 'AVL_EVAL'), (224454, 'AVL_TRAIN'), (224456, 'AVL_EVAL'), (226097, 'AVL_TRAIN'), (229349, 'AVL_EVAL'), (234537, 'AVL_TRAIN'), (235251, 'AVL_EVAL'), (235251, 'AVL_TRAIN'), (236352, 'AVL_EVAL'), (238886, 'AVL_TRAIN'), (243026, 'AVL_EVAL'), (249891, 'AVL_TRAIN'), (253942, 'AVL_EVAL'), (256128, 'AVL_TRAIN'), (257709, 'AVL_EVAL'), (258291, 'AVL_TRAIN'), (258855, 'AVL_EVAL'), (258855, 'AVL_TRAIN'), (258865, 'AVL_EVAL'), (270663, 'AVL_TRAIN'), (278027, 'AVL_EVAL'), (282267, 'AVL_TRAIN'), (303596, 'AVL_EVAL'), (304184, 'AVL_TRAIN'), (309085, 'AVL_EVAL'), (309098, 'AVL_TRAIN'), (309324, 'AVL_EVAL'), (313815, 'AVL_TRAIN'), (313826, 'AVL_EVAL'), (313830, 'AVL_TRAIN'), (313842, 'AVL_EVAL'), (316147, 'AVL_TRAIN'), (316174, 'AVL_EVAL'), (316206, 'AVL_TRAIN'), (319014, 'AVL_EVAL'), (337764, 'AVL_TRAIN'), (337774, 'AVL_EVAL'), (337775, 'AVL_TRAIN'), (337793, 'AVL_EVAL'), (337803, 'AVL_TRAIN'), (337804, 'AVL_EVAL'), (337805, 'AVL_TRAIN'), (337822, 'AVL_EVAL'), (337833, 'AVL_TRAIN'), (337834, 'AVL_EVAL'), (337850, 'AVL_TRAIN'), (337852, 'AVL_EVAL'), (337852, 'AVL_TRAIN'), (337853, 'AVL_EVAL'), (337862, 'AVL_TRAIN'), (337864, 'AVL_EVAL'), (337865, 'AVL_TRAIN'), (337875, 'AVL_EVAL'), (337886, 'AVL_TRAIN'), (337897, 'AVL_EVAL'), (337914, 'AVL_TRAIN'), 
(337917, 'AVL_EVAL'), (337918, 'AVL_TRAIN'), (337929, 'AVL_EVAL'), (337944, 'AVL_TRAIN'), (337945, 'AVL_EVAL'), (337955, 'AVL_TRAIN'), (337961, 'AVL_EVAL'), (337963, 'AVL_TRAIN'), (337973, 'AVL_EVAL'), (337974, 'AVL_TRAIN'), (337996, 'AVL_EVAL'), (338006, 'AVL_TRAIN'), (338037, 'AVL_EVAL'), (338047, 'AVL_TRAIN'), (338073, 'AVL_EVAL'), (338083, 'AVL_TRAIN'), (338114, 'AVL_EVAL'), (338124, 'AVL_TRAIN'), (338145, 'AVL_EVAL'), (338145, 'AVL_TRAIN'), (338146, 'AVL_EVAL'), (338155, 'AVL_TRAIN'), (338156, 'AVL_EVAL'), (338158, 'AVL_TRAIN'), (338179, 'AVL_EVAL'), (342352, 'UN_AVL'), (357109, 'AVL_TRAIN'), (357333, 'UN_AVL'), (359643, 'AVL_TRAIN'), (384777, 'AVL_EVAL'), (387282, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_45.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_45.json index 60a7b6ce5..fa10cb6b5 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_45.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_45.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "24.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (242924, 'AVL_EVAL'), (310803, 'UN_AVL'), (329033, 'AVL_TRAIN'), (329238, 'UN_AVL')]", 
"avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (242924, 'AVL_EVAL'), (269778, 'UN_AVL'), (329033, 'AVL_TRAIN'), (329238, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_46.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_46.json index a73985f42..410ec0a1e 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_46.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_46.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (15954, 'AVL_TRAIN'), (16282, 'UN_AVL'), (16283, 'AVL_TRAIN'), (30974, 'AVL_EVAL'), (72353, 'AVL_TRAIN'), (73635, 'AVL_EVAL'), (75830, 'AVL_TRAIN'), (75873, 'AVL_EVAL'), (85250, 'AVL_TRAIN'), (89214, 'AVL_EVAL'), (93218, 'AVL_TRAIN'), (94740, 'AVL_EVAL'), (94811, 'AVL_TRAIN'), (94849, 'AVL_EVAL'), (94882, 'AVL_TRAIN'), (94899, 'AVL_EVAL'), (130280, 'AVL_TRAIN'), (132972, 'AVL_EVAL'), (158848, 'AVL_TRAIN'), (163571, 'AVL_EVAL'), (169735, 'AVL_TRAIN'), (170612, 'AVL_EVAL'), (170616, 'AVL_TRAIN'), (170700, 'AVL_EVAL'), (175356, 'AVL_TRAIN'), (176696, 'AVL_EVAL'), (215922, 'AVL_TRAIN'), (222216, 'AVL_EVAL'), (238286, 
'AVL_TRAIN'), (242458, 'AVL_EVAL'), (260294, 'AVL_TRAIN'), (260301, 'AVL_EVAL'), (276801, 'AVL_TRAIN'), (277013, 'AVL_EVAL'), (277014, 'AVL_TRAIN'), (277015, 'AVL_EVAL'), (277015, 'AVL_TRAIN'), (277284, 'AVL_EVAL'), (277285, 'AVL_TRAIN'), (277326, 'AVL_EVAL'), (277326, 'AVL_TRAIN'), (277335, 'AVL_EVAL'), (277336, 'AVL_TRAIN'), (277354, 'AVL_EVAL'), (277356, 'AVL_TRAIN'), (277373, 'AVL_EVAL'), (277374, 'AVL_TRAIN'), (277651, 'AVL_EVAL'), (278197, 'AVL_TRAIN'), (280030, 'AVL_EVAL'), (280031, 'AVL_TRAIN'), (280325, 'AVL_EVAL'), (280431, 'AVL_TRAIN'), (280578, 'AVL_EVAL'), (280580, 'AVL_TRAIN'), (280640, 'AVL_EVAL'), (280642, 'AVL_TRAIN'), (281001, 'AVL_EVAL'), (281003, 'AVL_TRAIN'), (281038, 'AVL_EVAL'), (281039, 'AVL_TRAIN'), (281182, 'AVL_EVAL'), (281182, 'AVL_TRAIN'), (281235, 'AVL_EVAL'), (281238, 'AVL_TRAIN'), (281323, 'AVL_EVAL'), (281327, 'AVL_TRAIN'), (281335, 'AVL_EVAL'), (281371, 'AVL_TRAIN'), (281665, 'AVL_EVAL'), (281667, 'AVL_TRAIN'), (281668, 'AVL_EVAL'), (281672, 'AVL_TRAIN'), (281712, 'AVL_EVAL'), (281741, 'AVL_TRAIN'), (281776, 'AVL_EVAL'), (281777, 'AVL_TRAIN'), (281779, 'AVL_EVAL'), (281782, 'AVL_TRAIN'), (281805, 'AVL_EVAL'), (282002, 'AVL_TRAIN'), (282013, 'AVL_EVAL'), (282014, 'AVL_TRAIN'), (282028, 'AVL_EVAL'), (282690, 'AVL_TRAIN'), (282702, 'AVL_EVAL'), (282702, 'AVL_TRAIN'), (282791, 'AVL_EVAL'), (282793, 'AVL_TRAIN'), (282801, 'AVL_EVAL'), (282813, 'AVL_TRAIN'), (282861, 'AVL_EVAL'), (308351, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (15954, 'AVL_TRAIN'), (16282, 'UN_AVL'), (16283, 'AVL_TRAIN'), (30974, 'AVL_EVAL'), (65765, 'UN_AVL'), (72353, 'AVL_TRAIN'), (73635, 'AVL_EVAL'), (75830, 'UN_AVL'), (75830, 'AVL_TRAIN'), (75873, 'UN_AVL'), (85250, 'AVL_TRAIN'), (89214, 'AVL_EVAL'), (93218, 'AVL_TRAIN'), (94740, 'AVL_EVAL'), (94811, 'AVL_TRAIN'), (94849, 'AVL_EVAL'), (94882, 'AVL_TRAIN'), (94899, 'AVL_EVAL'), (122409, 'UN_AVL'), (130280, 'AVL_TRAIN'), (132972, 'AVL_EVAL'), (146689, 'UN_AVL'), (158848, 
'AVL_TRAIN'), (163571, 'AVL_EVAL'), (169735, 'AVL_TRAIN'), (170612, 'AVL_EVAL'), (170616, 'AVL_TRAIN'), (170700, 'AVL_EVAL'), (175356, 'AVL_TRAIN'), (176696, 'AVL_EVAL'), (192084, 'UN_AVL'), (215922, 'AVL_TRAIN'), (222216, 'AVL_EVAL'), (235714, 'UN_AVL'), (238286, 'AVL_TRAIN'), (242458, 'AVL_EVAL'), (260294, 'AVL_TRAIN'), (260301, 'AVL_EVAL'), (265110, 'UN_AVL'), (276801, 'AVL_TRAIN'), (277013, 'UN_AVL'), (277014, 'AVL_TRAIN'), (277015, 'UN_AVL'), (277015, 'AVL_TRAIN'), (277284, 'UN_AVL'), (277285, 'AVL_TRAIN'), (277326, 'UN_AVL'), (277326, 'AVL_TRAIN'), (277335, 'UN_AVL'), (277336, 'AVL_TRAIN'), (277354, 'UN_AVL'), (277356, 'AVL_TRAIN'), (277373, 'UN_AVL'), (277374, 'AVL_TRAIN'), (277651, 'UN_AVL'), (278197, 'AVL_TRAIN'), (280030, 'UN_AVL'), (280031, 'AVL_TRAIN'), (280325, 'UN_AVL'), (280431, 'AVL_TRAIN'), (280578, 'UN_AVL'), (280580, 'AVL_TRAIN'), (280640, 'UN_AVL'), (280642, 'AVL_TRAIN'), (281001, 'AVL_EVAL'), (281003, 'AVL_TRAIN'), (281038, 'AVL_EVAL'), (281039, 'AVL_TRAIN'), (281182, 'AVL_EVAL'), (281182, 'AVL_TRAIN'), (281235, 'AVL_EVAL'), (281238, 'AVL_TRAIN'), (281323, 'AVL_EVAL'), (281327, 'AVL_TRAIN'), (281335, 'AVL_EVAL'), (281371, 'AVL_TRAIN'), (281665, 'AVL_EVAL'), (281667, 'AVL_TRAIN'), (281668, 'AVL_EVAL'), (281672, 'AVL_TRAIN'), (281712, 'AVL_EVAL'), (281741, 'AVL_TRAIN'), (281776, 'AVL_EVAL'), (281777, 'AVL_TRAIN'), (281779, 'AVL_EVAL'), (281782, 'AVL_TRAIN'), (281805, 'AVL_EVAL'), (282002, 'AVL_TRAIN'), (282013, 'AVL_EVAL'), (282014, 'AVL_TRAIN'), (282028, 'AVL_EVAL'), (282690, 'AVL_TRAIN'), (282702, 'AVL_EVAL'), (282702, 'AVL_TRAIN'), (282791, 'AVL_EVAL'), (282793, 'AVL_TRAIN'), (282801, 'AVL_EVAL'), (282813, 'AVL_TRAIN'), (282861, 'AVL_EVAL'), (301444, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_47.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_47.json index 98ed20b74..80bcd141b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_47.json +++ 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_47.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (1378, 'UN_AVL'), (52628, 'AVL_EVAL'), (57059, 'AVL_TRAIN'), (59458, 'AVL_EVAL'), (64525, 'UN_AVL'), (71571, 'AVL_TRAIN'), (72017, 'UN_AVL'), (72073, 'AVL_TRAIN'), (72756, 'UN_AVL'), (78997, 'AVL_TRAIN'), (79000, 'UN_AVL'), (79002, 'AVL_TRAIN'), (83746, 'AVL_EVAL'), (84808, 'AVL_TRAIN'), (86106, 'AVL_EVAL'), (87835, 'AVL_TRAIN'), (90829, 'AVL_EVAL'), (90837, 'AVL_TRAIN'), (91196, 'AVL_EVAL'), (92732, 'AVL_TRAIN'), (96630, 'AVL_EVAL'), (97336, 'AVL_TRAIN'), (101642, 'AVL_EVAL'), (101643, 'AVL_TRAIN'), (101654, 'AVL_EVAL'), (161054, 'UN_AVL'), (163339, 'AVL_TRAIN'), (164634, 'AVL_EVAL'), (165475, 'UN_AVL'), (170275, 'AVL_TRAIN'), (175534, 'AVL_EVAL'), (176602, 'UN_AVL'), (186201, 'AVL_TRAIN'), (192948, 'AVL_EVAL'), (192962, 'AVL_TRAIN'), (221280, 'AVL_EVAL'), (229044, 'AVL_TRAIN'), (235196, 'AVL_EVAL'), (242624, 'UN_AVL'), (250484, 'AVL_EVAL'), (251134, 'UN_AVL'), (254004, 'AVL_TRAIN'), (263336, 'AVL_EVAL'), (272961, 'UN_AVL'), (273364, 'AVL_TRAIN'), (300090, 'AVL_EVAL'), (317239, 'UN_AVL'), (321302, 'AVL_TRAIN'), (326727, 'AVL_EVAL'), (327882, 
'UN_AVL'), (331530, 'AVL_TRAIN'), (336720, 'AVL_EVAL'), (338372, 'AVL_TRAIN'), (341533, 'AVL_EVAL'), (345407, 'AVL_TRAIN'), (345514, 'AVL_EVAL'), (347108, 'UN_AVL'), (348695, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (52628, 'AVL_EVAL'), (55735, 'UN_AVL'), (57059, 'AVL_TRAIN'), (59458, 'AVL_EVAL'), (61538, 'UN_AVL'), (71571, 'AVL_TRAIN'), (72017, 'UN_AVL'), (72073, 'AVL_TRAIN'), (72756, 'UN_AVL'), (78997, 'AVL_TRAIN'), (79000, 'UN_AVL'), (79002, 'AVL_TRAIN'), (83746, 'UN_AVL'), (84808, 'AVL_TRAIN'), (86106, 'UN_AVL'), (87835, 'AVL_TRAIN'), (90829, 'AVL_EVAL'), (90837, 'AVL_TRAIN'), (91196, 'AVL_EVAL'), (92732, 'AVL_TRAIN'), (96630, 'AVL_EVAL'), (97336, 'AVL_TRAIN'), (101642, 'AVL_EVAL'), (101643, 'AVL_TRAIN'), (101654, 'AVL_EVAL'), (154926, 'UN_AVL'), (163339, 'AVL_TRAIN'), (164634, 'UN_AVL'), (170275, 'AVL_TRAIN'), (175534, 'UN_AVL'), (186201, 'AVL_TRAIN'), (192948, 'AVL_EVAL'), (192962, 'AVL_TRAIN'), (221280, 'AVL_EVAL'), (229044, 'AVL_TRAIN'), (235196, 'AVL_EVAL'), (239142, 'UN_AVL'), (254004, 'AVL_TRAIN'), (263336, 'AVL_EVAL'), (272961, 'UN_AVL'), (273364, 'AVL_TRAIN'), (300090, 'AVL_EVAL'), (311187, 'UN_AVL'), (321302, 'AVL_TRAIN'), (326727, 'UN_AVL'), (331530, 'AVL_TRAIN'), (336720, 'AVL_EVAL'), (336990, 'UN_AVL'), (338372, 'AVL_TRAIN'), (341533, 'AVL_EVAL'), (344677, 'UN_AVL'), (345407, 'AVL_TRAIN'), (345514, 'UN_AVL'), (348695, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_48.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_48.json index c17522c1d..3527b148f 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_48.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_48.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "14.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (113643, 'UN_AVL'), (167106, 'AVL_TRAIN'), (167523, 'UN_AVL'), (167524, 'AVL_TRAIN'), (167818, 'UN_AVL'), (167819, 'AVL_TRAIN'), (168044, 'UN_AVL'), (168046, 'AVL_TRAIN'), (170002, 'UN_AVL'), (170557, 'AVL_TRAIN'), (172114, 'UN_AVL'), (173767, 'AVL_TRAIN'), (184325, 'UN_AVL'), (184327, 'AVL_TRAIN'), (184380, 'UN_AVL'), (184382, 'AVL_TRAIN'), (201544, 'AVL_EVAL'), (270198, 'UN_AVL'), (301907, 'AVL_TRAIN'), (306103, 'UN_AVL'), (307644, 'AVL_TRAIN'), (323768, 'AVL_EVAL'), (324213, 'AVL_TRAIN'), (331037, 'AVL_EVAL'), (331118, 'AVL_TRAIN'), (333438, 'UN_AVL'), (333439, 'AVL_TRAIN'), (333441, 'UN_AVL'), (333447, 'AVL_TRAIN'), (333451, 'UN_AVL'), (333452, 'AVL_TRAIN'), (333455, 'UN_AVL'), (333522, 'AVL_TRAIN'), (333549, 'UN_AVL'), (333550, 'AVL_TRAIN'), (333569, 'UN_AVL'), (333573, 'AVL_TRAIN'), (340914, 'UN_AVL'), (340917, 'AVL_TRAIN'), (340927, 'UN_AVL'), (340928, 'AVL_TRAIN'), (340936, 'UN_AVL'), (340941, 'AVL_TRAIN'), (340946, 'UN_AVL'), (340947, 'AVL_TRAIN'), (340948, 'UN_AVL'), (340953, 'AVL_TRAIN'), (340958, 'UN_AVL'), (340964, 'AVL_TRAIN'), (340969, 'UN_AVL'), (340970, 'AVL_TRAIN'), (340971, 'UN_AVL'), (340972, 'AVL_TRAIN'), (340974, 'UN_AVL'), (340981, 'AVL_TRAIN'), (340982, 'UN_AVL'), (340983, 'AVL_TRAIN'), (340992, 'UN_AVL'), (340992, 'AVL_TRAIN'), (341608, 'UN_AVL'), (341613, 'AVL_TRAIN'), 
(341643, 'UN_AVL'), (341657, 'AVL_TRAIN'), (341668, 'UN_AVL'), (341693, 'AVL_TRAIN'), (342705, 'UN_AVL'), (342708, 'AVL_TRAIN'), (343007, 'UN_AVL'), (343010, 'AVL_TRAIN'), (343011, 'UN_AVL'), (343012, 'AVL_TRAIN'), (343016, 'UN_AVL'), (343022, 'AVL_TRAIN'), (343031, 'UN_AVL'), (343190, 'AVL_TRAIN'), (360439, 'UN_AVL'), (364048, 'AVL_EVAL'), (364048, 'AVL_TRAIN'), (364397, 'AVL_EVAL'), (451047, 'UN_AVL'), (463846, 'AVL_TRAIN'), (489644, 'AVL_EVAL'), (495289, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (68319, 'UN_AVL'), (167106, 'AVL_TRAIN'), (167523, 'UN_AVL'), (167524, 'AVL_TRAIN'), (167818, 'UN_AVL'), (167819, 'AVL_TRAIN'), (168044, 'UN_AVL'), (168046, 'AVL_TRAIN'), (170002, 'UN_AVL'), (170557, 'AVL_TRAIN'), (172114, 'UN_AVL'), (173767, 'AVL_TRAIN'), (184325, 'UN_AVL'), (184327, 'AVL_TRAIN'), (184380, 'UN_AVL'), (184382, 'AVL_TRAIN'), (201544, 'AVL_EVAL'), (245426, 'UN_AVL'), (301907, 'AVL_TRAIN'), (306103, 'UN_AVL'), (307644, 'AVL_TRAIN'), (323768, 'UN_AVL'), (324213, 'AVL_TRAIN'), (331037, 'UN_AVL'), (331118, 'AVL_TRAIN'), (333438, 'UN_AVL'), (333439, 'AVL_TRAIN'), (333441, 'UN_AVL'), (333447, 'AVL_TRAIN'), (333451, 'UN_AVL'), (333452, 'AVL_TRAIN'), (333455, 'UN_AVL'), (333522, 'AVL_TRAIN'), (333549, 'UN_AVL'), (333550, 'AVL_TRAIN'), (333569, 'UN_AVL'), (333573, 'AVL_TRAIN'), (340914, 'UN_AVL'), (340917, 'AVL_TRAIN'), (340927, 'UN_AVL'), (340928, 'AVL_TRAIN'), (340936, 'UN_AVL'), (340941, 'AVL_TRAIN'), (340946, 'UN_AVL'), (340947, 'AVL_TRAIN'), (340948, 'UN_AVL'), (340953, 'AVL_TRAIN'), (340958, 'UN_AVL'), (340964, 'AVL_TRAIN'), (340969, 'UN_AVL'), (340970, 'AVL_TRAIN'), (340971, 'UN_AVL'), (340972, 'AVL_TRAIN'), (340974, 'UN_AVL'), (340981, 'AVL_TRAIN'), (340982, 'UN_AVL'), (340983, 'AVL_TRAIN'), (340992, 'UN_AVL'), (340992, 'AVL_TRAIN'), (341608, 'UN_AVL'), (341613, 'AVL_TRAIN'), (341643, 'UN_AVL'), (341657, 'AVL_TRAIN'), (341668, 'UN_AVL'), (341693, 'AVL_TRAIN'), (342705, 'UN_AVL'), (342708, 'AVL_TRAIN'), (343007, 
'UN_AVL'), (343010, 'AVL_TRAIN'), (343011, 'UN_AVL'), (343012, 'AVL_TRAIN'), (343016, 'UN_AVL'), (343022, 'AVL_TRAIN'), (343031, 'UN_AVL'), (343190, 'AVL_TRAIN'), (360439, 'UN_AVL'), (364048, 'AVL_TRAIN'), (364397, 'UN_AVL'), (400728, 'AVL_EVAL'), (420906, 'UN_AVL'), (463846, 'AVL_TRAIN'), (489644, 'AVL_EVAL'), (495289, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_49.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_49.json index 7a65e1cbe..17cf27807 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_49.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_49.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (5310, 'AVL_TRAIN'), (7222, 'UN_AVL'), (31086, 'AVL_TRAIN'), (31945, 'UN_AVL'), (32100, 'AVL_TRAIN'), (36154, 'AVL_EVAL'), (37070, 'AVL_TRAIN'), (45584, 'AVL_EVAL'), (147921, 'AVL_TRAIN'), (148354, 'AVL_EVAL'), (171992, 'UN_AVL'), (215923, 'AVL_TRAIN'), (217064, 'UN_AVL'), (255393, 'AVL_TRAIN'), (256981, 'UN_AVL'), (257217, 'AVL_TRAIN'), (257470, 'UN_AVL'), (257611, 'AVL_TRAIN'), (261824, 'AVL_EVAL'), (261835, 'AVL_TRAIN'), (263206, 'UN_AVL')]", 
"avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (5310, 'AVL_TRAIN'), (7222, 'UN_AVL'), (31086, 'AVL_TRAIN'), (31945, 'UN_AVL'), (32100, 'AVL_TRAIN'), (36154, 'AVL_EVAL'), (37070, 'AVL_TRAIN'), (45584, 'AVL_EVAL'), (147921, 'AVL_TRAIN'), (148354, 'AVL_EVAL'), (155656, 'UN_AVL'), (215923, 'AVL_TRAIN'), (217064, 'UN_AVL'), (255393, 'AVL_TRAIN'), (256981, 'UN_AVL'), (257217, 'AVL_TRAIN'), (257470, 'UN_AVL'), (257611, 'AVL_TRAIN'), (261824, 'UN_AVL'), (261835, 'AVL_TRAIN'), (263206, 'AVL_EVAL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_5.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_5.json index 1ede83afa..d43f9ddf7 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_5.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_5.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (223349, 'AVL_TRAIN'), (223793, 'UN_AVL'), (223794, 'AVL_TRAIN'), (223795, 'UN_AVL'), (228077, 'AVL_TRAIN'), (231075, 'UN_AVL'), (235397, 'AVL_TRAIN'), (239058, 'AVL_EVAL'), (240725, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (223349, 
'AVL_TRAIN'), (223793, 'UN_AVL'), (223794, 'AVL_TRAIN'), (223795, 'UN_AVL'), (228077, 'AVL_TRAIN'), (231075, 'UN_AVL'), (235397, 'AVL_TRAIN'), (239058, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_50.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_50.json index 3a1afbc73..1db805336 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_50.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_50.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (95264, 'AVL_EVAL'), (99453, 'UN_AVL'), (102924, 'AVL_TRAIN'), (107713, 'AVL_EVAL'), (138831, 'UN_AVL'), (140998, 'AVL_TRAIN'), (152704, 'AVL_EVAL'), (152704, 'AVL_TRAIN'), (152714, 'AVL_EVAL'), (153501, 'AVL_TRAIN'), (154725, 'UN_AVL'), (154850, 'AVL_TRAIN'), (167956, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (102924, 'AVL_TRAIN'), (107713, 'AVL_EVAL'), (138831, 'UN_AVL'), (140998, 'AVL_TRAIN'), (152704, 'AVL_EVAL'), (152704, 'AVL_TRAIN'), (152714, 'AVL_EVAL'), (153501, 'AVL_TRAIN'), (154725, 'UN_AVL'), (154850, 'AVL_TRAIN'), (167956, 'UN_AVL')]", diff --git 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_51.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_51.json index 9cffd9c55..25f02f4fb 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_51.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_51.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "16.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (207687, 'AVL_TRAIN'), (209393, 'AVL_EVAL'), (213046, 'AVL_TRAIN'), (213809, 'AVL_EVAL'), (219540, 'UN_AVL'), (229295, 'AVL_TRAIN'), (230080, 'UN_AVL'), (236635, 'AVL_TRAIN'), (238791, 'UN_AVL'), (246659, 'AVL_TRAIN'), (249110, 'AVL_EVAL'), (261165, 'UN_AVL'), (261379, 'AVL_TRAIN'), (264886, 'AVL_EVAL'), (296386, 'UN_AVL'), (342427, 'AVL_EVAL'), (348184, 'UN_AVL'), (359372, 'AVL_TRAIN'), (360155, 'AVL_EVAL'), (361153, 'UN_AVL'), (377549, 'AVL_TRAIN'), (381983, 'AVL_EVAL'), (428246, 'AVL_TRAIN'), (431851, 'AVL_EVAL'), (432670, 'AVL_TRAIN'), (436094, 'AVL_EVAL'), (466115, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (207687, 'AVL_TRAIN'), (209393, 'UN_AVL'), (213046, 'AVL_TRAIN'), (213809, 'UN_AVL'), (229295, 'AVL_TRAIN'), (230080, 'UN_AVL'), (236635, 
'AVL_TRAIN'), (238791, 'UN_AVL'), (246659, 'AVL_TRAIN'), (249110, 'AVL_EVAL'), (252107, 'UN_AVL'), (261379, 'AVL_TRAIN'), (264886, 'AVL_EVAL'), (280379, 'UN_AVL'), (359372, 'AVL_TRAIN'), (360155, 'UN_AVL'), (377549, 'AVL_TRAIN'), (381983, 'AVL_EVAL'), (428246, 'AVL_TRAIN'), (431851, 'AVL_EVAL'), (432670, 'AVL_TRAIN'), (436094, 'AVL_EVAL'), (458635, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_52.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_52.json index 7521ae809..fe8a92395 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_52.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_52.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "14.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (6904, 'UN_AVL'), (39500, 'AVL_EVAL'), (39500, 'AVL_TRAIN'), (39645, 'AVL_EVAL'), (41348, 'AVL_TRAIN'), (42382, 'AVL_EVAL'), (47469, 'AVL_TRAIN'), (48348, 'AVL_EVAL'), (69109, 'AVL_TRAIN'), (69539, 'AVL_EVAL'), (88968, 'UN_AVL'), (125891, 'AVL_EVAL'), (125891, 'AVL_TRAIN'), (126158, 'AVL_EVAL'), (142259, 'AVL_TRAIN'), (142496, 'AVL_EVAL'), (153130, 'AVL_TRAIN'), (158358, 'AVL_EVAL'), (171476, 'UN_AVL'), (212272, 
'AVL_TRAIN'), (214842, 'AVL_EVAL'), (225708, 'UN_AVL'), (231258, 'AVL_TRAIN'), (232781, 'AVL_EVAL'), (232784, 'AVL_TRAIN'), (236573, 'AVL_EVAL'), (236575, 'AVL_TRAIN'), (236584, 'AVL_EVAL'), (238842, 'AVL_TRAIN'), (243910, 'AVL_EVAL'), (256171, 'UN_AVL'), (298693, 'AVL_EVAL'), (298693, 'AVL_TRAIN'), (304769, 'AVL_EVAL'), (322875, 'AVL_TRAIN'), (323462, 'AVL_EVAL'), (323563, 'AVL_TRAIN'), (329094, 'AVL_EVAL'), (352807, 'UN_AVL'), (355630, 'AVL_TRAIN'), (360038, 'AVL_EVAL'), (386293, 'AVL_TRAIN'), (389065, 'AVL_EVAL'), (423082, 'UN_AVL'), (438883, 'AVL_TRAIN'), (472611, 'AVL_EVAL'), (472823, 'AVL_TRAIN'), (475963, 'AVL_EVAL'), (499849, 'UN_AVL'), (500014, 'AVL_TRAIN'), (502187, 'AVL_EVAL'), (502189, 'AVL_TRAIN'), (502978, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (129, 'UN_AVL'), (39500, 'AVL_EVAL'), (39500, 'AVL_TRAIN'), (39645, 'AVL_EVAL'), (41348, 'AVL_TRAIN'), (42382, 'AVL_EVAL'), (47469, 'AVL_TRAIN'), (48348, 'AVL_EVAL'), (69109, 'AVL_TRAIN'), (69539, 'AVL_EVAL'), (73250, 'UN_AVL'), (125891, 'AVL_EVAL'), (125891, 'AVL_TRAIN'), (126158, 'AVL_EVAL'), (141072, 'UN_AVL'), (142259, 'AVL_TRAIN'), (142496, 'AVL_EVAL'), (150215, 'UN_AVL'), (153130, 'AVL_TRAIN'), (158358, 'AVL_EVAL'), (162470, 'UN_AVL'), (212272, 'AVL_TRAIN'), (214842, 'UN_AVL'), (231258, 'AVL_TRAIN'), (232781, 'UN_AVL'), (232784, 'AVL_TRAIN'), (236573, 'AVL_EVAL'), (236575, 'AVL_TRAIN'), (236584, 'AVL_EVAL'), (238842, 'AVL_TRAIN'), (243910, 'AVL_EVAL'), (251139, 'UN_AVL'), (298693, 'AVL_EVAL'), (298693, 'AVL_TRAIN'), (304769, 'AVL_EVAL'), (318536, 'UN_AVL'), (322875, 'AVL_TRAIN'), (323462, 'UN_AVL'), (323563, 'AVL_TRAIN'), (329094, 'AVL_EVAL'), (339424, 'UN_AVL'), (355630, 'AVL_TRAIN'), (360038, 'AVL_EVAL'), (386293, 'AVL_TRAIN'), (389065, 'AVL_EVAL'), (416556, 'UN_AVL'), (438883, 'AVL_TRAIN'), (472611, 'AVL_EVAL'), (472823, 'AVL_TRAIN'), (475963, 'AVL_EVAL'), (492671, 'UN_AVL'), (500014, 'AVL_TRAIN'), (502187, 'AVL_EVAL'), (502189, 'AVL_TRAIN'), (502978, 'UN_AVL')]", diff --git 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_53.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_53.json index 6d46d5217..08be02d6c 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_53.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_53.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "4.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (5993, 'AVL_EVAL'), (40428, 'AVL_TRAIN'), (40791, 'AVL_EVAL'), (42512, 'AVL_TRAIN'), (45685, 'AVL_EVAL'), (89687, 'UN_AVL'), (128969, 'AVL_TRAIN'), (128970, 'UN_AVL'), (128980, 'AVL_TRAIN'), (132202, 'AVL_EVAL'), (170019, 'UN_AVL'), (170219, 'AVL_TRAIN'), (171710, 'AVL_EVAL'), (185010, 'UN_AVL'), (231040, 'AVL_TRAIN'), (250239, 'AVL_EVAL'), (250239, 'AVL_TRAIN'), (250249, 'AVL_EVAL'), (271861, 'UN_AVL'), (305189, 'AVL_TRAIN'), (310362, 'AVL_EVAL'), (310362, 'AVL_TRAIN'), (310365, 'AVL_EVAL'), (325579, 'UN_AVL'), (331152, 'AVL_TRAIN'), (335736, 'AVL_EVAL'), (356228, 'UN_AVL'), (387925, 'AVL_TRAIN'), (391048, 'AVL_EVAL'), (442293, 'UN_AVL'), (471896, 'AVL_TRAIN'), (474550, 'AVL_EVAL'), (517256, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (5993, 
'AVL_EVAL'), (40397, 'UN_AVL'), (40428, 'AVL_TRAIN'), (40791, 'AVL_EVAL'), (42512, 'AVL_TRAIN'), (45685, 'AVL_EVAL'), (75909, 'UN_AVL'), (128969, 'AVL_TRAIN'), (128970, 'UN_AVL'), (128980, 'AVL_TRAIN'), (132202, 'AVL_EVAL'), (159350, 'UN_AVL'), (170219, 'AVL_TRAIN'), (171710, 'AVL_EVAL'), (178470, 'UN_AVL'), (231040, 'AVL_TRAIN'), (250239, 'AVL_EVAL'), (250239, 'AVL_TRAIN'), (250249, 'AVL_EVAL'), (270607, 'UN_AVL'), (305189, 'AVL_TRAIN'), (310362, 'AVL_EVAL'), (310362, 'AVL_TRAIN'), (310365, 'AVL_EVAL'), (320383, 'UN_AVL'), (331152, 'AVL_TRAIN'), (335736, 'AVL_EVAL'), (343018, 'UN_AVL'), (387925, 'AVL_TRAIN'), (391048, 'AVL_EVAL'), (426462, 'UN_AVL'), (471896, 'AVL_TRAIN'), (474550, 'AVL_EVAL'), (509686, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_54.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_54.json index dd5db66c2..e37889782 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_54.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_54.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "14.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (102464, 'AVL_EVAL'), (161123, 'UN_AVL'), (171080, 
'AVL_TRAIN'), (171088, 'UN_AVL'), (171088, 'AVL_TRAIN'), (171098, 'UN_AVL'), (171123, 'AVL_TRAIN'), (173152, 'AVL_EVAL'), (183205, 'UN_AVL'), (183205, 'AVL_TRAIN'), (183264, 'UN_AVL'), (184085, 'AVL_TRAIN'), (185117, 'UN_AVL'), (187218, 'AVL_TRAIN'), (187219, 'UN_AVL'), (187220, 'AVL_TRAIN'), (187221, 'UN_AVL'), (187224, 'AVL_TRAIN'), (187812, 'UN_AVL'), (187812, 'AVL_TRAIN'), (189770, 'UN_AVL'), (189776, 'AVL_TRAIN'), (190415, 'UN_AVL'), (190554, 'AVL_TRAIN'), (190794, 'UN_AVL'), (191867, 'AVL_TRAIN'), (191962, 'UN_AVL'), (192840, 'AVL_TRAIN'), (217951, 'AVL_EVAL'), (223912, 'AVL_TRAIN'), (224426, 'AVL_EVAL'), (224447, 'AVL_TRAIN'), (224514, 'AVL_EVAL'), (319312, 'UN_AVL'), (351032, 'AVL_TRAIN'), (351484, 'UN_AVL'), (351903, 'AVL_TRAIN'), (352123, 'UN_AVL'), (352374, 'AVL_TRAIN'), (353704, 'AVL_EVAL'), (359274, 'UN_AVL'), (401067, 'AVL_TRAIN'), (402327, 'UN_AVL'), (434330, 'AVL_TRAIN'), (434610, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (102464, 'AVL_EVAL'), (142257, 'UN_AVL'), (171080, 'AVL_TRAIN'), (171088, 'UN_AVL'), (171088, 'AVL_TRAIN'), (171098, 'UN_AVL'), (171123, 'AVL_TRAIN'), (173152, 'UN_AVL'), (183205, 'AVL_TRAIN'), (183264, 'UN_AVL'), (184085, 'AVL_TRAIN'), (185117, 'UN_AVL'), (187218, 'AVL_TRAIN'), (187219, 'UN_AVL'), (187220, 'AVL_TRAIN'), (187221, 'UN_AVL'), (187224, 'AVL_TRAIN'), (187812, 'UN_AVL'), (187812, 'AVL_TRAIN'), (189770, 'UN_AVL'), (189776, 'AVL_TRAIN'), (190415, 'UN_AVL'), (190554, 'AVL_TRAIN'), (190794, 'UN_AVL'), (191867, 'AVL_TRAIN'), (191962, 'UN_AVL'), (192840, 'AVL_TRAIN'), (217951, 'AVL_EVAL'), (223912, 'AVL_TRAIN'), (224426, 'AVL_EVAL'), (224447, 'AVL_TRAIN'), (224514, 'AVL_EVAL'), (262522, 'UN_AVL'), (351032, 'AVL_TRAIN'), (351484, 'UN_AVL'), (351903, 'AVL_TRAIN'), (352123, 'UN_AVL'), (352374, 'AVL_TRAIN'), (353704, 'UN_AVL'), (401067, 'AVL_TRAIN'), (402327, 'UN_AVL'), (434330, 'AVL_TRAIN'), (434610, 'UN_AVL')]", diff --git 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_55.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_55.json index 740580dfb..f9e017e94 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_55.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_55.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "23.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_56.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_56.json index e6ef8cfd7..463b9084d 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_56.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_56.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", 
"partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "30.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (3785, 'UN_AVL'), (67339, 'AVL_TRAIN'), (70372, 'UN_AVL'), (90088, 'AVL_TRAIN'), (105890, 'AVL_EVAL'), (211817, 'UN_AVL'), (323903, 'AVL_EVAL'), (352242, 'UN_AVL'), (380055, 'AVL_TRAIN'), (387626, 'AVL_EVAL'), (389230, 'AVL_TRAIN'), (395929, 'AVL_EVAL'), (432844, 'UN_AVL'), (468462, 'AVL_TRAIN'), (473226, 'AVL_EVAL'), (477430, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (67339, 'AVL_TRAIN'), (70372, 'UN_AVL'), (90088, 'AVL_TRAIN'), (105890, 'AVL_EVAL'), (163572, 'UN_AVL'), (323903, 'AVL_EVAL'), (334910, 'UN_AVL'), (380055, 'AVL_TRAIN'), (387626, 'AVL_EVAL'), (389230, 'AVL_TRAIN'), (395929, 'AVL_EVAL'), (425206, 'UN_AVL'), (468462, 'AVL_TRAIN'), (473226, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_57.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_57.json index 6283616d3..029dd0274 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_57.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_57.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 
@@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "21.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (698, 'AVL_EVAL'), (67276, 'UN_AVL'), (68243, 'AVL_TRAIN'), (83793, 'AVL_EVAL'), (89025, 'AVL_TRAIN'), (89507, 'AVL_EVAL'), (91835, 'AVL_TRAIN'), (93790, 'AVL_EVAL'), (98474, 'AVL_TRAIN'), (108101, 'AVL_EVAL'), (151046, 'AVL_TRAIN'), (152849, 'AVL_EVAL'), (173965, 'UN_AVL'), (173996, 'AVL_TRAIN'), (180294, 'AVL_EVAL'), (180818, 'AVL_TRAIN'), (181988, 'AVL_EVAL'), (218151, 'AVL_TRAIN'), (218563, 'AVL_EVAL'), (231723, 'AVL_TRAIN'), (235443, 'AVL_EVAL'), (244565, 'UN_AVL'), (244565, 'AVL_TRAIN'), (311411, 'AVL_EVAL'), (313674, 'UN_AVL'), (320190, 'AVL_TRAIN'), (320595, 'UN_AVL'), (321584, 'AVL_EVAL'), (321584, 'AVL_TRAIN'), (329629, 'AVL_EVAL'), (335635, 'AVL_TRAIN'), (338546, 'AVL_EVAL'), (350094, 'AVL_TRAIN'), (353416, 'AVL_EVAL'), (399275, 'AVL_TRAIN'), (401693, 'AVL_EVAL'), (413938, 'AVL_TRAIN'), (415712, 'AVL_EVAL'), (418777, 'AVL_TRAIN'), (421213, 'AVL_EVAL'), (428651, 'AVL_TRAIN'), (429617, 'AVL_EVAL'), (434104, 'AVL_TRAIN'), (437429, 'AVL_EVAL'), (450017, 'UN_AVL'), (450724, 'AVL_TRAIN'), (456537, 'AVL_EVAL'), (477850, 'AVL_TRAIN'), (478799, 'AVL_EVAL'), (494683, 'UN_AVL'), (499000, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (698, 'AVL_EVAL'), (48463, 'UN_AVL'), (68243, 'AVL_TRAIN'), (83793, 'AVL_EVAL'), (89025, 'UN_AVL'), (89025, 'AVL_TRAIN'), (89507, 'AVL_EVAL'), (91835, 'UN_AVL'), (91835, 'AVL_TRAIN'), (93790, 'AVL_EVAL'), (98474, 'UN_AVL'), (98474, 'AVL_TRAIN'), (108101, 'AVL_EVAL'), (138889, 'UN_AVL'), (151046, 'AVL_TRAIN'), (152849, 'AVL_EVAL'), (169817, 'UN_AVL'), (173996, 
'AVL_TRAIN'), (180294, 'AVL_EVAL'), (180818, 'UN_AVL'), (180818, 'AVL_TRAIN'), (181988, 'AVL_EVAL'), (190289, 'UN_AVL'), (218151, 'AVL_TRAIN'), (218563, 'UN_AVL'), (231723, 'AVL_TRAIN'), (235443, 'UN_AVL'), (244565, 'AVL_TRAIN'), (311411, 'UN_AVL'), (320190, 'AVL_TRAIN'), (320595, 'UN_AVL'), (321584, 'AVL_TRAIN'), (329629, 'AVL_EVAL'), (334654, 'UN_AVL'), (335635, 'AVL_TRAIN'), (338546, 'AVL_EVAL'), (342938, 'UN_AVL'), (350094, 'AVL_TRAIN'), (353416, 'AVL_EVAL'), (356712, 'UN_AVL'), (360380, 'AVL_EVAL'), (392017, 'UN_AVL'), (399275, 'AVL_TRAIN'), (401693, 'AVL_EVAL'), (407531, 'UN_AVL'), (413938, 'AVL_TRAIN'), (415712, 'AVL_EVAL'), (418777, 'UN_AVL'), (418777, 'AVL_TRAIN'), (421213, 'AVL_EVAL'), (428651, 'UN_AVL'), (428651, 'AVL_TRAIN'), (429617, 'UN_AVL'), (434104, 'AVL_TRAIN'), (437429, 'AVL_EVAL'), (443635, 'UN_AVL'), (450724, 'AVL_TRAIN'), (456537, 'AVL_EVAL'), (477484, 'UN_AVL'), (477850, 'AVL_TRAIN'), (478799, 'AVL_EVAL'), (481511, 'UN_AVL'), (499000, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_58.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_58.json index c9d20d557..b84eb602f 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_58.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_58.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, 
"client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "28.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (47141, 'AVL_EVAL'), (132219, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (47141, 'AVL_EVAL'), (79690, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_59.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_59.json index bccbfdebc..190f874db 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_59.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_59.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (2220, 'UN_AVL'), (69562, 'AVL_TRAIN'), (90340, 'AVL_EVAL'), (91380, 'AVL_TRAIN'), (91391, 'AVL_EVAL'), (91399, 'AVL_TRAIN'), (91459, 'AVL_EVAL'), (91470, 'AVL_TRAIN'), (92782, 'AVL_EVAL'), (167142, 'UN_AVL'), (187114, 'AVL_TRAIN'), (219297, 'AVL_EVAL'), (262348, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (69562, 'AVL_TRAIN'), (90340, 'AVL_EVAL'), (91380, 'AVL_TRAIN'), (91391, 
'AVL_EVAL'), (91399, 'AVL_TRAIN'), (91459, 'AVL_EVAL'), (91470, 'AVL_TRAIN'), (92782, 'AVL_EVAL'), (104010, 'UN_AVL'), (187114, 'AVL_TRAIN'), (219297, 'AVL_EVAL'), (232976, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_6.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_6.json index 6eaa2a751..3a29f0bf3 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_6.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_6.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (7925, 'AVL_EVAL'), (48603, 'AVL_TRAIN'), (54224, 'AVL_EVAL'), (54225, 'AVL_TRAIN'), (58146, 'AVL_EVAL'), (67548, 'AVL_TRAIN'), (67560, 'AVL_EVAL'), (94384, 'UN_AVL'), (133631, 'AVL_EVAL'), (135976, 'AVL_TRAIN'), (136810, 'AVL_EVAL'), (145256, 'AVL_TRAIN'), (146614, 'AVL_EVAL'), (166844, 'UN_AVL'), (174078, 'AVL_TRAIN'), (181585, 'AVL_EVAL'), (225980, 'AVL_TRAIN'), (233310, 'AVL_EVAL'), (257948, 'UN_AVL'), (261738, 'AVL_TRAIN'), (264036, 'AVL_EVAL'), (267774, 'UN_AVL'), (271183, 'AVL_TRAIN'), (305145, 'AVL_EVAL'), (323824, 'UN_AVL'), (323881, 'AVL_TRAIN'), (327861, 'UN_AVL'), (328215, 
'AVL_TRAIN'), (334742, 'AVL_EVAL'), (352603, 'UN_AVL'), (358996, 'AVL_TRAIN'), (391418, 'AVL_EVAL'), (438347, 'UN_AVL'), (478096, 'AVL_TRAIN'), (499631, 'AVL_EVAL'), (501624, 'AVL_TRAIN'), (501703, 'AVL_EVAL'), (526086, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (48603, 'AVL_TRAIN'), (54224, 'AVL_EVAL'), (54225, 'AVL_TRAIN'), (58146, 'AVL_EVAL'), (67548, 'AVL_TRAIN'), (67560, 'AVL_EVAL'), (85306, 'UN_AVL'), (133631, 'AVL_EVAL'), (135976, 'AVL_TRAIN'), (136810, 'AVL_EVAL'), (145256, 'AVL_TRAIN'), (146614, 'AVL_EVAL'), (157669, 'UN_AVL'), (174078, 'AVL_TRAIN'), (181585, 'AVL_EVAL'), (225980, 'AVL_TRAIN'), (233310, 'AVL_EVAL'), (243824, 'UN_AVL'), (261738, 'AVL_TRAIN'), (264036, 'UN_AVL'), (271183, 'AVL_TRAIN'), (305145, 'AVL_EVAL'), (314591, 'UN_AVL'), (323881, 'AVL_TRAIN'), (327861, 'UN_AVL'), (328215, 'AVL_TRAIN'), (334742, 'AVL_EVAL'), (339115, 'UN_AVL'), (358996, 'AVL_TRAIN'), (391418, 'AVL_EVAL'), (424954, 'UN_AVL'), (478096, 'AVL_TRAIN'), (499631, 'AVL_EVAL'), (501624, 'AVL_TRAIN'), (501703, 'AVL_EVAL'), (515373, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_60.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_60.json index fba7a14e3..b2e62b223 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_60.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_60.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, 
"output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "8.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (259119, 'AVL_EVAL'), (259173, 'AVL_TRAIN'), (259667, 'AVL_EVAL'), (259673, 'AVL_TRAIN'), (259814, 'AVL_EVAL'), (261917, 'UN_AVL'), (269920, 'AVL_EVAL'), (269920, 'AVL_TRAIN'), (270018, 'AVL_EVAL'), (270151, 'AVL_TRAIN'), (271975, 'AVL_EVAL'), (331190, 'AVL_TRAIN'), (332454, 'AVL_EVAL'), (338467, 'UN_AVL'), (346055, 'AVL_EVAL'), (389070, 'UN_AVL'), (389092, 'AVL_TRAIN'), (391514, 'AVL_EVAL'), (391535, 'AVL_TRAIN'), (392513, 'AVL_EVAL'), (392521, 'AVL_TRAIN'), (392522, 'AVL_EVAL'), (392523, 'AVL_TRAIN'), (392526, 'AVL_EVAL'), (392534, 'AVL_TRAIN'), (392815, 'AVL_EVAL'), (392815, 'AVL_TRAIN'), (396077, 'AVL_EVAL'), (396077, 'AVL_TRAIN'), (396086, 'AVL_EVAL'), (396097, 'AVL_TRAIN'), (397298, 'AVL_EVAL'), (400080, 'AVL_TRAIN'), (408261, 'AVL_EVAL'), (440671, 'AVL_TRAIN'), (477109, 'AVL_EVAL'), (480884, 'AVL_TRAIN'), (481211, 'AVL_EVAL'), (504485, 'UN_AVL'), (507706, 'AVL_TRAIN'), (508777, 'AVL_EVAL'), (509133, 'UN_AVL'), (510158, 'AVL_TRAIN'), (515831, 'AVL_EVAL'), (518294, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (259173, 'AVL_TRAIN'), (259667, 'UN_AVL'), (259673, 'AVL_TRAIN'), (259814, 'UN_AVL'), (269920, 'AVL_TRAIN'), (270018, 'UN_AVL'), (270151, 'AVL_TRAIN'), (271975, 'UN_AVL'), (301307, 'AVL_EVAL'), (315853, 'UN_AVL'), (331190, 'AVL_TRAIN'), (332454, 'AVL_EVAL'), (332831, 'UN_AVL'), (389092, 'AVL_TRAIN'), (391514, 'UN_AVL'), (391535, 'AVL_TRAIN'), (392513, 'UN_AVL'), (392521, 'AVL_TRAIN'), (392522, 'UN_AVL'), (392523, 'AVL_TRAIN'), (392526, 'UN_AVL'), (392534, 'AVL_TRAIN'), (392815, 'UN_AVL'), (392815, 'AVL_TRAIN'), (396077, 'AVL_EVAL'), (396077, 'AVL_TRAIN'), (396086, 'AVL_EVAL'), (396097, 'AVL_TRAIN'), (397298, 
'AVL_EVAL'), (400080, 'AVL_TRAIN'), (408261, 'AVL_EVAL'), (438400, 'UN_AVL'), (440671, 'AVL_TRAIN'), (477109, 'AVL_EVAL'), (480884, 'AVL_TRAIN'), (481211, 'AVL_EVAL'), (494682, 'UN_AVL'), (507706, 'AVL_TRAIN'), (508777, 'UN_AVL'), (510158, 'AVL_TRAIN'), (515831, 'AVL_EVAL'), (518294, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_61.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_61.json index 444edff27..d5751e488 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_61.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_61.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "3.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8428, 'AVL_EVAL'), (14636, 'UN_AVL'), (44110, 'AVL_TRAIN'), (74890, 'AVL_EVAL'), (99609, 'UN_AVL'), (130209, 'AVL_TRAIN'), (150745, 'AVL_EVAL'), (171567, 'AVL_TRAIN'), (179526, 'AVL_EVAL'), (238010, 'UN_AVL'), (238084, 'AVL_TRAIN'), (247807, 'AVL_EVAL'), (260861, 'UN_AVL'), (260872, 'AVL_TRAIN'), (263929, 'AVL_EVAL'), (271408, 'UN_AVL'), (278939, 'AVL_TRAIN'), (302514, 'AVL_EVAL'), (341053, 'UN_AVL'), (343667, 'AVL_TRAIN'), (348698, 'AVL_EVAL'), (360108, 'UN_AVL'), (422460, 
'AVL_TRAIN'), (425905, 'AVL_EVAL'), (429944, 'UN_AVL'), (430637, 'AVL_TRAIN'), (438845, 'AVL_EVAL'), (516838, 'UN_AVL'), (516993, 'AVL_TRAIN'), (525518, 'AVL_EVAL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (44110, 'AVL_TRAIN'), (74890, 'AVL_EVAL'), (84531, 'UN_AVL'), (130209, 'AVL_TRAIN'), (150745, 'AVL_EVAL'), (170002, 'UN_AVL'), (171567, 'AVL_TRAIN'), (179526, 'AVL_EVAL'), (238010, 'UN_AVL'), (238084, 'AVL_TRAIN'), (247807, 'AVL_EVAL'), (256476, 'UN_AVL'), (260872, 'AVL_TRAIN'), (263929, 'UN_AVL'), (278939, 'AVL_TRAIN'), (302514, 'AVL_EVAL'), (317463, 'UN_AVL'), (343667, 'AVL_TRAIN'), (348698, 'AVL_EVAL'), (352330, 'UN_AVL'), (422460, 'AVL_TRAIN'), (425905, 'UN_AVL'), (430637, 'AVL_TRAIN'), (438845, 'AVL_EVAL'), (476624, 'UN_AVL'), (516993, 'AVL_TRAIN'), (525518, 'AVL_EVAL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_62.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_62.json index 5e8b93794..4c16057ea 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_62.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_62.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": 
"[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (64608, 'AVL_TRAIN'), (65265, 'UN_AVL'), (65381, 'AVL_TRAIN'), (71302, 'AVL_EVAL'), (77062, 'UN_AVL'), (101055, 'AVL_TRAIN'), (107529, 'AVL_EVAL'), (160131, 'UN_AVL'), (191772, 'AVL_TRAIN'), (196433, 'UN_AVL'), (256070, 'AVL_EVAL'), (304632, 'UN_AVL'), (346433, 'AVL_TRAIN'), (357461, 'AVL_EVAL'), (457959, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (64608, 'AVL_TRAIN'), (65265, 'UN_AVL'), (65381, 'AVL_TRAIN'), (71302, 'UN_AVL'), (101055, 'AVL_TRAIN'), (107529, 'UN_AVL'), (191772, 'AVL_TRAIN'), (196433, 'UN_AVL'), (256070, 'AVL_EVAL'), (288421, 'UN_AVL'), (346433, 'AVL_TRAIN'), (357461, 'AVL_EVAL'), (409957, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_63.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_63.json index f3a221150..386fd6541 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_63.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_63.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "20.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (318747, 'AVL_EVAL'), (323496, 'AVL_TRAIN'), (323512, 'AVL_EVAL'), (323513, 
'AVL_TRAIN'), (323547, 'AVL_EVAL'), (323548, 'AVL_TRAIN'), (323549, 'AVL_EVAL'), (325422, 'UN_AVL'), (325422, 'AVL_TRAIN'), (326320, 'AVL_EVAL'), (336906, 'UN_AVL'), (336927, 'AVL_TRAIN'), (344882, 'AVL_EVAL'), (390350, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (323496, 'AVL_TRAIN'), (323512, 'UN_AVL'), (323513, 'AVL_TRAIN'), (323547, 'UN_AVL'), (323548, 'AVL_TRAIN'), (323549, 'UN_AVL'), (325422, 'AVL_TRAIN'), (326320, 'UN_AVL'), (336927, 'AVL_TRAIN'), (344882, 'AVL_EVAL'), (390350, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_64.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_64.json index 5285657d8..a7342f207 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_64.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_64.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (10471, 'UN_AVL'), (23052, 'AVL_TRAIN'), (51943, 'AVL_EVAL'), (120242, 'AVL_TRAIN'), (133849, 'AVL_EVAL'), (169594, 'AVL_TRAIN'), (171629, 'AVL_EVAL'), (176696, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), 
(23052, 'AVL_TRAIN'), (51943, 'AVL_EVAL'), (120242, 'AVL_TRAIN'), (133849, 'AVL_EVAL'), (169504, 'UN_AVL'), (169594, 'AVL_TRAIN'), (171629, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_65.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_65.json index d5e2b3d3f..b658d1e4d 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_65.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_65.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "22.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (428306, 'AVL_EVAL'), (428306, 'AVL_TRAIN'), (429653, 'AVL_EVAL'), (429658, 'AVL_TRAIN'), (430720, 'AVL_EVAL'), (439306, 'UN_AVL'), (443756, 'AVL_TRAIN'), (444692, 'UN_AVL'), (447878, 'AVL_TRAIN'), (453699, 'AVL_EVAL'), (476126, 'AVL_TRAIN'), (476127, 'AVL_EVAL'), (493367, 'AVL_TRAIN'), (495809, 'AVL_EVAL'), (502089, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (428306, 'AVL_TRAIN'), (429653, 'AVL_EVAL'), (429658, 'AVL_TRAIN'), (430720, 'AVL_EVAL'), (435744, 'UN_AVL'), (443756, 'AVL_TRAIN'), (444692, 'UN_AVL'), (447878, 'AVL_TRAIN'), (453699, 'UN_AVL'), (476126, 'AVL_EVAL'), (476126, 
'AVL_TRAIN'), (476127, 'AVL_EVAL'), (493367, 'AVL_TRAIN'), (495809, 'AVL_EVAL'), (499378, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_66.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_66.json index 00bf9a55b..3414f0ce5 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_66.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_66.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (16765, 'AVL_EVAL'), (47208, 'AVL_TRAIN'), (47896, 'AVL_EVAL'), (76014, 'UN_AVL'), (84359, 'AVL_TRAIN'), (87358, 'AVL_EVAL'), (88215, 'UN_AVL'), (91883, 'AVL_TRAIN'), (93296, 'UN_AVL'), (95358, 'AVL_TRAIN'), (96408, 'UN_AVL'), (100482, 'AVL_TRAIN'), (111737, 'AVL_EVAL'), (150816, 'UN_AVL'), (165998, 'AVL_TRAIN'), (171119, 'AVL_EVAL'), (175408, 'UN_AVL'), (180986, 'AVL_TRAIN'), (205860, 'AVL_EVAL'), (249271, 'UN_AVL'), (258099, 'AVL_TRAIN'), (259682, 'UN_AVL'), (263505, 'AVL_TRAIN'), (269843, 'AVL_EVAL'), (303758, 'UN_AVL'), (304527, 'AVL_TRAIN'), (306677, 'AVL_EVAL'), (314606, 'AVL_TRAIN'), (318211, 'AVL_EVAL'), (336844, 'UN_AVL'), (343077, 'AVL_TRAIN'), (343679, 
'AVL_EVAL'), (349113, 'UN_AVL'), (356360, 'AVL_TRAIN'), (367219, 'AVL_EVAL'), (424702, 'UN_AVL'), (430075, 'AVL_TRAIN'), (434814, 'AVL_EVAL'), (439745, 'UN_AVL'), (447624, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (16765, 'AVL_EVAL'), (17381, 'UN_AVL'), (47208, 'AVL_EVAL'), (47208, 'AVL_TRAIN'), (47896, 'AVL_EVAL'), (61763, 'UN_AVL'), (84359, 'AVL_TRAIN'), (87358, 'UN_AVL'), (91883, 'AVL_TRAIN'), (93296, 'UN_AVL'), (95358, 'AVL_TRAIN'), (96408, 'UN_AVL'), (100482, 'AVL_TRAIN'), (111737, 'AVL_EVAL'), (143480, 'UN_AVL'), (165998, 'AVL_TRAIN'), (171119, 'UN_AVL'), (180986, 'AVL_TRAIN'), (205860, 'AVL_EVAL'), (223719, 'UN_AVL'), (258099, 'AVL_TRAIN'), (259682, 'UN_AVL'), (263505, 'AVL_TRAIN'), (269843, 'AVL_EVAL'), (274785, 'UN_AVL'), (304527, 'AVL_TRAIN'), (306677, 'UN_AVL'), (314606, 'AVL_TRAIN'), (318211, 'AVL_EVAL'), (328132, 'UN_AVL'), (343077, 'AVL_TRAIN'), (343679, 'UN_AVL'), (356360, 'AVL_TRAIN'), (367219, 'AVL_EVAL'), (395344, 'UN_AVL'), (430075, 'AVL_TRAIN'), (434814, 'AVL_EVAL'), (437842, 'UN_AVL'), (447624, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_67.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_67.json index dc903b3e2..868ff4120 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_67.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_67.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, 
"output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "6.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (4013, 'AVL_TRAIN'), (5104, 'UN_AVL'), (5723, 'AVL_TRAIN'), (5807, 'UN_AVL'), (7487, 'AVL_TRAIN'), (9968, 'AVL_EVAL'), (20492, 'AVL_TRAIN'), (22239, 'UN_AVL'), (22631, 'AVL_TRAIN'), (72271, 'AVL_EVAL'), (101911, 'UN_AVL'), (111604, 'AVL_TRAIN'), (113749, 'UN_AVL'), (113758, 'AVL_TRAIN'), (117848, 'UN_AVL'), (117851, 'AVL_TRAIN'), (158356, 'AVL_EVAL'), (160205, 'AVL_TRAIN'), (162268, 'AVL_EVAL'), (177370, 'UN_AVL'), (183858, 'AVL_TRAIN'), (184766, 'UN_AVL'), (184769, 'AVL_TRAIN'), (186214, 'AVL_EVAL'), (187039, 'UN_AVL'), (189238, 'AVL_TRAIN'), (192613, 'AVL_EVAL'), (197063, 'AVL_TRAIN'), (198268, 'AVL_EVAL'), (201303, 'UN_AVL'), (206189, 'AVL_TRAIN'), (238836, 'AVL_EVAL'), (241143, 'AVL_TRAIN'), (243318, 'AVL_EVAL'), (254294, 'UN_AVL'), (256535, 'AVL_TRAIN'), (259280, 'UN_AVL'), (262951, 'AVL_TRAIN'), (263998, 'UN_AVL'), (266091, 'AVL_TRAIN'), (277913, 'AVL_EVAL'), (284879, 'AVL_TRAIN'), (288294, 'AVL_EVAL'), (288297, 'AVL_TRAIN'), (290100, 'AVL_EVAL'), (293517, 'UN_AVL'), (293517, 'AVL_TRAIN'), (318946, 'AVL_EVAL'), (337649, 'UN_AVL'), (337649, 'AVL_TRAIN'), (337965, 'UN_AVL'), (337975, 'AVL_TRAIN'), (341323, 'UN_AVL'), (342510, 'AVL_TRAIN'), (346132, 'UN_AVL'), (347152, 'AVL_TRAIN'), (351287, 'UN_AVL'), (351303, 'AVL_TRAIN'), (352911, 'UN_AVL'), (353131, 'AVL_TRAIN'), (356647, 'UN_AVL'), (356653, 'AVL_TRAIN'), (362516, 'AVL_EVAL'), (362543, 'AVL_TRAIN'), (362816, 'AVL_EVAL'), (362819, 'AVL_TRAIN'), (362820, 'AVL_EVAL'), (362823, 'AVL_TRAIN'), (368122, 'AVL_EVAL'), (373477, 'AVL_TRAIN'), (395762, 'AVL_EVAL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (4013, 'AVL_TRAIN'), (5104, 'UN_AVL'), (5723, 'AVL_TRAIN'), (5807, 'UN_AVL'), (7487, 
'AVL_TRAIN'), (9968, 'AVL_EVAL'), (11468, 'UN_AVL'), (20492, 'AVL_TRAIN'), (22239, 'UN_AVL'), (22631, 'AVL_TRAIN'), (72271, 'AVL_EVAL'), (92112, 'UN_AVL'), (111604, 'AVL_TRAIN'), (113749, 'UN_AVL'), (113758, 'AVL_TRAIN'), (117848, 'UN_AVL'), (117851, 'AVL_TRAIN'), (158356, 'AVL_EVAL'), (160205, 'AVL_TRAIN'), (162268, 'AVL_EVAL'), (168105, 'UN_AVL'), (183858, 'AVL_TRAIN'), (184766, 'UN_AVL'), (184769, 'AVL_TRAIN'), (186214, 'UN_AVL'), (189238, 'AVL_TRAIN'), (192613, 'UN_AVL'), (197063, 'AVL_TRAIN'), (198268, 'UN_AVL'), (206189, 'AVL_TRAIN'), (238836, 'AVL_EVAL'), (241143, 'AVL_TRAIN'), (243318, 'AVL_EVAL'), (254294, 'UN_AVL'), (256535, 'AVL_TRAIN'), (259280, 'UN_AVL'), (262951, 'AVL_TRAIN'), (263998, 'UN_AVL'), (266091, 'AVL_TRAIN'), (277913, 'AVL_EVAL'), (284879, 'UN_AVL'), (284879, 'AVL_TRAIN'), (288294, 'UN_AVL'), (288297, 'AVL_TRAIN'), (290100, 'UN_AVL'), (293517, 'AVL_TRAIN'), (318946, 'AVL_EVAL'), (327810, 'UN_AVL'), (337649, 'AVL_TRAIN'), (337965, 'UN_AVL'), (337975, 'AVL_TRAIN'), (341323, 'UN_AVL'), (342510, 'AVL_TRAIN'), (346132, 'UN_AVL'), (347152, 'AVL_TRAIN'), (351287, 'UN_AVL'), (351303, 'AVL_TRAIN'), (352911, 'UN_AVL'), (353131, 'AVL_TRAIN'), (356647, 'UN_AVL'), (356653, 'AVL_TRAIN'), (362516, 'AVL_EVAL'), (362543, 'UN_AVL'), (362543, 'AVL_TRAIN'), (362816, 'AVL_EVAL'), (362819, 'AVL_TRAIN'), (362820, 'AVL_EVAL'), (362823, 'AVL_TRAIN'), (368122, 'AVL_EVAL'), (373477, 'UN_AVL'), (373477, 'AVL_TRAIN'), (395762, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_68.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_68.json index 8e1e6706a..167b0ffd4 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_68.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_68.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "15.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (8324, 'UN_AVL'), (21894, 'AVL_TRAIN'), (42509, 'AVL_EVAL'), (48632, 'AVL_TRAIN'), (50270, 'AVL_EVAL'), (65294, 'AVL_TRAIN'), (76731, 'AVL_EVAL'), (88487, 'AVL_TRAIN'), (88674, 'AVL_EVAL'), (104987, 'UN_AVL'), (104987, 'AVL_TRAIN'), (130211, 'AVL_EVAL'), (144687, 'AVL_TRAIN'), (146531, 'AVL_EVAL'), (152860, 'AVL_TRAIN'), (162597, 'AVL_EVAL'), (173341, 'AVL_TRAIN'), (174345, 'AVL_EVAL'), (190546, 'UN_AVL'), (195126, 'AVL_TRAIN'), (220867, 'AVL_EVAL'), (229221, 'AVL_TRAIN'), (229687, 'AVL_EVAL'), (231157, 'AVL_TRAIN'), (231671, 'AVL_EVAL'), (238845, 'AVL_TRAIN'), (246010, 'AVL_EVAL'), (278697, 'UN_AVL'), (289934, 'AVL_TRAIN'), (310496, 'AVL_EVAL'), (322083, 'AVL_TRAIN'), (325394, 'AVL_EVAL'), (341657, 'UN_AVL'), (343598, 'AVL_EVAL'), (343598, 'AVL_TRAIN'), (344391, 'AVL_EVAL'), (346628, 'AVL_TRAIN'), (349480, 'AVL_EVAL'), (349614, 'AVL_TRAIN'), (350054, 'AVL_EVAL'), (350065, 'AVL_TRAIN'), (352077, 'AVL_EVAL'), (361502, 'AVL_TRAIN'), (392817, 'AVL_EVAL'), (399308, 'AVL_TRAIN'), (401414, 'AVL_EVAL'), (411647, 'AVL_TRAIN'), (417380, 'AVL_EVAL'), (434242, 'AVL_TRAIN'), (435532, 'AVL_EVAL'), (442494, 'UN_AVL'), (458572, 'AVL_TRAIN'), (479526, 'AVL_EVAL'), (489227, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (21894, 'AVL_TRAIN'), (42509, 'AVL_EVAL'), (48632, 
'AVL_TRAIN'), (50270, 'AVL_EVAL'), (65294, 'UN_AVL'), (65294, 'AVL_TRAIN'), (76731, 'AVL_EVAL'), (88487, 'AVL_TRAIN'), (88674, 'AVL_EVAL'), (91345, 'UN_AVL'), (104987, 'AVL_TRAIN'), (130211, 'AVL_EVAL'), (144687, 'AVL_TRAIN'), (146531, 'AVL_EVAL'), (152854, 'UN_AVL'), (152860, 'AVL_TRAIN'), (162597, 'AVL_EVAL'), (173159, 'UN_AVL'), (173341, 'AVL_TRAIN'), (174345, 'AVL_EVAL'), (179411, 'UN_AVL'), (195126, 'AVL_TRAIN'), (220867, 'AVL_EVAL'), (229221, 'AVL_TRAIN'), (229687, 'AVL_EVAL'), (231157, 'AVL_TRAIN'), (231671, 'AVL_EVAL'), (238845, 'AVL_TRAIN'), (246010, 'AVL_EVAL'), (268026, 'UN_AVL'), (289934, 'AVL_TRAIN'), (310496, 'AVL_EVAL'), (321092, 'UN_AVL'), (322083, 'AVL_TRAIN'), (325394, 'AVL_EVAL'), (334442, 'UN_AVL'), (343598, 'AVL_TRAIN'), (344391, 'UN_AVL'), (346628, 'AVL_TRAIN'), (349480, 'UN_AVL'), (349614, 'AVL_TRAIN'), (350054, 'AVL_EVAL'), (350065, 'AVL_TRAIN'), (352077, 'AVL_EVAL'), (356608, 'UN_AVL'), (361502, 'AVL_TRAIN'), (392817, 'AVL_EVAL'), (399308, 'AVL_TRAIN'), (401414, 'AVL_EVAL'), (411647, 'UN_AVL'), (411647, 'AVL_TRAIN'), (417380, 'AVL_EVAL'), (430468, 'UN_AVL'), (434242, 'AVL_TRAIN'), (435532, 'UN_AVL'), (458572, 'AVL_TRAIN'), (479526, 'AVL_EVAL'), (489227, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_69.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_69.json index 8dac74e50..07b99339c 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_69.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_69.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ 
"var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (2433, 'AVL_TRAIN'), (2538, 'UN_AVL'), (2541, 'AVL_TRAIN'), (3397, 'UN_AVL'), (5926, 'AVL_TRAIN'), (5933, 'UN_AVL'), (6238, 'AVL_TRAIN'), (6242, 'UN_AVL'), (7160, 'AVL_TRAIN'), (7292, 'UN_AVL'), (7293, 'AVL_TRAIN'), (7756, 'UN_AVL'), (7913, 'AVL_TRAIN'), (7963, 'UN_AVL'), (15887, 'AVL_TRAIN'), (16051, 'UN_AVL'), (16051, 'AVL_TRAIN'), (16339, 'UN_AVL'), (16355, 'AVL_TRAIN'), (18494, 'UN_AVL'), (23642, 'AVL_TRAIN'), (25060, 'UN_AVL'), (57809, 'AVL_TRAIN'), (69589, 'AVL_EVAL'), (84170, 'UN_AVL'), (94728, 'AVL_TRAIN'), (94852, 'UN_AVL'), (94858, 'AVL_TRAIN'), (95138, 'UN_AVL'), (95140, 'AVL_TRAIN'), (95142, 'UN_AVL'), (95143, 'AVL_TRAIN'), (95586, 'UN_AVL'), (95708, 'AVL_TRAIN'), (95996, 'UN_AVL'), (102176, 'AVL_TRAIN'), (105610, 'UN_AVL'), (105611, 'AVL_TRAIN'), (105621, 'UN_AVL'), (154917, 'AVL_TRAIN'), (155584, 'UN_AVL'), (155657, 'AVL_TRAIN'), (155862, 'UN_AVL'), (155867, 'AVL_TRAIN'), (155964, 'UN_AVL'), (156143, 'AVL_TRAIN'), (156325, 'UN_AVL'), (156416, 'AVL_TRAIN'), (156573, 'UN_AVL'), (164806, 'AVL_TRAIN'), (164827, 'UN_AVL'), (164830, 'AVL_TRAIN'), (165126, 'UN_AVL'), (165128, 'AVL_TRAIN'), (165415, 'UN_AVL'), (165552, 'AVL_TRAIN'), (165777, 'UN_AVL'), (165789, 'AVL_TRAIN'), (165884, 'UN_AVL'), (165910, 'AVL_TRAIN'), (165952, 'UN_AVL'), (166130, 'AVL_TRAIN'), (174707, 'AVL_EVAL'), (193044, 'UN_AVL'), (239865, 'AVL_TRAIN'), (242180, 'UN_AVL'), (272160, 'AVL_EVAL'), (284271, 'UN_AVL'), (328468, 'AVL_TRAIN'), (328723, 'UN_AVL'), (337958, 'AVL_TRAIN'), (340778, 'AVL_EVAL'), (355205, 'UN_AVL'), (415141, 'AVL_TRAIN'), (415221, 'UN_AVL'), 
(415614, 'AVL_TRAIN'), (415748, 'UN_AVL'), (415751, 'AVL_TRAIN'), (416712, 'UN_AVL'), (416716, 'AVL_TRAIN'), (416788, 'UN_AVL'), (416844, 'AVL_TRAIN'), (417041, 'UN_AVL'), (424386, 'AVL_TRAIN'), (424447, 'UN_AVL'), (424714, 'AVL_TRAIN'), (424813, 'UN_AVL'), (424816, 'AVL_TRAIN'), (424820, 'UN_AVL'), (424826, 'AVL_TRAIN'), (427995, 'AVL_EVAL'), (433723, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (2433, 'AVL_TRAIN'), (2538, 'UN_AVL'), (2541, 'AVL_TRAIN'), (3397, 'UN_AVL'), (5926, 'AVL_TRAIN'), (5933, 'UN_AVL'), (6238, 'AVL_TRAIN'), (6242, 'UN_AVL'), (7160, 'AVL_TRAIN'), (7292, 'UN_AVL'), (7293, 'AVL_TRAIN'), (7756, 'UN_AVL'), (7913, 'AVL_TRAIN'), (7963, 'UN_AVL'), (15887, 'AVL_TRAIN'), (16051, 'UN_AVL'), (16051, 'AVL_TRAIN'), (16339, 'UN_AVL'), (16355, 'AVL_TRAIN'), (18494, 'UN_AVL'), (23642, 'AVL_TRAIN'), (25060, 'UN_AVL'), (57809, 'AVL_TRAIN'), (69589, 'AVL_EVAL'), (79218, 'UN_AVL'), (94728, 'AVL_TRAIN'), (94852, 'UN_AVL'), (94858, 'AVL_TRAIN'), (95138, 'UN_AVL'), (95140, 'AVL_TRAIN'), (95142, 'UN_AVL'), (95143, 'AVL_TRAIN'), (95586, 'UN_AVL'), (95708, 'AVL_TRAIN'), (95996, 'UN_AVL'), (102176, 'AVL_TRAIN'), (105610, 'UN_AVL'), (105611, 'AVL_TRAIN'), (105621, 'UN_AVL'), (154917, 'AVL_TRAIN'), (155584, 'UN_AVL'), (155657, 'AVL_TRAIN'), (155862, 'UN_AVL'), (155867, 'AVL_TRAIN'), (155964, 'UN_AVL'), (156143, 'AVL_TRAIN'), (156325, 'UN_AVL'), (156416, 'AVL_TRAIN'), (156573, 'UN_AVL'), (164806, 'AVL_TRAIN'), (164827, 'UN_AVL'), (164830, 'AVL_TRAIN'), (165126, 'UN_AVL'), (165128, 'AVL_TRAIN'), (165415, 'UN_AVL'), (165552, 'AVL_TRAIN'), (165777, 'UN_AVL'), (165789, 'AVL_TRAIN'), (165884, 'UN_AVL'), (165910, 'AVL_TRAIN'), (165952, 'UN_AVL'), (166130, 'AVL_TRAIN'), (174707, 'AVL_EVAL'), (183920, 'UN_AVL'), (239865, 'AVL_TRAIN'), (242180, 'UN_AVL'), (272160, 'AVL_EVAL'), (276427, 'UN_AVL'), (328468, 'AVL_TRAIN'), (328723, 'UN_AVL'), (337958, 'AVL_TRAIN'), (340778, 'UN_AVL'), (415141, 'AVL_TRAIN'), (415221, 'UN_AVL'), (415614, 'AVL_TRAIN'), 
(415748, 'UN_AVL'), (415751, 'AVL_TRAIN'), (416712, 'UN_AVL'), (416716, 'AVL_TRAIN'), (416788, 'UN_AVL'), (416844, 'AVL_TRAIN'), (417041, 'UN_AVL'), (424386, 'AVL_TRAIN'), (424447, 'UN_AVL'), (424714, 'AVL_TRAIN'), (424813, 'UN_AVL'), (424816, 'AVL_TRAIN'), (424820, 'UN_AVL'), (424826, 'AVL_TRAIN'), (427995, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_7.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_7.json index 1deee8f37..b90839f5a 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_7.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_7.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "17.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (9017, 'AVL_TRAIN'), (10786, 'UN_AVL'), (14358, 'AVL_TRAIN'), (14497, 'UN_AVL'), (15051, 'AVL_TRAIN'), (39546, 'AVL_EVAL'), (69769, 'UN_AVL'), (71100, 'AVL_TRAIN'), (71461, 'UN_AVL'), (72508, 'AVL_TRAIN'), (73125, 'UN_AVL'), (89429, 'AVL_TRAIN'), (92099, 'UN_AVL'), (104353, 'AVL_TRAIN'), (108068, 'AVL_EVAL'), (110934, 'AVL_TRAIN'), (126070, 'AVL_EVAL'), (156083, 'UN_AVL'), (157916, 'AVL_TRAIN'), (158306, 'UN_AVL'), (159842, 'AVL_TRAIN'), (168937, 'AVL_EVAL'), 
(174212, 'AVL_TRAIN'), (175455, 'AVL_EVAL'), (191782, 'UN_AVL'), (191803, 'AVL_TRAIN'), (196476, 'AVL_EVAL'), (234781, 'UN_AVL'), (234850, 'AVL_TRAIN'), (235355, 'AVL_EVAL'), (236615, 'UN_AVL'), (240706, 'AVL_TRAIN'), (247139, 'UN_AVL'), (252813, 'AVL_TRAIN'), (257593, 'AVL_EVAL'), (262126, 'AVL_TRAIN'), (264532, 'AVL_EVAL'), (277505, 'UN_AVL'), (282323, 'AVL_TRAIN'), (303309, 'AVL_EVAL'), (303309, 'AVL_TRAIN'), (303679, 'AVL_EVAL'), (319626, 'UN_AVL'), (324393, 'AVL_TRAIN'), (328549, 'AVL_EVAL'), (340152, 'UN_AVL'), (340177, 'AVL_TRAIN'), (343587, 'AVL_EVAL'), (358011, 'UN_AVL'), (361739, 'AVL_TRAIN'), (387847, 'AVL_EVAL'), (408233, 'UN_AVL'), (420993, 'AVL_TRAIN'), (425646, 'AVL_EVAL'), (433732, 'AVL_TRAIN'), (439485, 'AVL_EVAL'), (449059, 'AVL_TRAIN'), (471126, 'AVL_EVAL'), (494215, 'UN_AVL'), (504610, 'AVL_TRAIN'), (507970, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (9017, 'AVL_TRAIN'), (10786, 'UN_AVL'), (14358, 'AVL_TRAIN'), (14497, 'UN_AVL'), (15051, 'AVL_TRAIN'), (39546, 'AVL_EVAL'), (57605, 'UN_AVL'), (71100, 'AVL_TRAIN'), (71461, 'UN_AVL'), (72508, 'AVL_TRAIN'), (73125, 'UN_AVL'), (89429, 'AVL_TRAIN'), (92099, 'UN_AVL'), (104353, 'AVL_TRAIN'), (108068, 'UN_AVL'), (110934, 'AVL_TRAIN'), (126070, 'AVL_EVAL'), (150170, 'UN_AVL'), (157916, 'AVL_TRAIN'), (158306, 'UN_AVL'), (159842, 'AVL_TRAIN'), (168937, 'AVL_EVAL'), (174212, 'AVL_TRAIN'), (175455, 'AVL_EVAL'), (183939, 'UN_AVL'), (191803, 'AVL_TRAIN'), (196476, 'AVL_EVAL'), (228881, 'UN_AVL'), (234850, 'AVL_TRAIN'), (235355, 'UN_AVL'), (240706, 'AVL_TRAIN'), (247139, 'UN_AVL'), (252813, 'AVL_TRAIN'), (257593, 'UN_AVL'), (262126, 'AVL_TRAIN'), (264532, 'AVL_EVAL'), (269381, 'UN_AVL'), (282323, 'AVL_TRAIN'), (303309, 'AVL_EVAL'), (303309, 'AVL_TRAIN'), (303679, 'AVL_EVAL'), (315824, 'UN_AVL'), (324393, 'AVL_TRAIN'), (328549, 'AVL_EVAL'), (336794, 'UN_AVL'), (340177, 'AVL_TRAIN'), (343587, 'AVL_EVAL'), (350824, 'UN_AVL'), (361739, 'AVL_TRAIN'), (387847, 'AVL_EVAL'), (399094, 
'UN_AVL'), (420993, 'AVL_TRAIN'), (425646, 'AVL_EVAL'), (432953, 'UN_AVL'), (433732, 'AVL_TRAIN'), (439485, 'AVL_EVAL'), (448499, 'UN_AVL'), (449059, 'AVL_TRAIN'), (471126, 'AVL_EVAL'), (482557, 'UN_AVL'), (504610, 'AVL_TRAIN'), (507970, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_70.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_70.json index 16ec2c802..e8ccec533 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_70.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_70.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (50410, 'AVL_EVAL'), (50410, 'AVL_TRAIN'), (66090, 'AVL_EVAL'), (75333, 'UN_AVL'), (75388, 'AVL_TRAIN'), (81658, 'AVL_EVAL'), (84310, 'UN_AVL'), (84310, 'AVL_TRAIN'), (85129, 'UN_AVL'), (85141, 'AVL_TRAIN'), (85143, 'UN_AVL'), (85153, 'AVL_TRAIN'), (86021, 'UN_AVL'), (86262, 'AVL_TRAIN'), (86321, 'UN_AVL'), (86326, 'AVL_TRAIN'), (92968, 'AVL_EVAL'), (93245, 'AVL_TRAIN'), (118423, 'AVL_EVAL'), (132245, 'UN_AVL'), (132245, 'AVL_TRAIN'), (132270, 'UN_AVL'), (132285, 'AVL_TRAIN'), (132337, 'UN_AVL'), (132343, 'AVL_TRAIN'), (136804, 
'AVL_EVAL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (50410, 'AVL_EVAL'), (50410, 'AVL_TRAIN'), (66090, 'AVL_EVAL'), (71354, 'UN_AVL'), (75388, 'AVL_TRAIN'), (81658, 'UN_AVL'), (84310, 'AVL_TRAIN'), (85129, 'UN_AVL'), (85141, 'AVL_TRAIN'), (85143, 'UN_AVL'), (85153, 'AVL_TRAIN'), (86021, 'UN_AVL'), (86262, 'AVL_TRAIN'), (86321, 'UN_AVL'), (86326, 'AVL_TRAIN'), (92968, 'AVL_EVAL'), (93245, 'AVL_TRAIN'), (118423, 'AVL_EVAL'), (132245, 'UN_AVL'), (132245, 'AVL_TRAIN'), (132270, 'UN_AVL'), (132285, 'AVL_TRAIN'), (132337, 'UN_AVL'), (132343, 'AVL_TRAIN'), (136804, 'AVL_EVAL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_71.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_71.json index c110ed296..e004cdaf6 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_71.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_71.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "4.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (308981, 'AVL_EVAL'), (399404, 'UN_AVL'), (442854, 'AVL_TRAIN'), (467716, 'AVL_EVAL'), (481639, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), 
(308981, 'AVL_EVAL'), (346959, 'UN_AVL'), (442854, 'AVL_TRAIN'), (467716, 'AVL_EVAL'), (481639, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_72.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_72.json index b0924bb06..e69ff0d30 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_72.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_72.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "21.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8561, 'AVL_TRAIN'), (19861, 'AVL_EVAL'), (78969, 'UN_AVL'), (80833, 'AVL_TRAIN'), (81031, 'UN_AVL'), (81742, 'AVL_TRAIN'), (82207, 'UN_AVL'), (82250, 'AVL_TRAIN'), (82275, 'UN_AVL'), (82373, 'AVL_TRAIN'), (82425, 'UN_AVL'), (83496, 'AVL_TRAIN'), (89906, 'AVL_EVAL'), (103880, 'UN_AVL'), (107486, 'AVL_TRAIN'), (110334, 'UN_AVL'), (110376, 'AVL_TRAIN'), (117548, 'AVL_EVAL'), (127100, 'AVL_TRAIN'), (127895, 'AVL_EVAL'), (133304, 'AVL_TRAIN'), (134016, 'AVL_EVAL'), (183763, 'UN_AVL'), (192155, 'AVL_TRAIN'), (192454, 'UN_AVL'), (192509, 'AVL_TRAIN'), (199060, 'UN_AVL'), (199073, 'AVL_TRAIN'), (199291, 'AVL_EVAL'), (199791, 'AVL_TRAIN'), (208187, 'AVL_EVAL'), (217367, 
'AVL_TRAIN'), (217824, 'AVL_EVAL'), (258342, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8561, 'AVL_TRAIN'), (19861, 'AVL_EVAL'), (45570, 'UN_AVL'), (80833, 'AVL_TRAIN'), (81031, 'UN_AVL'), (81742, 'AVL_TRAIN'), (82207, 'UN_AVL'), (82250, 'AVL_TRAIN'), (82275, 'UN_AVL'), (82373, 'AVL_TRAIN'), (82425, 'UN_AVL'), (83496, 'AVL_TRAIN'), (89906, 'AVL_EVAL'), (95456, 'UN_AVL'), (107486, 'AVL_TRAIN'), (110334, 'UN_AVL'), (110376, 'AVL_TRAIN'), (117548, 'AVL_EVAL'), (127100, 'AVL_TRAIN'), (127895, 'AVL_EVAL'), (133304, 'AVL_TRAIN'), (134016, 'AVL_EVAL'), (161131, 'UN_AVL'), (192155, 'AVL_TRAIN'), (192454, 'UN_AVL'), (192509, 'AVL_TRAIN'), (199060, 'UN_AVL'), (199073, 'AVL_TRAIN'), (199291, 'UN_AVL'), (199791, 'AVL_TRAIN'), (208187, 'AVL_EVAL'), (217367, 'AVL_TRAIN'), (217824, 'AVL_EVAL'), (253918, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_73.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_73.json index a280cc542..d519c4392 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_73.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_73.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "2.0", 
"avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (4259, 'AVL_EVAL'), (7182, 'UN_AVL'), (12991, 'AVL_TRAIN'), (14582, 'UN_AVL'), (14638, 'AVL_TRAIN'), (16153, 'UN_AVL'), (56977, 'AVL_EVAL'), (70494, 'UN_AVL'), (74795, 'AVL_TRAIN'), (79828, 'AVL_EVAL'), (82304, 'UN_AVL'), (93453, 'AVL_TRAIN'), (99718, 'AVL_EVAL'), (100804, 'AVL_TRAIN'), (104214, 'AVL_EVAL'), (138593, 'UN_AVL'), (146604, 'AVL_TRAIN'), (147102, 'UN_AVL'), (147150, 'AVL_TRAIN'), (147482, 'UN_AVL'), (148633, 'AVL_TRAIN'), (149522, 'UN_AVL'), (149524, 'AVL_TRAIN'), (149563, 'UN_AVL'), (149586, 'AVL_TRAIN'), (149589, 'UN_AVL'), (149593, 'AVL_TRAIN'), (149595, 'UN_AVL'), (149609, 'AVL_TRAIN'), (149612, 'UN_AVL'), (149618, 'AVL_TRAIN'), (149636, 'UN_AVL'), (149640, 'AVL_TRAIN'), (150675, 'UN_AVL'), (153678, 'AVL_TRAIN'), (154402, 'UN_AVL'), (154404, 'AVL_TRAIN'), (154409, 'UN_AVL'), (154412, 'AVL_TRAIN'), (154998, 'UN_AVL'), (155000, 'AVL_TRAIN'), (156289, 'UN_AVL'), (173844, 'AVL_TRAIN'), (175880, 'UN_AVL'), (176942, 'AVL_TRAIN'), (181304, 'AVL_EVAL'), (195443, 'UN_AVL'), (218330, 'AVL_TRAIN'), (218713, 'UN_AVL'), (218721, 'AVL_TRAIN'), (221952, 'UN_AVL'), (224086, 'AVL_TRAIN'), (224766, 'UN_AVL'), (224774, 'AVL_TRAIN'), (225304, 'UN_AVL'), (226234, 'AVL_TRAIN'), (229219, 'AVL_EVAL'), (229230, 'AVL_TRAIN'), (231075, 'AVL_EVAL'), (245140, 'UN_AVL'), (261751, 'AVL_TRAIN'), (262150, 'UN_AVL'), (262239, 'AVL_TRAIN'), (262346, 'UN_AVL'), (263991, 'AVL_TRAIN'), (268340, 'AVL_EVAL'), (275106, 'UN_AVL'), (276707, 'AVL_TRAIN'), (279266, 'AVL_EVAL'), (285905, 'UN_AVL'), (311931, 'AVL_TRAIN'), (317297, 'AVL_EVAL'), (341684, 'UN_AVL'), (342808, 'AVL_TRAIN'), (349589, 'AVL_EVAL'), (349600, 'AVL_TRAIN'), (349675, 'AVL_EVAL'), (349683, 'AVL_TRAIN'), (353205, 'AVL_EVAL'), (389852, 'AVL_TRAIN'), (392766, 'AVL_EVAL'), (408842, 'AVL_TRAIN'), (408872, 'AVL_EVAL'), (427944, 'UN_AVL'), (441856, 'AVL_TRAIN'), (441858, 'UN_AVL'), (441858, 'AVL_TRAIN'), (441859, 'UN_AVL'), (441863, 'AVL_TRAIN'), (441865, 
'UN_AVL'), (441882, 'AVL_TRAIN'), (442310, 'UN_AVL'), (442490, 'AVL_TRAIN'), (443176, 'UN_AVL'), (443240, 'AVL_TRAIN'), (446981, 'AVL_EVAL'), (447654, 'AVL_TRAIN'), (448492, 'AVL_EVAL'), (449952, 'AVL_TRAIN'), (463961, 'AVL_EVAL'), (520659, 'AVL_TRAIN'), (521981, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (12991, 'AVL_TRAIN'), (14582, 'UN_AVL'), (14638, 'AVL_TRAIN'), (16153, 'UN_AVL'), (56977, 'AVL_EVAL'), (64176, 'UN_AVL'), (74795, 'AVL_TRAIN'), (79828, 'UN_AVL'), (93453, 'AVL_TRAIN'), (99718, 'AVL_EVAL'), (100804, 'AVL_TRAIN'), (104214, 'AVL_EVAL'), (135459, 'UN_AVL'), (146604, 'AVL_TRAIN'), (147102, 'UN_AVL'), (147150, 'AVL_TRAIN'), (147482, 'UN_AVL'), (148633, 'AVL_TRAIN'), (149522, 'UN_AVL'), (149524, 'AVL_TRAIN'), (149563, 'UN_AVL'), (149586, 'AVL_TRAIN'), (149589, 'UN_AVL'), (149593, 'AVL_TRAIN'), (149595, 'UN_AVL'), (149609, 'AVL_TRAIN'), (149612, 'UN_AVL'), (149618, 'AVL_TRAIN'), (149636, 'UN_AVL'), (149640, 'AVL_TRAIN'), (150675, 'UN_AVL'), (153678, 'AVL_TRAIN'), (154402, 'UN_AVL'), (154404, 'AVL_TRAIN'), (154409, 'UN_AVL'), (154412, 'AVL_TRAIN'), (154998, 'UN_AVL'), (155000, 'AVL_TRAIN'), (156289, 'UN_AVL'), (173844, 'AVL_TRAIN'), (175880, 'UN_AVL'), (176942, 'AVL_TRAIN'), (181304, 'AVL_EVAL'), (182835, 'UN_AVL'), (218330, 'AVL_TRAIN'), (218713, 'UN_AVL'), (218721, 'AVL_TRAIN'), (221952, 'UN_AVL'), (224086, 'AVL_TRAIN'), (224766, 'UN_AVL'), (224774, 'AVL_TRAIN'), (225304, 'UN_AVL'), (226234, 'AVL_TRAIN'), (229219, 'UN_AVL'), (229230, 'AVL_TRAIN'), (231075, 'AVL_EVAL'), (234865, 'UN_AVL'), (261751, 'AVL_TRAIN'), (262150, 'UN_AVL'), (262239, 'AVL_TRAIN'), (262346, 'UN_AVL'), (263991, 'AVL_TRAIN'), (268340, 'UN_AVL'), (276707, 'AVL_TRAIN'), (279266, 'UN_AVL'), (311931, 'AVL_TRAIN'), (317297, 'UN_AVL'), (342808, 'AVL_TRAIN'), (349589, 'AVL_EVAL'), (349600, 'AVL_TRAIN'), (349675, 'AVL_EVAL'), (349683, 'AVL_TRAIN'), (353205, 'AVL_EVAL'), (357050, 'UN_AVL'), (389852, 'AVL_TRAIN'), (392766, 'AVL_EVAL'), (408536, 'UN_AVL'), 
(408842, 'AVL_TRAIN'), (408872, 'UN_AVL'), (441856, 'AVL_TRAIN'), (441858, 'UN_AVL'), (441858, 'AVL_TRAIN'), (441859, 'UN_AVL'), (441863, 'AVL_TRAIN'), (441865, 'UN_AVL'), (441882, 'AVL_TRAIN'), (442310, 'UN_AVL'), (442490, 'AVL_TRAIN'), (443176, 'UN_AVL'), (443240, 'AVL_TRAIN'), (446981, 'UN_AVL'), (447654, 'AVL_TRAIN'), (448492, 'AVL_EVAL'), (449952, 'AVL_TRAIN'), (463961, 'AVL_EVAL'), (498184, 'UN_AVL'), (520659, 'AVL_TRAIN'), (521981, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_74.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_74.json index 3fd8a3af6..d66b7c01d 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_74.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_74.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "28.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (12226, 'AVL_EVAL'), (12226, 'AVL_TRAIN'), (40734, 'AVL_EVAL'), (100070, 'AVL_TRAIN'), (100072, 'AVL_EVAL'), (100075, 'AVL_TRAIN'), (171837, 'AVL_EVAL'), (185258, 'UN_AVL'), (188439, 'AVL_TRAIN'), (191808, 'AVL_EVAL'), (191809, 'AVL_TRAIN'), (219987, 'AVL_EVAL'), (246401, 'UN_AVL'), (275732, 'AVL_TRAIN'), (304045, 'AVL_EVAL'), 
(304047, 'AVL_TRAIN'), (304070, 'AVL_EVAL'), (346345, 'UN_AVL'), (357902, 'AVL_TRAIN'), (386596, 'AVL_EVAL'), (443162, 'AVL_TRAIN'), (443338, 'AVL_EVAL'), (443339, 'AVL_TRAIN'), (472050, 'AVL_EVAL'), (518392, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (12226, 'AVL_TRAIN'), (40734, 'AVL_EVAL'), (100070, 'AVL_TRAIN'), (100072, 'AVL_EVAL'), (100075, 'AVL_TRAIN'), (171837, 'UN_AVL'), (188439, 'AVL_TRAIN'), (191808, 'UN_AVL'), (191809, 'AVL_TRAIN'), (219987, 'AVL_EVAL'), (241972, 'UN_AVL'), (275732, 'AVL_TRAIN'), (304045, 'AVL_EVAL'), (304047, 'AVL_TRAIN'), (304070, 'AVL_EVAL'), (323691, 'UN_AVL'), (357902, 'AVL_TRAIN'), (386596, 'AVL_EVAL'), (440756, 'UN_AVL'), (443162, 'AVL_TRAIN'), (443338, 'UN_AVL'), (443339, 'AVL_TRAIN'), (472050, 'AVL_EVAL'), (518392, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_75.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_75.json index 404a2c7c0..5c0f76e65 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_75.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_75.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "3.0", "avl_events_mobiperf_3st_50": "[(0, 
'AVL_TRAIN'), (300, 'UN_AVL'), (439413, 'AVL_EVAL'), (473162, 'AVL_TRAIN'), (473897, 'AVL_EVAL'), (489337, 'AVL_TRAIN'), (496640, 'AVL_EVAL'), (519819, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (439413, 'AVL_EVAL'), (473162, 'AVL_TRAIN'), (473897, 'AVL_EVAL'), (489337, 'AVL_TRAIN'), (496640, 'AVL_EVAL'), (519819, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_76.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_76.json index 0e9d22b34..a39ca1afe 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_76.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_76.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (11928, 'AVL_EVAL'), (12117, 'AVL_TRAIN'), (41441, 'AVL_EVAL'), (42805, 'AVL_TRAIN'), (69465, 'AVL_EVAL'), (88191, 'AVL_TRAIN'), (88496, 'AVL_EVAL'), (97203, 'UN_AVL'), (98146, 'AVL_TRAIN'), (150871, 'AVL_EVAL'), (154175, 'AVL_TRAIN'), (158699, 'AVL_EVAL'), (165936, 'AVL_TRAIN'), (166258, 'AVL_EVAL'), (170503, 'AVL_TRAIN'), (175275, 'AVL_EVAL'), (175281, 'AVL_TRAIN'), (176749, 'AVL_EVAL'), (176753, 'AVL_TRAIN'), 
(177234, 'AVL_EVAL'), (177238, 'AVL_TRAIN'), (177263, 'AVL_EVAL'), (178999, 'AVL_TRAIN'), (179547, 'AVL_EVAL'), (179551, 'AVL_TRAIN'), (179583, 'AVL_EVAL'), (181830, 'AVL_TRAIN'), (182652, 'AVL_EVAL'), (185216, 'AVL_TRAIN'), (215337, 'AVL_EVAL'), (217255, 'AVL_TRAIN'), (238225, 'AVL_EVAL'), (238473, 'AVL_TRAIN'), (242686, 'AVL_EVAL'), (248275, 'AVL_TRAIN'), (251347, 'AVL_EVAL'), (251349, 'AVL_TRAIN'), (251828, 'AVL_EVAL'), (252137, 'AVL_TRAIN'), (253481, 'AVL_EVAL'), (255751, 'AVL_TRAIN'), (256163, 'AVL_EVAL'), (256416, 'AVL_TRAIN'), (258992, 'AVL_EVAL'), (265627, 'AVL_TRAIN'), (266353, 'AVL_EVAL'), (266949, 'AVL_TRAIN'), (267908, 'AVL_EVAL'), (274429, 'AVL_TRAIN'), (300850, 'AVL_EVAL'), (305134, 'AVL_TRAIN'), (310740, 'AVL_EVAL'), (344505, 'AVL_TRAIN'), (347376, 'AVL_EVAL'), (347382, 'AVL_TRAIN'), (347468, 'AVL_EVAL'), (388096, 'AVL_TRAIN'), (414073, 'AVL_EVAL'), (414075, 'AVL_TRAIN'), (414736, 'AVL_EVAL'), (414739, 'AVL_TRAIN'), (414782, 'AVL_EVAL'), (414785, 'AVL_TRAIN'), (414888, 'AVL_EVAL'), (419963, 'AVL_TRAIN'), (434318, 'AVL_EVAL'), (434321, 'AVL_TRAIN'), (434509, 'AVL_EVAL'), (434512, 'AVL_TRAIN'), (434854, 'AVL_EVAL'), (438318, 'AVL_TRAIN'), (438966, 'AVL_EVAL'), (444248, 'AVL_TRAIN'), (502393, 'AVL_EVAL'), (506396, 'AVL_TRAIN'), (508566, 'AVL_EVAL'), (508589, 'AVL_TRAIN'), (509615, 'AVL_EVAL'), (509621, 'AVL_TRAIN'), (509809, 'AVL_EVAL'), (512986, 'AVL_TRAIN'), (512998, 'AVL_EVAL'), (513000, 'AVL_TRAIN'), (521212, 'UN_AVL'), (521212, 'AVL_TRAIN'), (521222, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (12117, 'AVL_TRAIN'), (41441, 'AVL_EVAL'), (42805, 'AVL_TRAIN'), (69465, 'AVL_EVAL'), (81989, 'UN_AVL'), (88191, 'AVL_TRAIN'), (88496, 'UN_AVL'), (98146, 'AVL_TRAIN'), (150871, 'AVL_EVAL'), (154098, 'UN_AVL'), (154175, 'AVL_TRAIN'), (158699, 'AVL_EVAL'), (162731, 'UN_AVL'), (165936, 'AVL_TRAIN'), (166258, 'UN_AVL'), (170503, 'AVL_TRAIN'), (175275, 'AVL_EVAL'), (175281, 'AVL_TRAIN'), (176749, 'AVL_EVAL'), (176753, 
'AVL_TRAIN'), (177234, 'AVL_EVAL'), (177238, 'AVL_TRAIN'), (177263, 'AVL_EVAL'), (178999, 'AVL_TRAIN'), (179547, 'AVL_EVAL'), (179551, 'AVL_TRAIN'), (179583, 'AVL_EVAL'), (181827, 'UN_AVL'), (181830, 'AVL_TRAIN'), (182652, 'AVL_EVAL'), (185203, 'UN_AVL'), (185216, 'AVL_TRAIN'), (215337, 'AVL_EVAL'), (217255, 'AVL_TRAIN'), (238225, 'AVL_EVAL'), (238473, 'AVL_TRAIN'), (242686, 'AVL_EVAL'), (246197, 'UN_AVL'), (248275, 'AVL_TRAIN'), (251347, 'AVL_EVAL'), (251349, 'AVL_TRAIN'), (251828, 'AVL_EVAL'), (252137, 'AVL_TRAIN'), (253481, 'AVL_EVAL'), (255751, 'AVL_TRAIN'), (256163, 'AVL_EVAL'), (256416, 'AVL_TRAIN'), (258992, 'AVL_EVAL'), (262011, 'UN_AVL'), (265627, 'AVL_TRAIN'), (266353, 'UN_AVL'), (266949, 'AVL_TRAIN'), (267908, 'UN_AVL'), (274429, 'AVL_TRAIN'), (300850, 'AVL_EVAL'), (305126, 'UN_AVL'), (305134, 'AVL_TRAIN'), (310740, 'AVL_EVAL'), (315383, 'UN_AVL'), (344505, 'AVL_EVAL'), (344505, 'AVL_TRAIN'), (347376, 'AVL_EVAL'), (347382, 'AVL_TRAIN'), (347468, 'AVL_EVAL'), (355069, 'UN_AVL'), (388096, 'AVL_TRAIN'), (414073, 'AVL_EVAL'), (414075, 'AVL_TRAIN'), (414736, 'AVL_EVAL'), (414739, 'AVL_TRAIN'), (414782, 'AVL_EVAL'), (414785, 'AVL_TRAIN'), (414888, 'AVL_EVAL'), (419745, 'UN_AVL'), (419963, 'AVL_TRAIN'), (434318, 'AVL_EVAL'), (434321, 'AVL_TRAIN'), (434509, 'AVL_EVAL'), (434512, 'AVL_TRAIN'), (434854, 'AVL_EVAL'), (438302, 'UN_AVL'), (438318, 'AVL_TRAIN'), (438966, 'AVL_EVAL'), (439347, 'UN_AVL'), (444248, 'AVL_TRAIN'), (502393, 'AVL_EVAL'), (505943, 'UN_AVL'), (506396, 'AVL_TRAIN'), (508566, 'AVL_EVAL'), (508589, 'AVL_TRAIN'), (509615, 'AVL_EVAL'), (509621, 'AVL_TRAIN'), (509809, 'AVL_EVAL'), (512986, 'AVL_TRAIN'), (512998, 'AVL_EVAL'), (513000, 'AVL_TRAIN'), (521212, 'UN_AVL'), (521212, 'AVL_TRAIN'), (521222, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_77.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_77.json index a1c28fe14..edfb488ca 100644 --- 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_77.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_77.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "22.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (2036, 'AVL_EVAL'), (11139, 'UN_AVL'), (76199, 'AVL_EVAL'), (184202, 'UN_AVL'), (240470, 'AVL_EVAL'), (253362, 'UN_AVL'), (263719, 'AVL_EVAL'), (271057, 'UN_AVL'), (303005, 'AVL_EVAL'), (331814, 'UN_AVL'), (331883, 'AVL_TRAIN'), (334717, 'AVL_EVAL'), (341421, 'AVL_TRAIN'), (346928, 'AVL_EVAL'), (433575, 'AVL_TRAIN'), (436123, 'AVL_EVAL'), (442725, 'UN_AVL'), (507619, 'AVL_EVAL'), (515613, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (2036, 'AVL_EVAL'), (11139, 'UN_AVL'), (76199, 'AVL_EVAL'), (82710, 'UN_AVL'), (240470, 'AVL_EVAL'), (249573, 'UN_AVL'), (331883, 'AVL_TRAIN'), (334717, 'UN_AVL'), (341421, 'AVL_TRAIN'), (346928, 'AVL_EVAL'), (356280, 'UN_AVL'), (386159, 'AVL_EVAL'), (427611, 'UN_AVL'), (433575, 'AVL_TRAIN'), (436123, 'AVL_EVAL'), (441450, 'UN_AVL'), (507619, 'AVL_EVAL'), (515613, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_78.json 
b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_78.json index 148840d4d..7d0e77e82 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_78.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_78.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "16.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (44164, 'AVL_EVAL'), (90304, 'UN_AVL'), (96884, 'AVL_TRAIN'), (98971, 'UN_AVL'), (130475, 'AVL_EVAL'), (167431, 'UN_AVL'), (167431, 'AVL_TRAIN'), (167678, 'UN_AVL'), (171239, 'AVL_TRAIN'), (171400, 'UN_AVL'), (172833, 'AVL_TRAIN'), (173172, 'UN_AVL'), (173201, 'AVL_TRAIN'), (174950, 'UN_AVL'), (175335, 'AVL_TRAIN'), (175629, 'UN_AVL'), (176175, 'AVL_TRAIN'), (176523, 'UN_AVL'), (176668, 'AVL_TRAIN'), (178740, 'UN_AVL'), (184069, 'AVL_TRAIN'), (184413, 'UN_AVL'), (185527, 'AVL_TRAIN'), (186520, 'UN_AVL'), (187596, 'AVL_TRAIN'), (220551, 'AVL_EVAL'), (233370, 'UN_AVL'), (244488, 'AVL_TRAIN'), (248833, 'UN_AVL'), (251473, 'AVL_TRAIN'), (253971, 'UN_AVL'), (256740, 'AVL_TRAIN'), (258141, 'UN_AVL'), (259484, 'AVL_TRAIN'), (261278, 'UN_AVL'), (263356, 'AVL_TRAIN'), (263359, 'UN_AVL'), (263359, 'AVL_TRAIN'), (265718, 'UN_AVL'), (267168, 'AVL_TRAIN'), (267756, 
'UN_AVL'), (269384, 'AVL_TRAIN'), (271163, 'UN_AVL'), (273601, 'AVL_TRAIN'), (274511, 'UN_AVL'), (274705, 'AVL_TRAIN'), (275534, 'UN_AVL'), (276357, 'AVL_TRAIN'), (276795, 'UN_AVL'), (278899, 'AVL_TRAIN'), (311690, 'AVL_EVAL'), (325339, 'UN_AVL'), (339269, 'AVL_TRAIN'), (340790, 'UN_AVL'), (342223, 'AVL_TRAIN'), (342943, 'UN_AVL'), (342948, 'AVL_TRAIN'), (342966, 'UN_AVL'), (344336, 'AVL_TRAIN'), (347885, 'UN_AVL'), (353922, 'AVL_TRAIN'), (356245, 'UN_AVL'), (359401, 'AVL_TRAIN'), (359402, 'UN_AVL'), (391875, 'AVL_TRAIN'), (421919, 'AVL_EVAL'), (441449, 'UN_AVL'), (476153, 'AVL_EVAL'), (523897, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (44164, 'AVL_EVAL'), (86440, 'UN_AVL'), (96884, 'AVL_TRAIN'), (98971, 'UN_AVL'), (130475, 'AVL_EVAL'), (167431, 'UN_AVL'), (167431, 'AVL_TRAIN'), (167678, 'UN_AVL'), (171239, 'AVL_TRAIN'), (171400, 'UN_AVL'), (172833, 'AVL_TRAIN'), (173172, 'UN_AVL'), (173201, 'AVL_TRAIN'), (174950, 'UN_AVL'), (175335, 'AVL_TRAIN'), (175629, 'UN_AVL'), (176175, 'AVL_TRAIN'), (176523, 'UN_AVL'), (176668, 'AVL_TRAIN'), (178740, 'UN_AVL'), (184069, 'AVL_TRAIN'), (184413, 'UN_AVL'), (185527, 'AVL_TRAIN'), (186520, 'UN_AVL'), (187596, 'AVL_TRAIN'), (220551, 'AVL_EVAL'), (229650, 'UN_AVL'), (244488, 'AVL_TRAIN'), (248833, 'UN_AVL'), (251473, 'AVL_TRAIN'), (253971, 'UN_AVL'), (256740, 'AVL_TRAIN'), (258141, 'UN_AVL'), (259484, 'AVL_TRAIN'), (261278, 'UN_AVL'), (263356, 'AVL_TRAIN'), (263359, 'UN_AVL'), (263359, 'AVL_TRAIN'), (265718, 'UN_AVL'), (267168, 'AVL_TRAIN'), (267756, 'UN_AVL'), (269384, 'AVL_TRAIN'), (271163, 'UN_AVL'), (273601, 'AVL_TRAIN'), (274511, 'UN_AVL'), (274705, 'AVL_TRAIN'), (275534, 'UN_AVL'), (276357, 'AVL_TRAIN'), (276795, 'UN_AVL'), (278899, 'AVL_TRAIN'), (311690, 'AVL_EVAL'), (321563, 'UN_AVL'), (339269, 'AVL_TRAIN'), (340790, 'UN_AVL'), (342223, 'AVL_TRAIN'), (342943, 'UN_AVL'), (342948, 'AVL_TRAIN'), (342966, 'UN_AVL'), (344336, 'AVL_TRAIN'), (347885, 'UN_AVL'), (353922, 'AVL_TRAIN'), (356245, 
'UN_AVL'), (359401, 'AVL_TRAIN'), (359402, 'UN_AVL'), (391875, 'AVL_TRAIN'), (421919, 'AVL_EVAL'), (432049, 'UN_AVL'), (476153, 'AVL_EVAL'), (517876, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_79.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_79.json index fce2758cd..1b9c796e9 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_79.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_79.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (4067, 'UN_AVL'), (4094, 'AVL_TRAIN'), (4102, 'UN_AVL'), (4106, 'AVL_TRAIN'), (4107, 'UN_AVL'), (4116, 'AVL_TRAIN'), (6690, 'UN_AVL'), (6692, 'AVL_TRAIN'), (9136, 'AVL_EVAL'), (9243, 'AVL_TRAIN'), (10252, 'AVL_EVAL'), (15088, 'UN_AVL'), (15088, 'AVL_TRAIN'), (39618, 'AVL_EVAL'), (75536, 'UN_AVL'), (81416, 'AVL_TRAIN'), (85218, 'UN_AVL'), (93701, 'AVL_TRAIN'), (93702, 'UN_AVL'), (93706, 'AVL_TRAIN'), (93712, 'UN_AVL'), (93712, 'AVL_TRAIN'), (93714, 'UN_AVL'), (93735, 'AVL_TRAIN'), (99705, 'AVL_EVAL'), (132400, 'UN_AVL'), (158713, 'AVL_TRAIN'), (158755, 'UN_AVL'), (159663, 'AVL_TRAIN'), (159664, 'UN_AVL'), 
(159976, 'AVL_TRAIN'), (164180, 'UN_AVL'), (172831, 'AVL_TRAIN'), (172832, 'UN_AVL'), (172840, 'AVL_TRAIN'), (193260, 'AVL_EVAL'), (251459, 'UN_AVL'), (251465, 'AVL_TRAIN'), (253057, 'UN_AVL'), (276460, 'AVL_EVAL'), (283530, 'UN_AVL'), (283984, 'AVL_TRAIN'), (309208, 'AVL_EVAL'), (354820, 'UN_AVL'), (362391, 'AVL_TRAIN'), (362392, 'UN_AVL'), (362393, 'AVL_TRAIN'), (362402, 'UN_AVL'), (362402, 'AVL_TRAIN'), (362411, 'UN_AVL'), (362537, 'AVL_TRAIN'), (385238, 'AVL_EVAL'), (385239, 'AVL_TRAIN'), (385253, 'AVL_EVAL'), (385255, 'AVL_TRAIN'), (385256, 'AVL_EVAL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (4067, 'UN_AVL'), (4094, 'AVL_TRAIN'), (4102, 'UN_AVL'), (4106, 'AVL_TRAIN'), (4107, 'UN_AVL'), (4116, 'AVL_TRAIN'), (6690, 'UN_AVL'), (6692, 'AVL_TRAIN'), (9136, 'UN_AVL'), (9243, 'AVL_TRAIN'), (10252, 'UN_AVL'), (15088, 'AVL_TRAIN'), (39618, 'AVL_EVAL'), (70350, 'UN_AVL'), (81416, 'AVL_TRAIN'), (85218, 'UN_AVL'), (93701, 'AVL_TRAIN'), (93702, 'UN_AVL'), (93706, 'AVL_TRAIN'), (93712, 'UN_AVL'), (93712, 'AVL_TRAIN'), (93714, 'UN_AVL'), (93735, 'AVL_TRAIN'), (99705, 'UN_AVL'), (158713, 'AVL_TRAIN'), (158755, 'UN_AVL'), (159663, 'AVL_TRAIN'), (159664, 'UN_AVL'), (159976, 'AVL_TRAIN'), (164180, 'UN_AVL'), (172831, 'AVL_TRAIN'), (172832, 'UN_AVL'), (172840, 'AVL_TRAIN'), (193260, 'AVL_EVAL'), (234920, 'UN_AVL'), (251465, 'AVL_TRAIN'), (253057, 'UN_AVL'), (283984, 'AVL_TRAIN'), (309208, 'AVL_EVAL'), (317305, 'UN_AVL'), (342149, 'AVL_EVAL'), (351248, 'UN_AVL'), (362391, 'AVL_TRAIN'), (362392, 'UN_AVL'), (362393, 'AVL_TRAIN'), (362402, 'UN_AVL'), (362402, 'AVL_TRAIN'), (362411, 'UN_AVL'), (362537, 'AVL_TRAIN'), (385238, 'AVL_EVAL'), (385239, 'AVL_TRAIN'), (385253, 'AVL_EVAL'), (385255, 'AVL_TRAIN'), (385256, 'AVL_EVAL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_8.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_8.json index 7c0e60d41..69d33d990 100644 --- 
a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_8.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_8.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "13.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (14086, 'AVL_TRAIN'), (28419, 'AVL_EVAL'), (49568, 'UN_AVL'), (49629, 'AVL_TRAIN'), (52427, 'AVL_EVAL'), (58025, 'UN_AVL'), (65611, 'AVL_TRAIN'), (79774, 'AVL_EVAL'), (83313, 'AVL_TRAIN'), (83321, 'AVL_EVAL'), (83728, 'AVL_TRAIN'), (95271, 'AVL_EVAL'), (101892, 'UN_AVL'), (101892, 'AVL_TRAIN'), (102501, 'AVL_EVAL'), (102502, 'AVL_TRAIN'), (102508, 'AVL_EVAL'), (102509, 'AVL_TRAIN'), (104568, 'AVL_EVAL'), (104594, 'AVL_TRAIN'), (107007, 'AVL_EVAL'), (109057, 'UN_AVL'), (123025, 'AVL_TRAIN'), (126123, 'UN_AVL'), (126151, 'AVL_TRAIN'), (131671, 'UN_AVL'), (133452, 'AVL_TRAIN'), (135169, 'UN_AVL'), (135207, 'AVL_TRAIN'), (136408, 'UN_AVL'), (136411, 'AVL_TRAIN'), (140912, 'AVL_EVAL'), (144921, 'UN_AVL'), (147794, 'AVL_TRAIN'), (158779, 'AVL_EVAL'), (159844, 'AVL_TRAIN'), (161852, 'AVL_EVAL'), (162997, 'AVL_TRAIN'), (169542, 'AVL_EVAL'), (169594, 'AVL_TRAIN'), (173040, 'AVL_EVAL'), (179280, 'UN_AVL'), (179280, 'AVL_TRAIN'), (184709, 'AVL_EVAL'), (184733, 'AVL_TRAIN'), (217008, 
'AVL_EVAL'), (236496, 'UN_AVL'), (238720, 'AVL_TRAIN'), (241991, 'UN_AVL'), (242634, 'AVL_TRAIN'), (242664, 'UN_AVL'), (242677, 'AVL_TRAIN'), (247133, 'UN_AVL'), (247136, 'AVL_TRAIN'), (253504, 'UN_AVL'), (254584, 'AVL_TRAIN'), (255913, 'UN_AVL'), (257698, 'AVL_TRAIN'), (258798, 'UN_AVL'), (258815, 'AVL_TRAIN'), (261988, 'UN_AVL'), (264706, 'AVL_TRAIN'), (274712, 'UN_AVL'), (276514, 'AVL_TRAIN'), (289007, 'AVL_EVAL'), (292214, 'AVL_TRAIN'), (305171, 'AVL_EVAL'), (308278, 'AVL_TRAIN'), (311550, 'AVL_EVAL'), (324195, 'AVL_TRAIN'), (327485, 'AVL_EVAL'), (329116, 'AVL_TRAIN'), (333701, 'AVL_EVAL'), (362870, 'UN_AVL'), (366125, 'AVL_TRAIN'), (389628, 'AVL_EVAL'), (410139, 'UN_AVL'), (416056, 'AVL_TRAIN'), (422013, 'UN_AVL'), (424168, 'AVL_TRAIN'), (427102, 'UN_AVL'), (430244, 'AVL_TRAIN'), (437747, 'AVL_EVAL'), (441228, 'UN_AVL'), (445549, 'AVL_TRAIN'), (476355, 'AVL_EVAL'), (477020, 'AVL_TRAIN'), (477191, 'AVL_EVAL'), (478691, 'AVL_TRAIN'), (479336, 'AVL_EVAL'), (479796, 'AVL_TRAIN'), (487185, 'AVL_EVAL'), (505708, 'UN_AVL'), (505708, 'AVL_TRAIN'), (511633, 'AVL_EVAL'), (513723, 'UN_AVL'), (513723, 'AVL_TRAIN'), (515049, 'UN_AVL'), (515120, 'AVL_TRAIN'), (523045, 'AVL_EVAL'), (529118, 'UN_AVL'), (532010, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (14086, 'AVL_TRAIN'), (28419, 'AVL_EVAL'), (45234, 'UN_AVL'), (49629, 'AVL_TRAIN'), (52427, 'AVL_EVAL'), (53652, 'UN_AVL'), (65611, 'AVL_TRAIN'), (79774, 'AVL_EVAL'), (81757, 'UN_AVL'), (83313, 'AVL_TRAIN'), (83321, 'UN_AVL'), (83728, 'AVL_TRAIN'), (95271, 'AVL_EVAL'), (101199, 'UN_AVL'), (101892, 'AVL_TRAIN'), (102501, 'UN_AVL'), (102502, 'AVL_TRAIN'), (102508, 'UN_AVL'), (102509, 'AVL_TRAIN'), (104568, 'UN_AVL'), (104594, 'AVL_TRAIN'), (107007, 'UN_AVL'), (123025, 'AVL_TRAIN'), (126123, 'UN_AVL'), (126151, 'AVL_TRAIN'), (131671, 'UN_AVL'), (133452, 'AVL_TRAIN'), (135169, 'UN_AVL'), (135207, 'AVL_TRAIN'), (136408, 'UN_AVL'), (136411, 'AVL_TRAIN'), (140912, 'AVL_EVAL'), (141082, 
'UN_AVL'), (147794, 'AVL_TRAIN'), (158779, 'AVL_EVAL'), (159819, 'UN_AVL'), (159844, 'AVL_TRAIN'), (161852, 'UN_AVL'), (162997, 'AVL_TRAIN'), (169542, 'AVL_EVAL'), (169594, 'AVL_TRAIN'), (173040, 'AVL_EVAL'), (175322, 'UN_AVL'), (179280, 'AVL_TRAIN'), (184709, 'UN_AVL'), (184733, 'AVL_TRAIN'), (217008, 'AVL_EVAL'), (229707, 'UN_AVL'), (238720, 'AVL_TRAIN'), (241991, 'UN_AVL'), (242634, 'AVL_TRAIN'), (242664, 'UN_AVL'), (242677, 'AVL_TRAIN'), (247133, 'UN_AVL'), (247136, 'AVL_TRAIN'), (253504, 'UN_AVL'), (254584, 'AVL_TRAIN'), (255913, 'UN_AVL'), (257698, 'AVL_TRAIN'), (258798, 'UN_AVL'), (258815, 'AVL_TRAIN'), (261988, 'UN_AVL'), (264706, 'AVL_TRAIN'), (274712, 'UN_AVL'), (276514, 'AVL_TRAIN'), (289007, 'AVL_EVAL'), (292214, 'UN_AVL'), (292214, 'AVL_TRAIN'), (305171, 'AVL_EVAL'), (308278, 'AVL_TRAIN'), (311550, 'AVL_EVAL'), (324195, 'AVL_TRAIN'), (327485, 'AVL_EVAL'), (329116, 'AVL_TRAIN'), (333701, 'AVL_EVAL'), (362870, 'UN_AVL'), (366125, 'AVL_TRAIN'), (389628, 'AVL_EVAL'), (401016, 'UN_AVL'), (416056, 'AVL_TRAIN'), (422013, 'UN_AVL'), (424168, 'AVL_TRAIN'), (427102, 'UN_AVL'), (430244, 'AVL_TRAIN'), (437747, 'UN_AVL'), (445549, 'AVL_TRAIN'), (476355, 'UN_AVL'), (477020, 'AVL_TRAIN'), (477191, 'UN_AVL'), (478691, 'AVL_TRAIN'), (479336, 'UN_AVL'), (479796, 'AVL_TRAIN'), (487185, 'AVL_EVAL'), (497333, 'UN_AVL'), (505708, 'AVL_TRAIN'), (511633, 'UN_AVL'), (513723, 'AVL_TRAIN'), (515049, 'UN_AVL'), (515120, 'AVL_TRAIN'), (523045, 'AVL_EVAL'), (526222, 'UN_AVL'), (532010, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_80.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_80.json index a69203d00..df6f877d7 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_80.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_80.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "22.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (23275, 'UN_AVL'), (91670, 'AVL_TRAIN'), (93509, 'AVL_EVAL'), (94662, 'AVL_TRAIN'), (102922, 'AVL_EVAL'), (161177, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (91670, 'AVL_TRAIN'), (93509, 'UN_AVL'), (94662, 'AVL_TRAIN'), (102922, 'AVL_EVAL'), (161177, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_81.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_81.json index 8f8bde731..2d3a18447 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_81.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_81.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", 
"is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "9.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (1875, 'UN_AVL'), (6737, 'AVL_TRAIN'), (15570, 'AVL_EVAL'), (64624, 'UN_AVL'), (135079, 'AVL_TRAIN'), (148341, 'AVL_EVAL'), (221763, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (1875, 'UN_AVL'), (6737, 'AVL_TRAIN'), (15570, 'AVL_EVAL'), (19819, 'UN_AVL'), (135079, 'AVL_TRAIN'), (148341, 'AVL_EVAL'), (169143, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_82.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_82.json index 4cef1dc2d..ff757d333 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_82.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_82.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "18.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (4125, 'AVL_EVAL'), (16119, 'UN_AVL'), (17583, 'AVL_TRAIN'), (54386, 'AVL_EVAL'), (76257, 'UN_AVL')]", 
"avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (4125, 'AVL_EVAL'), (6734, 'UN_AVL'), (17583, 'AVL_TRAIN'), (54386, 'AVL_EVAL'), (57880, 'UN_AVL'), (70487, 'AVL_EVAL'), (73064, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_83.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_83.json index ddd807fc1..2143a3e05 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_83.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_83.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "14.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (55781, 'UN_AVL'), (68421, 'AVL_TRAIN'), (68587, 'UN_AVL'), (68622, 'AVL_TRAIN'), (69004, 'UN_AVL'), (69007, 'AVL_TRAIN'), (70122, 'UN_AVL'), (70123, 'AVL_TRAIN'), (70125, 'UN_AVL'), (70261, 'AVL_TRAIN'), (71894, 'AVL_EVAL'), (72008, 'AVL_TRAIN'), (73289, 'AVL_EVAL'), (73294, 'AVL_TRAIN'), (75267, 'AVL_EVAL'), (75267, 'AVL_TRAIN'), (75274, 'AVL_EVAL'), (80560, 'UN_AVL'), (81756, 'AVL_TRAIN'), (81763, 'UN_AVL'), (81764, 'AVL_TRAIN'), (81791, 'UN_AVL'), (84015, 'AVL_TRAIN'), (85329, 'AVL_EVAL'), (85599, 'AVL_TRAIN'), (85677, 'AVL_EVAL'), (88367, 'AVL_TRAIN'), 
(88651, 'AVL_EVAL'), (88656, 'AVL_TRAIN'), (88714, 'AVL_EVAL'), (88714, 'AVL_TRAIN'), (88715, 'AVL_EVAL'), (88715, 'AVL_TRAIN'), (88716, 'AVL_EVAL'), (88732, 'AVL_TRAIN'), (91239, 'AVL_EVAL'), (91295, 'AVL_TRAIN'), (91731, 'AVL_EVAL'), (91841, 'AVL_TRAIN'), (92198, 'AVL_EVAL'), (92198, 'AVL_TRAIN'), (92199, 'AVL_EVAL'), (92466, 'AVL_TRAIN'), (92726, 'AVL_EVAL'), (92748, 'AVL_TRAIN'), (92755, 'AVL_EVAL'), (92807, 'AVL_TRAIN'), (93307, 'AVL_EVAL'), (93316, 'AVL_TRAIN'), (93317, 'AVL_EVAL'), (93319, 'AVL_TRAIN'), (93320, 'AVL_EVAL'), (93366, 'AVL_TRAIN'), (93444, 'AVL_EVAL'), (175339, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (41042, 'AVL_EVAL'), (46212, 'UN_AVL'), (68421, 'AVL_TRAIN'), (68587, 'UN_AVL'), (68622, 'AVL_TRAIN'), (69004, 'UN_AVL'), (69007, 'AVL_TRAIN'), (70122, 'UN_AVL'), (70123, 'AVL_TRAIN'), (70125, 'UN_AVL'), (70261, 'AVL_TRAIN'), (71894, 'UN_AVL'), (72008, 'AVL_TRAIN'), (73289, 'UN_AVL'), (73294, 'AVL_TRAIN'), (75267, 'UN_AVL'), (75267, 'AVL_TRAIN'), (75274, 'UN_AVL'), (81756, 'AVL_TRAIN'), (81763, 'UN_AVL'), (81764, 'AVL_TRAIN'), (81791, 'UN_AVL'), (84015, 'AVL_TRAIN'), (85329, 'UN_AVL'), (85599, 'AVL_TRAIN'), (85677, 'UN_AVL'), (88367, 'AVL_TRAIN'), (88651, 'UN_AVL'), (88656, 'AVL_TRAIN'), (88714, 'UN_AVL'), (88714, 'AVL_TRAIN'), (88715, 'UN_AVL'), (88715, 'AVL_TRAIN'), (88716, 'UN_AVL'), (88732, 'AVL_TRAIN'), (91239, 'AVL_EVAL'), (91295, 'AVL_TRAIN'), (91731, 'AVL_EVAL'), (91841, 'AVL_TRAIN'), (92198, 'AVL_EVAL'), (92198, 'AVL_TRAIN'), (92199, 'AVL_EVAL'), (92466, 'AVL_TRAIN'), (92726, 'AVL_EVAL'), (92748, 'AVL_TRAIN'), (92755, 'AVL_EVAL'), (92807, 'AVL_TRAIN'), (93307, 'AVL_EVAL'), (93316, 'AVL_TRAIN'), (93317, 'AVL_EVAL'), (93319, 'AVL_TRAIN'), (93320, 'AVL_EVAL'), (93366, 'AVL_TRAIN'), (93444, 'AVL_EVAL'), (175339, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_84.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_84.json index 
fb1d60c10..81bb20322 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_84.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_84.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "27.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (71298, 'AVL_EVAL'), (133850, 'AVL_TRAIN'), (142392, 'AVL_EVAL'), (174984, 'UN_AVL'), (188462, 'AVL_EVAL'), (229387, 'UN_AVL'), (325864, 'AVL_EVAL'), (336082, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (71298, 'AVL_EVAL'), (96352, 'UN_AVL'), (133850, 'AVL_TRAIN'), (142392, 'AVL_EVAL'), (161821, 'UN_AVL'), (188462, 'AVL_EVAL'), (192509, 'UN_AVL'), (325864, 'AVL_EVAL'), (327721, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_85.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_85.json index b6064e48b..6b616a243 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_85.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_85.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + 
"data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "3.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (5931, 'AVL_EVAL'), (33832, 'UN_AVL'), (33938, 'AVL_TRAIN'), (45609, 'AVL_EVAL'), (96608, 'AVL_TRAIN'), (96683, 'AVL_EVAL'), (97202, 'UN_AVL'), (114801, 'AVL_TRAIN'), (124228, 'AVL_EVAL'), (152983, 'UN_AVL'), (158990, 'AVL_TRAIN'), (162087, 'AVL_EVAL'), (164619, 'UN_AVL'), (172261, 'AVL_TRAIN'), (175152, 'AVL_EVAL'), (176477, 'AVL_TRAIN'), (176478, 'AVL_EVAL'), (176481, 'AVL_TRAIN'), (180232, 'AVL_EVAL'), (180233, 'AVL_TRAIN'), (181693, 'AVL_EVAL'), (218874, 'UN_AVL'), (238498, 'AVL_TRAIN'), (239308, 'UN_AVL'), (239918, 'AVL_TRAIN'), (241708, 'UN_AVL'), (257321, 'AVL_TRAIN'), (261066, 'UN_AVL'), (266197, 'AVL_TRAIN'), (270056, 'AVL_EVAL'), (287535, 'UN_AVL'), (307665, 'AVL_TRAIN'), (308550, 'UN_AVL'), (308626, 'AVL_TRAIN'), (314453, 'AVL_EVAL'), (329978, 'UN_AVL'), (341202, 'AVL_TRAIN'), (341470, 'UN_AVL'), (341513, 'AVL_TRAIN'), (343459, 'AVL_EVAL'), (343460, 'AVL_TRAIN'), (343493, 'AVL_EVAL'), (343493, 'AVL_TRAIN'), (344083, 'AVL_EVAL'), (348174, 'UN_AVL'), (356056, 'AVL_TRAIN'), (357385, 'UN_AVL'), (371649, 'AVL_TRAIN'), (374287, 'UN_AVL'), (411109, 'AVL_TRAIN'), (413668, 'UN_AVL'), (414095, 'AVL_TRAIN'), (417763, 'AVL_EVAL'), (426534, 'UN_AVL'), (426540, 'AVL_TRAIN'), (428833, 'AVL_EVAL'), (432664, 'UN_AVL'), (451683, 'AVL_TRAIN'), (459575, 'AVL_EVAL'), (481788, 'UN_AVL'), 
(488430, 'AVL_TRAIN'), (490031, 'UN_AVL'), (490136, 'AVL_TRAIN'), (493219, 'AVL_EVAL'), (501176, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'AVL_TRAIN'), (5931, 'AVL_EVAL'), (24254, 'UN_AVL'), (33938, 'AVL_TRAIN'), (45609, 'AVL_EVAL'), (79743, 'UN_AVL'), (96608, 'AVL_TRAIN'), (96683, 'UN_AVL'), (114801, 'AVL_TRAIN'), (124228, 'AVL_EVAL'), (132062, 'UN_AVL'), (158990, 'AVL_TRAIN'), (162087, 'UN_AVL'), (172261, 'AVL_TRAIN'), (175152, 'UN_AVL'), (176477, 'AVL_TRAIN'), (176478, 'UN_AVL'), (176481, 'AVL_TRAIN'), (180232, 'AVL_EVAL'), (180233, 'AVL_TRAIN'), (181693, 'AVL_EVAL'), (193585, 'UN_AVL'), (238498, 'AVL_TRAIN'), (239308, 'UN_AVL'), (239918, 'AVL_TRAIN'), (241708, 'UN_AVL'), (257321, 'AVL_TRAIN'), (261066, 'UN_AVL'), (266197, 'AVL_TRAIN'), (270056, 'UN_AVL'), (307665, 'AVL_TRAIN'), (308550, 'UN_AVL'), (308626, 'AVL_TRAIN'), (314453, 'AVL_EVAL'), (314695, 'UN_AVL'), (341202, 'AVL_TRAIN'), (341470, 'UN_AVL'), (341513, 'AVL_TRAIN'), (343459, 'UN_AVL'), (343460, 'AVL_TRAIN'), (343493, 'UN_AVL'), (343493, 'AVL_TRAIN'), (344083, 'UN_AVL'), (356056, 'AVL_TRAIN'), (357385, 'UN_AVL'), (371649, 'AVL_TRAIN'), (374287, 'UN_AVL'), (411109, 'AVL_TRAIN'), (413668, 'UN_AVL'), (414095, 'AVL_TRAIN'), (417763, 'UN_AVL'), (426540, 'AVL_TRAIN'), (428833, 'UN_AVL'), (451683, 'AVL_TRAIN'), (459575, 'AVL_EVAL'), (467648, 'UN_AVL'), (488430, 'AVL_TRAIN'), (490031, 'UN_AVL'), (490136, 'AVL_TRAIN'), (493219, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_86.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_86.json index fba8efb12..24bad40f2 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_86.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_86.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "15.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (10223, 'AVL_TRAIN'), (15412, 'UN_AVL'), (15439, 'AVL_TRAIN'), (38295, 'AVL_EVAL'), (38302, 'AVL_TRAIN'), (41547, 'AVL_EVAL'), (41561, 'AVL_TRAIN'), (41665, 'AVL_EVAL'), (52980, 'UN_AVL'), (58902, 'AVL_TRAIN'), (62479, 'UN_AVL'), (62912, 'AVL_TRAIN'), (65484, 'AVL_EVAL'), (67966, 'UN_AVL'), (73011, 'AVL_TRAIN'), (81621, 'UN_AVL'), (81678, 'AVL_TRAIN'), (82031, 'UN_AVL'), (82048, 'AVL_TRAIN'), (82133, 'UN_AVL'), (83949, 'AVL_TRAIN'), (85708, 'UN_AVL'), (86868, 'AVL_TRAIN'), (87941, 'UN_AVL'), (89157, 'AVL_TRAIN'), (89344, 'UN_AVL'), (89817, 'AVL_TRAIN'), (90808, 'UN_AVL'), (90816, 'AVL_TRAIN'), (91632, 'UN_AVL'), (91634, 'AVL_TRAIN'), (91663, 'UN_AVL'), (91665, 'AVL_TRAIN'), (91668, 'UN_AVL'), (91694, 'AVL_TRAIN'), (91849, 'UN_AVL'), (96437, 'AVL_TRAIN'), (131012, 'AVL_EVAL'), (145311, 'UN_AVL'), (154350, 'AVL_TRAIN'), (160466, 'AVL_EVAL'), (160583, 'AVL_TRAIN'), (160878, 'AVL_EVAL'), (173162, 'UN_AVL'), (177101, 'AVL_TRAIN'), (179026, 'UN_AVL'), (180563, 'AVL_TRAIN'), (216689, 'AVL_EVAL'), (231269, 'UN_AVL'), (236002, 'AVL_TRAIN'), (236543, 'UN_AVL'), (237742, 'AVL_TRAIN'), (237783, 'UN_AVL'), (237787, 'AVL_TRAIN'), (238962, 'UN_AVL'), (238963, 'AVL_TRAIN'), (240008, 'UN_AVL'), (240020, 'AVL_TRAIN'), (242325, 'AVL_EVAL'), (246625, 'UN_AVL'), (246625, 'AVL_TRAIN'), (248409, 'UN_AVL'), (248487, 
'AVL_TRAIN'), (248489, 'UN_AVL'), (248491, 'AVL_TRAIN'), (248495, 'UN_AVL'), (248497, 'AVL_TRAIN'), (248557, 'UN_AVL'), (248571, 'AVL_TRAIN'), (248572, 'UN_AVL'), (248719, 'AVL_TRAIN'), (248721, 'UN_AVL'), (249061, 'AVL_TRAIN'), (249063, 'UN_AVL'), (249068, 'AVL_TRAIN'), (249092, 'UN_AVL'), (249207, 'AVL_TRAIN'), (249208, 'UN_AVL'), (249210, 'AVL_TRAIN'), (250436, 'UN_AVL'), (250453, 'AVL_TRAIN'), (250456, 'UN_AVL'), (250479, 'AVL_TRAIN'), (250481, 'UN_AVL'), (250482, 'AVL_TRAIN'), (251664, 'UN_AVL'), (251783, 'AVL_TRAIN'), (251785, 'UN_AVL'), (251792, 'AVL_TRAIN'), (251817, 'UN_AVL'), (255226, 'AVL_TRAIN'), (258714, 'AVL_EVAL'), (259885, 'UN_AVL'), (265294, 'AVL_TRAIN'), (269727, 'UN_AVL'), (269740, 'AVL_TRAIN'), (270435, 'UN_AVL'), (273870, 'AVL_TRAIN'), (273872, 'UN_AVL'), (273922, 'AVL_TRAIN'), (273924, 'UN_AVL'), (275743, 'AVL_TRAIN'), (278368, 'AVL_EVAL'), (279385, 'UN_AVL'), (284264, 'AVL_TRAIN'), (304016, 'AVL_EVAL'), (315926, 'UN_AVL'), (316540, 'AVL_TRAIN'), (318164, 'UN_AVL'), (318170, 'AVL_TRAIN'), (319158, 'UN_AVL'), (319561, 'AVL_TRAIN'), (319685, 'UN_AVL'), (323111, 'AVL_TRAIN'), (325054, 'UN_AVL'), (325056, 'AVL_TRAIN'), (325060, 'UN_AVL'), (325184, 'AVL_TRAIN'), (326130, 'UN_AVL'), (326196, 'AVL_TRAIN'), (327005, 'UN_AVL'), (329098, 'AVL_EVAL'), (329098, 'AVL_TRAIN'), (329425, 'AVL_EVAL'), (330470, 'AVL_TRAIN'), (330765, 'AVL_EVAL'), (340261, 'UN_AVL'), (340261, 'AVL_TRAIN'), (340466, 'UN_AVL'), (340491, 'AVL_TRAIN'), (340515, 'UN_AVL'), (340736, 'AVL_TRAIN'), (341743, 'UN_AVL'), (341758, 'AVL_TRAIN'), (342911, 'UN_AVL'), (342916, 'AVL_TRAIN'), (343086, 'UN_AVL'), (347773, 'AVL_TRAIN'), (352176, 'UN_AVL'), (352187, 'AVL_TRAIN'), (352720, 'UN_AVL'), (358989, 'AVL_TRAIN'), (377191, 'AVL_EVAL'), (405569, 'UN_AVL'), (405569, 'AVL_TRAIN'), (408564, 'UN_AVL'), (409860, 'AVL_TRAIN'), (411396, 'UN_AVL'), (413097, 'AVL_TRAIN'), (414255, 'UN_AVL'), (414275, 'AVL_TRAIN'), (415591, 'UN_AVL'), (415595, 'AVL_TRAIN'), (415620, 'UN_AVL'), (415866, 'AVL_TRAIN'), 
(416384, 'UN_AVL'), (416397, 'AVL_TRAIN'), (416650, 'UN_AVL'), (416986, 'AVL_TRAIN'), (423733, 'UN_AVL'), (423735, 'AVL_TRAIN'), (423829, 'UN_AVL'), (425784, 'AVL_TRAIN'), (427689, 'UN_AVL'), (427713, 'AVL_TRAIN'), (428101, 'UN_AVL'), (428109, 'AVL_TRAIN'), (429085, 'UN_AVL'), (429096, 'AVL_TRAIN'), (429866, 'UN_AVL'), (429871, 'AVL_TRAIN'), (429877, 'UN_AVL'), (429902, 'AVL_TRAIN'), (429906, 'UN_AVL'), (430153, 'AVL_TRAIN'), (430988, 'UN_AVL'), (435560, 'AVL_TRAIN'), (438811, 'UN_AVL'), (438820, 'AVL_TRAIN'), (441758, 'UN_AVL'), (441780, 'AVL_TRAIN'), (443740, 'UN_AVL'), (444246, 'AVL_TRAIN'), (470980, 'AVL_EVAL'), (480392, 'UN_AVL'), (487254, 'AVL_TRAIN'), (491831, 'AVL_EVAL'), (496159, 'UN_AVL'), (498984, 'AVL_TRAIN'), (499701, 'UN_AVL'), (499703, 'AVL_TRAIN'), (499907, 'UN_AVL'), (499914, 'AVL_TRAIN'), (501313, 'UN_AVL'), (503611, 'AVL_TRAIN'), (504699, 'UN_AVL'), (504703, 'AVL_TRAIN'), (504933, 'UN_AVL'), (505663, 'AVL_TRAIN'), (507020, 'UN_AVL'), (507032, 'AVL_TRAIN'), (507034, 'UN_AVL'), (507036, 'AVL_TRAIN'), (511472, 'UN_AVL'), (511476, 'AVL_TRAIN'), (511477, 'UN_AVL'), (511479, 'AVL_TRAIN'), (511629, 'UN_AVL'), (511632, 'AVL_TRAIN'), (511637, 'UN_AVL'), (511643, 'AVL_TRAIN'), (511645, 'UN_AVL'), (511652, 'AVL_TRAIN'), (511668, 'UN_AVL'), (511674, 'AVL_TRAIN'), (511749, 'UN_AVL'), (511750, 'AVL_TRAIN'), (511752, 'UN_AVL'), (511756, 'AVL_TRAIN'), (511850, 'UN_AVL'), (511852, 'AVL_TRAIN'), (511854, 'UN_AVL'), (511855, 'AVL_TRAIN'), (516034, 'UN_AVL'), (516035, 'AVL_TRAIN'), (516054, 'UN_AVL'), (516552, 'AVL_TRAIN'), (519985, 'UN_AVL'), (520172, 'AVL_TRAIN'), (520478, 'AVL_EVAL'), (521403, 'UN_AVL'), (525820, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (10223, 'AVL_TRAIN'), (15412, 'UN_AVL'), (15439, 'AVL_TRAIN'), (38295, 'AVL_EVAL'), (38302, 'AVL_TRAIN'), (41547, 'AVL_EVAL'), (41561, 'AVL_TRAIN'), (41665, 'AVL_EVAL'), (52980, 'UN_AVL'), (58902, 'AVL_TRAIN'), (62479, 'UN_AVL'), (62912, 'AVL_TRAIN'), (65484, 'UN_AVL'), 
(73011, 'AVL_TRAIN'), (81621, 'UN_AVL'), (81678, 'AVL_TRAIN'), (82031, 'UN_AVL'), (82048, 'AVL_TRAIN'), (82133, 'UN_AVL'), (83949, 'AVL_TRAIN'), (85708, 'UN_AVL'), (86868, 'AVL_TRAIN'), (87941, 'UN_AVL'), (89157, 'AVL_TRAIN'), (89344, 'UN_AVL'), (89817, 'AVL_TRAIN'), (90808, 'UN_AVL'), (90816, 'AVL_TRAIN'), (91632, 'UN_AVL'), (91634, 'AVL_TRAIN'), (91663, 'UN_AVL'), (91665, 'AVL_TRAIN'), (91668, 'UN_AVL'), (91694, 'AVL_TRAIN'), (91849, 'UN_AVL'), (96437, 'AVL_TRAIN'), (131012, 'AVL_EVAL'), (144001, 'UN_AVL'), (154350, 'AVL_TRAIN'), (160466, 'AVL_EVAL'), (160583, 'AVL_TRAIN'), (160878, 'AVL_EVAL'), (173162, 'UN_AVL'), (177101, 'AVL_TRAIN'), (179026, 'UN_AVL'), (180563, 'AVL_TRAIN'), (216689, 'AVL_EVAL'), (225952, 'UN_AVL'), (236002, 'AVL_TRAIN'), (236543, 'UN_AVL'), (237742, 'AVL_TRAIN'), (237783, 'UN_AVL'), (237787, 'AVL_TRAIN'), (238962, 'UN_AVL'), (238963, 'AVL_TRAIN'), (240008, 'UN_AVL'), (240020, 'AVL_TRAIN'), (242325, 'UN_AVL'), (246625, 'AVL_TRAIN'), (248409, 'UN_AVL'), (248487, 'AVL_TRAIN'), (248489, 'UN_AVL'), (248491, 'AVL_TRAIN'), (248495, 'UN_AVL'), (248497, 'AVL_TRAIN'), (248557, 'UN_AVL'), (248571, 'AVL_TRAIN'), (248572, 'UN_AVL'), (248719, 'AVL_TRAIN'), (248721, 'UN_AVL'), (249061, 'AVL_TRAIN'), (249063, 'UN_AVL'), (249068, 'AVL_TRAIN'), (249092, 'UN_AVL'), (249207, 'AVL_TRAIN'), (249208, 'UN_AVL'), (249210, 'AVL_TRAIN'), (250436, 'UN_AVL'), (250453, 'AVL_TRAIN'), (250456, 'UN_AVL'), (250479, 'AVL_TRAIN'), (250481, 'UN_AVL'), (250482, 'AVL_TRAIN'), (251664, 'UN_AVL'), (251783, 'AVL_TRAIN'), (251785, 'UN_AVL'), (251792, 'AVL_TRAIN'), (251817, 'UN_AVL'), (255226, 'AVL_TRAIN'), (258714, 'UN_AVL'), (265294, 'AVL_TRAIN'), (269727, 'UN_AVL'), (269740, 'AVL_TRAIN'), (270435, 'UN_AVL'), (273870, 'AVL_TRAIN'), (273872, 'UN_AVL'), (273922, 'AVL_TRAIN'), (273924, 'UN_AVL'), (275743, 'AVL_TRAIN'), (278368, 'UN_AVL'), (284264, 'AVL_TRAIN'), (304016, 'AVL_EVAL'), (309038, 'UN_AVL'), (316540, 'AVL_TRAIN'), (318164, 'UN_AVL'), (318170, 'AVL_TRAIN'), (319158, 
'UN_AVL'), (319561, 'AVL_TRAIN'), (319685, 'UN_AVL'), (323111, 'AVL_TRAIN'), (325054, 'UN_AVL'), (325056, 'AVL_TRAIN'), (325060, 'UN_AVL'), (325184, 'AVL_TRAIN'), (326130, 'UN_AVL'), (326196, 'AVL_TRAIN'), (327005, 'UN_AVL'), (329098, 'AVL_TRAIN'), (329425, 'UN_AVL'), (330470, 'AVL_TRAIN'), (330765, 'UN_AVL'), (340261, 'AVL_TRAIN'), (340466, 'UN_AVL'), (340491, 'AVL_TRAIN'), (340515, 'UN_AVL'), (340736, 'AVL_TRAIN'), (341743, 'UN_AVL'), (341758, 'AVL_TRAIN'), (342911, 'UN_AVL'), (342916, 'AVL_TRAIN'), (343086, 'UN_AVL'), (347773, 'AVL_TRAIN'), (352176, 'UN_AVL'), (352187, 'AVL_TRAIN'), (352720, 'UN_AVL'), (358989, 'AVL_TRAIN'), (377191, 'AVL_EVAL'), (405569, 'UN_AVL'), (405569, 'AVL_TRAIN'), (408564, 'UN_AVL'), (409860, 'AVL_TRAIN'), (411396, 'UN_AVL'), (413097, 'AVL_TRAIN'), (414255, 'UN_AVL'), (414275, 'AVL_TRAIN'), (415591, 'UN_AVL'), (415595, 'AVL_TRAIN'), (415620, 'UN_AVL'), (415866, 'AVL_TRAIN'), (416384, 'UN_AVL'), (416397, 'AVL_TRAIN'), (416650, 'UN_AVL'), (416986, 'AVL_TRAIN'), (423733, 'UN_AVL'), (423735, 'AVL_TRAIN'), (423829, 'UN_AVL'), (425784, 'AVL_TRAIN'), (427689, 'UN_AVL'), (427713, 'AVL_TRAIN'), (428101, 'UN_AVL'), (428109, 'AVL_TRAIN'), (429085, 'UN_AVL'), (429096, 'AVL_TRAIN'), (429866, 'UN_AVL'), (429871, 'AVL_TRAIN'), (429877, 'UN_AVL'), (429902, 'AVL_TRAIN'), (429906, 'UN_AVL'), (430153, 'AVL_TRAIN'), (430988, 'UN_AVL'), (435560, 'AVL_TRAIN'), (438811, 'UN_AVL'), (438820, 'AVL_TRAIN'), (441758, 'UN_AVL'), (441780, 'AVL_TRAIN'), (443740, 'UN_AVL'), (444246, 'AVL_TRAIN'), (470980, 'AVL_EVAL'), (477804, 'UN_AVL'), (487254, 'AVL_TRAIN'), (491831, 'UN_AVL'), (498984, 'AVL_TRAIN'), (499701, 'UN_AVL'), (499703, 'AVL_TRAIN'), (499907, 'UN_AVL'), (499914, 'AVL_TRAIN'), (501313, 'UN_AVL'), (503611, 'AVL_TRAIN'), (504699, 'UN_AVL'), (504703, 'AVL_TRAIN'), (504933, 'UN_AVL'), (505663, 'AVL_TRAIN'), (507020, 'UN_AVL'), (507032, 'AVL_TRAIN'), (507034, 'UN_AVL'), (507036, 'AVL_TRAIN'), (511472, 'UN_AVL'), (511476, 'AVL_TRAIN'), (511477, 'UN_AVL'), (511479, 
'AVL_TRAIN'), (511629, 'UN_AVL'), (511632, 'AVL_TRAIN'), (511637, 'UN_AVL'), (511643, 'AVL_TRAIN'), (511645, 'UN_AVL'), (511652, 'AVL_TRAIN'), (511668, 'UN_AVL'), (511674, 'AVL_TRAIN'), (511749, 'UN_AVL'), (511750, 'AVL_TRAIN'), (511752, 'UN_AVL'), (511756, 'AVL_TRAIN'), (511850, 'UN_AVL'), (511852, 'AVL_TRAIN'), (511854, 'UN_AVL'), (511855, 'AVL_TRAIN'), (516034, 'UN_AVL'), (516035, 'AVL_TRAIN'), (516054, 'UN_AVL'), (516552, 'AVL_TRAIN'), (519985, 'UN_AVL'), (520172, 'AVL_TRAIN'), (520478, 'UN_AVL'), (525820, 'AVL_TRAIN')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_87.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_87.json index 38d85bd42..d39402168 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_87.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_87.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (3095, 'AVL_TRAIN'), (13962, 'AVL_EVAL'), (103312, 'UN_AVL'), (167439, 'AVL_TRAIN'), (172204, 'UN_AVL'), (172303, 'AVL_TRAIN'), (173399, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (3095, 'AVL_TRAIN'), (13962, 
'AVL_EVAL'), (74461, 'UN_AVL'), (167439, 'AVL_TRAIN'), (172204, 'UN_AVL'), (172303, 'AVL_TRAIN'), (173399, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_88.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_88.json index b850a1b1c..5f2a969ad 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_88.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_88.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "4.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1589, 'AVL_TRAIN'), (7008, 'AVL_EVAL'), (14643, 'UN_AVL'), (14643, 'AVL_TRAIN'), (17077, 'AVL_EVAL'), (19743, 'UN_AVL'), (31062, 'AVL_TRAIN'), (43867, 'AVL_EVAL'), (80395, 'UN_AVL'), (80395, 'AVL_TRAIN'), (83187, 'AVL_EVAL'), (83187, 'AVL_TRAIN'), (83197, 'AVL_EVAL'), (86611, 'UN_AVL'), (86973, 'AVL_TRAIN'), (93253, 'AVL_EVAL'), (112792, 'UN_AVL'), (152897, 'AVL_TRAIN'), (156954, 'UN_AVL'), (167128, 'AVL_TRAIN'), (175736, 'AVL_EVAL'), (188181, 'UN_AVL'), (199728, 'AVL_TRAIN'), (203608, 'UN_AVL'), (203644, 'AVL_TRAIN'), (220955, 'AVL_EVAL'), (249838, 'UN_AVL'), (249838, 'AVL_TRAIN'), (263806, 'AVL_EVAL'), (282206, 'UN_AVL'), (297126, 'AVL_TRAIN'), (314253, 
'AVL_EVAL'), (343073, 'UN_AVL'), (348564, 'AVL_TRAIN'), (353133, 'AVL_EVAL'), (355061, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1589, 'AVL_TRAIN'), (7008, 'UN_AVL'), (14643, 'AVL_TRAIN'), (17077, 'UN_AVL'), (31062, 'AVL_TRAIN'), (43867, 'AVL_EVAL'), (71270, 'UN_AVL'), (80395, 'AVL_TRAIN'), (83187, 'UN_AVL'), (83187, 'AVL_TRAIN'), (83197, 'UN_AVL'), (86973, 'AVL_TRAIN'), (93253, 'AVL_EVAL'), (100954, 'UN_AVL'), (152897, 'AVL_TRAIN'), (156954, 'UN_AVL'), (167128, 'AVL_TRAIN'), (175736, 'AVL_EVAL'), (177588, 'UN_AVL'), (199728, 'AVL_TRAIN'), (203608, 'UN_AVL'), (203644, 'AVL_TRAIN'), (220955, 'AVL_EVAL'), (237743, 'UN_AVL'), (249838, 'AVL_TRAIN'), (263806, 'AVL_EVAL'), (274530, 'UN_AVL'), (297126, 'AVL_TRAIN'), (314253, 'AVL_EVAL'), (337378, 'UN_AVL'), (348564, 'AVL_TRAIN'), (353133, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_89.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_89.json index 10a27b9bb..2d00026ac 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_89.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_89.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "10.0", 
"avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (128246, 'AVL_EVAL'), (128246, 'AVL_TRAIN'), (133225, 'AVL_EVAL'), (133225, 'AVL_TRAIN'), (133227, 'AVL_EVAL'), (134131, 'AVL_TRAIN'), (134729, 'AVL_EVAL'), (134729, 'AVL_TRAIN'), (134803, 'AVL_EVAL'), (134803, 'AVL_TRAIN'), (134832, 'AVL_EVAL'), (134832, 'AVL_TRAIN'), (134994, 'AVL_EVAL'), (134995, 'AVL_TRAIN'), (135027, 'AVL_EVAL'), (135027, 'AVL_TRAIN'), (135028, 'AVL_EVAL'), (136042, 'AVL_TRAIN'), (136555, 'AVL_EVAL'), (139715, 'AVL_TRAIN'), (139823, 'AVL_EVAL'), (139982, 'AVL_TRAIN'), (139995, 'AVL_EVAL'), (141535, 'UN_AVL'), (141535, 'AVL_TRAIN'), (143991, 'AVL_EVAL'), (143991, 'AVL_TRAIN'), (146197, 'AVL_EVAL'), (151217, 'UN_AVL'), (151362, 'AVL_EVAL'), (151362, 'AVL_TRAIN'), (152118, 'AVL_EVAL'), (152118, 'AVL_TRAIN'), (157474, 'AVL_EVAL'), (167217, 'AVL_TRAIN'), (182917, 'AVL_EVAL'), (183273, 'AVL_TRAIN'), (183278, 'AVL_EVAL'), (198768, 'AVL_TRAIN'), (216668, 'AVL_EVAL'), (221269, 'AVL_TRAIN'), (225915, 'AVL_EVAL'), (230182, 'UN_AVL'), (230305, 'AVL_EVAL'), (230305, 'AVL_TRAIN'), (233655, 'AVL_EVAL'), (235888, 'AVL_TRAIN'), (237491, 'AVL_EVAL'), (239961, 'AVL_TRAIN'), (241331, 'AVL_EVAL'), (241331, 'AVL_TRAIN'), (247968, 'UN_AVL'), (247968, 'AVL_TRAIN'), (248563, 'UN_AVL'), (248563, 'AVL_TRAIN'), (260030, 'AVL_EVAL'), (265435, 'AVL_TRAIN'), (268837, 'AVL_EVAL'), (268837, 'AVL_TRAIN'), (269951, 'AVL_EVAL'), (269951, 'AVL_TRAIN'), (272345, 'AVL_EVAL'), (276611, 'AVL_TRAIN'), (276615, 'AVL_EVAL'), (276615, 'AVL_TRAIN'), (277148, 'AVL_EVAL'), (277577, 'AVL_TRAIN'), (280218, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (128246, 'AVL_TRAIN'), (133225, 'AVL_EVAL'), (133225, 'AVL_TRAIN'), (133227, 'AVL_EVAL'), (134131, 'AVL_TRAIN'), (134729, 'AVL_EVAL'), (134729, 'AVL_TRAIN'), (134803, 'AVL_EVAL'), (134803, 'AVL_TRAIN'), (134832, 'AVL_EVAL'), (134832, 'AVL_TRAIN'), (134994, 'AVL_EVAL'), (134995, 'AVL_TRAIN'), (135027, 'AVL_EVAL'), (135027, 'AVL_TRAIN'), (135028, 
'AVL_EVAL'), (136042, 'AVL_TRAIN'), (136555, 'AVL_EVAL'), (139009, 'UN_AVL'), (139715, 'AVL_TRAIN'), (139823, 'UN_AVL'), (139982, 'AVL_TRAIN'), (139995, 'UN_AVL'), (141535, 'AVL_TRAIN'), (143991, 'UN_AVL'), (143991, 'AVL_TRAIN'), (146197, 'AVL_EVAL'), (149464, 'UN_AVL'), (151362, 'AVL_TRAIN'), (152118, 'UN_AVL'), (152118, 'AVL_TRAIN'), (157474, 'AVL_EVAL'), (161056, 'UN_AVL'), (167217, 'AVL_TRAIN'), (182917, 'AVL_EVAL'), (183273, 'AVL_TRAIN'), (183278, 'AVL_EVAL'), (198768, 'UN_AVL'), (198768, 'AVL_TRAIN'), (216668, 'AVL_EVAL'), (221269, 'AVL_TRAIN'), (225915, 'AVL_EVAL'), (230182, 'UN_AVL'), (230305, 'AVL_TRAIN'), (233655, 'AVL_EVAL'), (235888, 'AVL_TRAIN'), (237491, 'AVL_EVAL'), (238774, 'UN_AVL'), (239961, 'AVL_TRAIN'), (241331, 'UN_AVL'), (241331, 'AVL_TRAIN'), (247968, 'UN_AVL'), (247968, 'AVL_TRAIN'), (248563, 'UN_AVL'), (248563, 'AVL_TRAIN'), (260030, 'AVL_EVAL'), (265435, 'UN_AVL'), (265435, 'AVL_TRAIN'), (268837, 'AVL_EVAL'), (268837, 'AVL_TRAIN'), (269951, 'AVL_EVAL'), (269951, 'AVL_TRAIN'), (272345, 'AVL_EVAL'), (276611, 'AVL_TRAIN'), (276615, 'AVL_EVAL'), (276615, 'AVL_TRAIN'), (277148, 'AVL_EVAL'), (277577, 'AVL_TRAIN'), (280218, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_9.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_9.json index da6c78559..1f41b04bb 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_9.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_9.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": 
true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "11.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (326273, 'AVL_EVAL'), (326273, 'AVL_TRAIN'), (329091, 'AVL_EVAL'), (329102, 'AVL_TRAIN'), (329588, 'AVL_EVAL'), (329700, 'AVL_TRAIN'), (330714, 'AVL_EVAL'), (344675, 'UN_AVL'), (358401, 'AVL_TRAIN'), (362855, 'AVL_EVAL'), (405327, 'UN_AVL'), (415692, 'AVL_TRAIN'), (425692, 'AVL_EVAL'), (444766, 'UN_AVL'), (444815, 'AVL_TRAIN'), (446975, 'AVL_EVAL'), (486646, 'UN_AVL'), (488898, 'AVL_TRAIN'), (490736, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (326273, 'AVL_TRAIN'), (329091, 'AVL_EVAL'), (329102, 'AVL_TRAIN'), (329588, 'AVL_EVAL'), (329700, 'AVL_TRAIN'), (330714, 'AVL_EVAL'), (336215, 'UN_AVL'), (358401, 'AVL_TRAIN'), (362855, 'AVL_EVAL'), (393211, 'UN_AVL'), (415692, 'AVL_TRAIN'), (425692, 'AVL_EVAL'), (438085, 'UN_AVL'), (444815, 'AVL_TRAIN'), (446975, 'AVL_EVAL'), (449413, 'UN_AVL'), (488898, 'AVL_TRAIN'), (490736, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_90.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_90.json index e1afa6361..cf383a628 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_90.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_90.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": 
"uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "38.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (9333, 'UN_AVL'), (23356, 'AVL_TRAIN'), (23404, 'UN_AVL'), (42877, 'AVL_EVAL'), (157978, 'AVL_TRAIN'), (159000, 'AVL_EVAL'), (159000, 'AVL_TRAIN'), (159010, 'AVL_EVAL'), (159098, 'AVL_TRAIN'), (160795, 'AVL_EVAL'), (160838, 'AVL_TRAIN'), (161315, 'AVL_EVAL'), (196950, 'UN_AVL'), (223180, 'AVL_TRAIN'), (225134, 'UN_AVL'), (238509, 'AVL_TRAIN'), (244735, 'AVL_EVAL'), (263439, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (23356, 'AVL_TRAIN'), (23404, 'UN_AVL'), (42877, 'AVL_EVAL'), (157459, 'UN_AVL'), (157978, 'AVL_TRAIN'), (159000, 'UN_AVL'), (159000, 'AVL_TRAIN'), (159010, 'UN_AVL'), (159098, 'AVL_TRAIN'), (160795, 'AVL_EVAL'), (160838, 'AVL_TRAIN'), (161315, 'AVL_EVAL'), (181770, 'UN_AVL'), (223180, 'AVL_TRAIN'), (225134, 'UN_AVL'), (238509, 'AVL_TRAIN'), (244735, 'AVL_EVAL'), (246225, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_91.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_91.json index 2fb26e30b..ffc1ebe09 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_91.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_91.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "3.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (1588, 'UN_AVL'), (34198, 'AVL_TRAIN'), (46611, 'AVL_EVAL'), (87990, 'UN_AVL'), (128949, 'AVL_TRAIN'), (129155, 'UN_AVL'), (129157, 'AVL_TRAIN'), (132593, 'AVL_EVAL'), (154209, 'UN_AVL'), (174627, 'AVL_TRAIN'), (175598, 'UN_AVL'), (216273, 'AVL_TRAIN'), (219936, 'AVL_EVAL'), (240188, 'UN_AVL'), (260088, 'AVL_TRAIN'), (264264, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (34198, 'AVL_TRAIN'), (46611, 'AVL_EVAL'), (68846, 'UN_AVL'), (128949, 'AVL_TRAIN'), (129155, 'UN_AVL'), (129157, 'AVL_TRAIN'), (132593, 'AVL_EVAL'), (143769, 'UN_AVL'), (174627, 'AVL_TRAIN'), (175598, 'UN_AVL'), (216273, 'AVL_TRAIN'), (219936, 'AVL_EVAL'), (230030, 'UN_AVL'), (260088, 'AVL_TRAIN'), (264264, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_92.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_92.json index 6fe8dab51..7665cc8f7 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_92.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_92.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "27.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (3831, 'UN_AVL'), (9788, 'AVL_TRAIN'), (19691, 'AVL_EVAL'), (19699, 'AVL_TRAIN'), (20211, 'AVL_EVAL'), (27163, 'AVL_TRAIN'), (59744, 'AVL_EVAL'), (94856, 'UN_AVL'), (102350, 'AVL_TRAIN'), (104233, 'UN_AVL'), (104253, 'AVL_TRAIN'), (106816, 'AVL_EVAL'), (106824, 'AVL_TRAIN'), (108827, 'AVL_EVAL'), (108912, 'AVL_TRAIN'), (148559, 'AVL_EVAL'), (193789, 'AVL_TRAIN'), (219531, 'AVL_EVAL'), (263417, 'UN_AVL'), (269174, 'AVL_TRAIN'), (272072, 'AVL_EVAL'), (274215, 'AVL_TRAIN'), (274350, 'AVL_EVAL'), (278421, 'UN_AVL'), (284956, 'AVL_TRAIN'), (324473, 'AVL_EVAL'), (351483, 'UN_AVL'), (358778, 'AVL_TRAIN'), (369320, 'AVL_EVAL'), (375925, 'AVL_TRAIN'), (392087, 'AVL_EVAL'), (441973, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (9788, 'AVL_TRAIN'), (19691, 'AVL_EVAL'), (19699, 'AVL_TRAIN'), (20211, 'AVL_EVAL'), (27163, 'UN_AVL'), (27163, 'AVL_TRAIN'), (59744, 'AVL_EVAL'), (90218, 'UN_AVL'), (102350, 'AVL_TRAIN'), (104233, 'UN_AVL'), (104253, 'AVL_TRAIN'), (106816, 'AVL_EVAL'), (106824, 'AVL_TRAIN'), (108827, 'AVL_EVAL'), (108912, 'AVL_TRAIN'), (148559, 'AVL_EVAL'), (186803, 'UN_AVL'), (193789, 'AVL_TRAIN'), (219531, 'AVL_EVAL'), (255904, 'UN_AVL'), (269174, 'AVL_TRAIN'), (272072, 'UN_AVL'), (274215, 'AVL_TRAIN'), (274350, 'UN_AVL'), (284956, 'AVL_TRAIN'), (324473, 'AVL_EVAL'), (339619, 'UN_AVL'), (358778, 'AVL_TRAIN'), (369320, 'AVL_EVAL'), (375925, 'AVL_TRAIN'), 
(392087, 'AVL_EVAL'), (434661, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_93.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_93.json index 00c531cf0..93e490175 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_93.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_93.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "3.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'AVL_EVAL'), (21428, 'UN_AVL'), (54281, 'AVL_TRAIN'), (57479, 'AVL_EVAL'), (58058, 'AVL_TRAIN'), (63109, 'AVL_EVAL'), (85393, 'UN_AVL'), (87756, 'AVL_TRAIN'), (93953, 'AVL_EVAL'), (125011, 'UN_AVL'), (131836, 'AVL_TRAIN'), (133718, 'AVL_EVAL'), (138927, 'UN_AVL'), (155799, 'AVL_TRAIN'), (163846, 'AVL_EVAL'), (181623, 'UN_AVL'), (187465, 'AVL_TRAIN'), (198113, 'AVL_EVAL'), (243267, 'UN_AVL'), (248219, 'AVL_TRAIN'), (248659, 'UN_AVL'), (248847, 'AVL_TRAIN'), (249125, 'AVL_EVAL'), (249125, 'AVL_TRAIN'), (249126, 'AVL_EVAL'), (250963, 'UN_AVL'), (254880, 'AVL_TRAIN'), (257126, 'AVL_EVAL'), (258275, 'AVL_TRAIN'), (260891, 'AVL_EVAL'), (273466, 'UN_AVL'), (315289, 'AVL_EVAL'), (326217, 'AVL_TRAIN'), (328876, 'AVL_EVAL'), (339661, 'AVL_TRAIN'), 
(339948, 'AVL_EVAL'), (347444, 'UN_AVL'), (360321, 'AVL_TRAIN'), (377147, 'AVL_EVAL'), (390802, 'AVL_TRAIN'), (390836, 'AVL_EVAL'), (413796, 'UN_AVL'), (423239, 'AVL_TRAIN'), (428015, 'AVL_EVAL'), (428237, 'AVL_TRAIN'), (430464, 'AVL_EVAL'), (444036, 'UN_AVL'), (472433, 'AVL_TRAIN'), (478155, 'AVL_EVAL'), (493432, 'UN_AVL'), (504932, 'AVL_TRAIN'), (505650, 'UN_AVL'), (505661, 'AVL_TRAIN'), (507880, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (54281, 'AVL_TRAIN'), (57479, 'UN_AVL'), (58058, 'AVL_TRAIN'), (63109, 'AVL_EVAL'), (77110, 'UN_AVL'), (87756, 'AVL_TRAIN'), (93953, 'AVL_EVAL'), (99945, 'UN_AVL'), (131836, 'AVL_TRAIN'), (133718, 'UN_AVL'), (155799, 'AVL_TRAIN'), (163846, 'AVL_EVAL'), (174577, 'UN_AVL'), (187465, 'AVL_TRAIN'), (198113, 'AVL_EVAL'), (226265, 'UN_AVL'), (248219, 'AVL_TRAIN'), (248659, 'UN_AVL'), (248847, 'AVL_TRAIN'), (249125, 'UN_AVL'), (249125, 'AVL_TRAIN'), (249126, 'UN_AVL'), (254880, 'AVL_TRAIN'), (257126, 'UN_AVL'), (258275, 'AVL_TRAIN'), (260891, 'AVL_EVAL'), (264833, 'UN_AVL'), (315289, 'AVL_EVAL'), (323179, 'UN_AVL'), (326217, 'AVL_TRAIN'), (328876, 'AVL_EVAL'), (335248, 'UN_AVL'), (339661, 'AVL_TRAIN'), (339948, 'UN_AVL'), (360321, 'AVL_TRAIN'), (377147, 'AVL_EVAL'), (390802, 'AVL_TRAIN'), (390836, 'AVL_EVAL'), (400261, 'UN_AVL'), (423239, 'AVL_TRAIN'), (428015, 'AVL_EVAL'), (428237, 'AVL_TRAIN'), (430464, 'AVL_EVAL'), (440152, 'UN_AVL'), (472433, 'AVL_TRAIN'), (478155, 'AVL_EVAL'), (483568, 'UN_AVL'), (504932, 'AVL_TRAIN'), (505650, 'UN_AVL'), (505661, 'AVL_TRAIN'), (507880, 'AVL_EVAL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_94.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_94.json index f7c0f0f79..ecdc3fc17 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_94.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_94.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, 
"dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "9.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (344987, 'AVL_TRAIN'), (345555, 'UN_AVL'), (357059, 'AVL_TRAIN'), (358122, 'UN_AVL'), (358123, 'AVL_TRAIN'), (358275, 'UN_AVL'), (358284, 'AVL_TRAIN'), (358813, 'UN_AVL'), (359070, 'AVL_TRAIN'), (359932, 'UN_AVL'), (413829, 'AVL_EVAL'), (425544, 'UN_AVL'), (488395, 'AVL_TRAIN'), (493615, 'AVL_EVAL'), (509034, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (344987, 'AVL_TRAIN'), (345555, 'UN_AVL'), (357059, 'AVL_TRAIN'), (358122, 'UN_AVL'), (358123, 'AVL_TRAIN'), (358275, 'UN_AVL'), (358284, 'AVL_TRAIN'), (358813, 'UN_AVL'), (359070, 'AVL_TRAIN'), (359932, 'UN_AVL'), (413829, 'AVL_EVAL'), (419670, 'UN_AVL'), (488395, 'AVL_TRAIN'), (493615, 'AVL_EVAL'), (500285, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_95.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_95.json index 67064edce..99aaacfa5 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_95.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_95.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": 
"/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "12.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (14867, 'AVL_EVAL'), (17903, 'UN_AVL'), (17923, 'AVL_TRAIN'), (18959, 'UN_AVL'), (18973, 'AVL_TRAIN'), (154828, 'AVL_EVAL'), (160541, 'UN_AVL'), (175628, 'AVL_TRAIN'), (187570, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (17923, 'AVL_TRAIN'), (18959, 'UN_AVL'), (18973, 'AVL_TRAIN'), (154828, 'UN_AVL'), (175628, 'AVL_TRAIN'), (187570, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_96.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_96.json index 395d36bd4..6ba6f227b 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_96.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_96.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, 
"evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "5.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8361, 'AVL_TRAIN'), (13232, 'UN_AVL'), (122507, 'AVL_TRAIN'), (125889, 'UN_AVL'), (163049, 'AVL_TRAIN'), (169890, 'UN_AVL'), (243967, 'AVL_TRAIN'), (246801, 'UN_AVL'), (255850, 'AVL_TRAIN'), (256490, 'UN_AVL'), (258239, 'AVL_TRAIN'), (272150, 'AVL_EVAL'), (301332, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (8361, 'AVL_TRAIN'), (13232, 'UN_AVL'), (122507, 'AVL_TRAIN'), (125889, 'UN_AVL'), (163049, 'AVL_TRAIN'), (169890, 'UN_AVL'), (243967, 'AVL_TRAIN'), (246801, 'UN_AVL'), (255850, 'AVL_TRAIN'), (256490, 'UN_AVL'), (258239, 'AVL_TRAIN'), (272150, 'AVL_EVAL'), (301332, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_97.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_97.json index 074e766ee..ab3624d5e 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_97.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_97.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ 
"manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "7.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (77831, 'AVL_TRAIN'), (82222, 'UN_AVL'), (82631, 'AVL_TRAIN'), (86208, 'UN_AVL'), (86457, 'AVL_TRAIN'), (97952, 'AVL_EVAL'), (100673, 'UN_AVL'), (117083, 'AVL_TRAIN'), (146346, 'AVL_EVAL'), (146781, 'AVL_TRAIN'), (148083, 'AVL_EVAL'), (155226, 'AVL_TRAIN'), (155237, 'AVL_EVAL'), (175529, 'UN_AVL'), (196646, 'AVL_TRAIN'), (196649, 'UN_AVL'), (196650, 'AVL_TRAIN'), (196659, 'UN_AVL'), (196659, 'AVL_TRAIN'), (196669, 'UN_AVL'), (197206, 'AVL_TRAIN'), (197217, 'UN_AVL'), (197303, 'AVL_TRAIN'), (197310, 'UN_AVL'), (197531, 'AVL_TRAIN'), (198948, 'UN_AVL'), (198950, 'AVL_TRAIN'), (198989, 'UN_AVL'), (199028, 'AVL_TRAIN'), (199174, 'UN_AVL'), (199186, 'AVL_TRAIN'), (199215, 'UN_AVL'), (199312, 'AVL_TRAIN'), (199343, 'UN_AVL'), (199343, 'AVL_TRAIN'), (199425, 'UN_AVL'), (199438, 'AVL_TRAIN'), (199535, 'UN_AVL'), (199640, 'AVL_TRAIN'), (199708, 'UN_AVL'), (199730, 'AVL_TRAIN'), (199762, 'UN_AVL'), (199765, 'AVL_TRAIN'), (199775, 'UN_AVL'), (200015, 'AVL_TRAIN'), (200401, 'UN_AVL'), (200475, 'AVL_TRAIN'), (200484, 'UN_AVL'), (200485, 'AVL_TRAIN'), (200602, 'UN_AVL'), (200748, 'AVL_TRAIN'), (201348, 'UN_AVL'), (201423, 'AVL_TRAIN'), (201426, 'UN_AVL'), (201468, 'AVL_TRAIN'), (201641, 'UN_AVL'), (201648, 'AVL_TRAIN'), (201919, 'UN_AVL'), (201919, 'AVL_TRAIN'), (202474, 'UN_AVL'), (203089, 'AVL_TRAIN'), (203116, 'UN_AVL'), (203118, 'AVL_TRAIN'), (204529, 'UN_AVL'), (204691, 'AVL_TRAIN'), (206142, 'UN_AVL'), (206152, 'AVL_TRAIN'), (206314, 'UN_AVL'), (206315, 'AVL_TRAIN'), (207100, 'UN_AVL'), (207100, 'AVL_TRAIN'), (207140, 'UN_AVL'), (207151, 'AVL_TRAIN'), (216543, 'AVL_EVAL'), (216543, 'AVL_TRAIN'), (216545, 'AVL_EVAL'), (243957, 'UN_AVL'), (347630, 'AVL_TRAIN'), (348248, 'UN_AVL'), (349655, 'AVL_TRAIN'), (352809, 'UN_AVL'), (352809, 'AVL_TRAIN'), 
(352813, 'UN_AVL'), (371981, 'AVL_TRAIN'), (371989, 'UN_AVL'), (372076, 'AVL_TRAIN'), (372187, 'UN_AVL'), (372198, 'AVL_TRAIN'), (372290, 'UN_AVL'), (373313, 'AVL_TRAIN'), (373316, 'UN_AVL'), (373327, 'AVL_TRAIN'), (373406, 'UN_AVL'), (373766, 'AVL_TRAIN'), (374530, 'UN_AVL'), (374534, 'AVL_TRAIN'), (375626, 'UN_AVL'), (443017, 'AVL_TRAIN'), (449749, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (77831, 'AVL_TRAIN'), (82222, 'UN_AVL'), (82631, 'AVL_TRAIN'), (86208, 'UN_AVL'), (86457, 'AVL_TRAIN'), (97952, 'UN_AVL'), (117083, 'AVL_TRAIN'), (146346, 'AVL_EVAL'), (146781, 'AVL_TRAIN'), (148083, 'AVL_EVAL'), (155226, 'AVL_TRAIN'), (155237, 'AVL_EVAL'), (157309, 'UN_AVL'), (196646, 'AVL_TRAIN'), (196649, 'UN_AVL'), (196650, 'AVL_TRAIN'), (196659, 'UN_AVL'), (196659, 'AVL_TRAIN'), (196669, 'UN_AVL'), (197206, 'AVL_TRAIN'), (197217, 'UN_AVL'), (197303, 'AVL_TRAIN'), (197310, 'UN_AVL'), (197531, 'AVL_TRAIN'), (198948, 'UN_AVL'), (198950, 'AVL_TRAIN'), (198989, 'UN_AVL'), (199028, 'AVL_TRAIN'), (199174, 'UN_AVL'), (199186, 'AVL_TRAIN'), (199215, 'UN_AVL'), (199312, 'AVL_TRAIN'), (199343, 'UN_AVL'), (199343, 'AVL_TRAIN'), (199425, 'UN_AVL'), (199438, 'AVL_TRAIN'), (199535, 'UN_AVL'), (199640, 'AVL_TRAIN'), (199708, 'UN_AVL'), (199730, 'AVL_TRAIN'), (199762, 'UN_AVL'), (199765, 'AVL_TRAIN'), (199775, 'UN_AVL'), (200015, 'AVL_TRAIN'), (200401, 'UN_AVL'), (200475, 'AVL_TRAIN'), (200484, 'UN_AVL'), (200485, 'AVL_TRAIN'), (200602, 'UN_AVL'), (200748, 'AVL_TRAIN'), (201348, 'UN_AVL'), (201423, 'AVL_TRAIN'), (201426, 'UN_AVL'), (201468, 'AVL_TRAIN'), (201641, 'UN_AVL'), (201648, 'AVL_TRAIN'), (201919, 'UN_AVL'), (201919, 'AVL_TRAIN'), (202474, 'UN_AVL'), (203089, 'AVL_TRAIN'), (203116, 'UN_AVL'), (203118, 'AVL_TRAIN'), (204529, 'UN_AVL'), (204691, 'AVL_TRAIN'), (206142, 'UN_AVL'), (206152, 'AVL_TRAIN'), (206314, 'UN_AVL'), (206315, 'AVL_TRAIN'), (207100, 'UN_AVL'), (207100, 'AVL_TRAIN'), (207140, 'UN_AVL'), (207151, 'AVL_TRAIN'), (216543, 
'UN_AVL'), (216543, 'AVL_TRAIN'), (216545, 'UN_AVL'), (347630, 'AVL_TRAIN'), (348248, 'UN_AVL'), (349655, 'AVL_TRAIN'), (352809, 'UN_AVL'), (352809, 'AVL_TRAIN'), (352813, 'UN_AVL'), (371981, 'AVL_TRAIN'), (371989, 'UN_AVL'), (372076, 'AVL_TRAIN'), (372187, 'UN_AVL'), (372198, 'AVL_TRAIN'), (372290, 'UN_AVL'), (373313, 'AVL_TRAIN'), (373316, 'UN_AVL'), (373327, 'AVL_TRAIN'), (373406, 'UN_AVL'), (373766, 'AVL_TRAIN'), (374530, 'UN_AVL'), (374534, 'AVL_TRAIN'), (375626, 'UN_AVL'), (443017, 'AVL_TRAIN'), (449749, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_98.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_98.json index 44b8ba58a..fe56dd927 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_98.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_98.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, "client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "17.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (9041, 'AVL_TRAIN'), (11298, 'UN_AVL'), (11303, 'AVL_TRAIN'), (11925, 'UN_AVL'), (50425, 'AVL_EVAL'), (243734, 'UN_AVL'), (243983, 'AVL_TRAIN'), (244007, 'UN_AVL'), (244013, 'AVL_TRAIN'), (244343, 'UN_AVL'), (244365, 'AVL_TRAIN'), (244547, 'UN_AVL'), 
(244548, 'AVL_TRAIN'), (246255, 'UN_AVL'), (246286, 'AVL_TRAIN'), (250253, 'AVL_EVAL'), (270711, 'UN_AVL'), (270711, 'AVL_TRAIN'), (274195, 'UN_AVL'), (309232, 'AVL_TRAIN'), (309266, 'UN_AVL'), (309266, 'AVL_TRAIN'), (309491, 'UN_AVL'), (309492, 'AVL_TRAIN'), (312576, 'UN_AVL'), (312693, 'AVL_TRAIN'), (312702, 'UN_AVL'), (312716, 'AVL_TRAIN'), (315431, 'UN_AVL')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (9041, 'AVL_TRAIN'), (11298, 'UN_AVL'), (11303, 'AVL_TRAIN'), (11925, 'UN_AVL'), (243983, 'AVL_TRAIN'), (244007, 'UN_AVL'), (244013, 'AVL_TRAIN'), (244343, 'UN_AVL'), (244365, 'AVL_TRAIN'), (244547, 'UN_AVL'), (244548, 'AVL_TRAIN'), (246255, 'UN_AVL'), (246286, 'AVL_TRAIN'), (250253, 'UN_AVL'), (270711, 'AVL_TRAIN'), (274195, 'UN_AVL'), (309232, 'AVL_TRAIN'), (309266, 'UN_AVL'), (309266, 'AVL_TRAIN'), (309491, 'UN_AVL'), (309492, 'AVL_TRAIN'), (312576, 'UN_AVL'), (312693, 'AVL_TRAIN'), (312702, 'UN_AVL'), (312716, 'AVL_TRAIN'), (315431, 'UN_AVL')]", diff --git a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_99.json b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_99.json index d6647b067..01119de7e 100644 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_99.json +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/trainer_99.json @@ -50,7 +50,7 @@ "comm_round": 3000, "ci": 0, "dataset": "agnews", - "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", + "data_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/data_files/agnews_data.h5", "partition_file_path": "/home/dgarg39/$FWDLLM_USER/fednlp_data/partition_files/agnews_partition.h5", "partition_method": "uniform", "fl_algorithm": "FedFwd", @@ -71,7 +71,7 @@ "var_control": true, "perturbation_sampling": true, "evaluate_during_training_steps": 100, - "fp16": false, + "fp16": true, "output_dir": "/tmp/", "is_debug_mode": 0, "fedprox_mu": 1, @@ -81,7 +81,7 @@ "manual_seed": 42, 
"client_num_in_total": 0, "warmup_ratio": 1, - "training_delay_enabled": "True", + "training_delay_enabled": "False", "training_delay_s": "36.0", "avl_events_mobiperf_3st_50": "[(0, 'AVL_TRAIN'), (227, 'AVL_EVAL'), (1757, 'AVL_TRAIN'), (2266, 'AVL_EVAL'), (15753, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'AVL_EVAL'), (86392, 'AVL_TRAIN'), (87189, 'AVL_EVAL'), (89364, 'AVL_TRAIN'), (89365, 'AVL_EVAL'), (97105, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'AVL_EVAL'), (160572, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", "avl_events_mobiperf_3st_75": "[(0, 'AVL_TRAIN'), (300, 'UN_AVL'), (1757, 'AVL_TRAIN'), (2266, 'UN_AVL'), (75638, 'AVL_TRAIN'), (76263, 'UN_AVL'), (77006, 'AVL_TRAIN'), (78334, 'UN_AVL'), (78346, 'AVL_TRAIN'), (78405, 'UN_AVL'), (78406, 'AVL_TRAIN'), (78543, 'UN_AVL'), (78549, 'AVL_TRAIN'), (79377, 'UN_AVL'), (79379, 'AVL_TRAIN'), (79486, 'UN_AVL'), (79495, 'AVL_TRAIN'), (79791, 'UN_AVL'), (79796, 'AVL_TRAIN'), (80059, 'UN_AVL'), (80071, 'AVL_TRAIN'), (80937, 'UN_AVL'), (80967, 'AVL_TRAIN'), (80978, 'UN_AVL'), (80979, 'AVL_TRAIN'), (81195, 'UN_AVL'), (81206, 'AVL_TRAIN'), (85845, 'UN_AVL'), (86392, 'AVL_TRAIN'), (87189, 'UN_AVL'), (89364, 'AVL_TRAIN'), (89365, 'UN_AVL'), (102534, 'AVL_TRAIN'), (124202, 'UN_AVL'), (168151, 'AVL_TRAIN'), (168659, 'UN_AVL'), (168714, 'AVL_TRAIN'), (169183, 'UN_AVL'), (170940, 'AVL_TRAIN'), (171086, 'UN_AVL'), (172784, 'AVL_TRAIN')]", diff --git 
a/lib/python/examples/fwdllm/expts/run_tc_expts/run_text_classification.sh b/lib/python/examples/fwdllm/expts/run_tc_expts/run_text_classification.sh index 7f16e16ab..db7887d45 100755 --- a/lib/python/examples/fwdllm/expts/run_tc_expts/run_text_classification.sh +++ b/lib/python/examples/fwdllm/expts/run_tc_expts/run_text_classification.sh @@ -8,8 +8,9 @@ LR=$2 FL_ALG=$3 total_client_num=$4 LOG_LEVEL=$5 +ENABLE_WATCHDOG=${6:-false} # Set to "true" to enable error checking, defaults to "false" -pkill -f "gaurav.*/fl_main.py" +pkill -f "$FWDLLM_USER.*fl_main.py" if [ $? -eq 0 ]; then echo "Successfully killed some processes." else @@ -138,16 +139,53 @@ else LOG_SUFFIX="fedFwd_${model_type}_${DATA_NAME}_lr${LR}_client_num_${client_num_per_round}_numerical_${RUN_TIMESTAMP}" AGG_LOG_FILE="$LOG_DIR/test_agg_${LOG_SUFFIX}.log" TRAINER_LOG_FILE="$LOG_DIR/test_trainer_${LOG_SUFFIX}.log" + PARENT_PID=$$ + + # Function to check logs for errors and kill all processes if found + check_errors() { + # Use basic grep (-e for multiple patterns) to find the first file with an error + FOUND_ERR_FILE=$(grep -l -e "Error" -e "Exception" -e "Traceback" "$AGG_LOG_FILE" "$TRAINER_LOG_FILE" | head -n 1) + + if [ ! -z "$FOUND_ERR_FILE" ]; then + echo "--------------------------------------------------------" + echo "ERROR DETECTED in $FOUND_ERR_FILE! Shutting down..." + echo "--------------------------------------------------------" + # Show the first few errors using basic grep + ERR_MSG=$(grep -e "Error" -e "Exception" -e "Traceback" "$FOUND_ERR_FILE" | head -n 20) + + # Append termination message to both logs + TERMINATION_MSG="Killed spawned processes due to error in $FOUND_ERR_FILE\n\n$ERR_MSG" + echo -e "$TERMINATION_MSG" >> "$AGG_LOG_FILE" + echo -e "$TERMINATION_MSG" >> "$TRAINER_LOG_FILE" + echo -e "$TERMINATION_MSG" + + # Trigger the graceful cleanup and exit + # We kill the parent process with TERM; the trap will handle the rest. 
+ kill -TERM $PARENT_PID + exit 1 + fi + } EXPANDED_TMP_DIR="${REPO_PATH}/tmp_expanded_configs_${RUN_TIMESTAMP}" mkdir -p "$EXPANDED_TMP_DIR" - # Clean up expanded configs on exit (but keep log files!) + # Clean up expanded configs and background processes on exit cleanup() { - echo "Cleaning up expanded JSONs: $EXPANDED_TMP_DIR" - rm -rf "$EXPANDED_TMP_DIR" + echo "Cleaning up processes and temporary files..." + # 1. Kill the watchdog first to prevent recursive calls + if [ ! -z "$WATCHDOG_PID" ]; then + kill $WATCHDOG_PID 2>/dev/null + fi + # 2. Kill all python trainer/aggregator processes + pkill -f "$FWDLLM_USER.*fl_main.py" + # 3. Remove temp directory + if [ -d "$EXPANDED_TMP_DIR" ]; then + echo "Removing temporary directory: $EXPANDED_TMP_DIR" + rm -rf "$EXPANDED_TMP_DIR" + fi } - trap cleanup EXIT + # Trap common termination signals + trap cleanup EXIT INT TERM # cleanup function is called no matter how the script ends (normal exit, error, or manual termination) # substitute env variables in the temp files AGG_SRC="$REPO_PATH/lib/python/examples/fwdllm/expts/run_tc_expts/json_scripts/aggregator.json" @@ -164,6 +202,9 @@ else echo "started agg" sleep 10 # Give aggregator time to set up + if [ "$ENABLE_WATCHDOG" = "true" ]; then + check_errors + fi NUM_AVAIL_GPUS=8 @@ -181,11 +222,28 @@ else --config "$TRAIN_EXPANDED" \ --log_level "$LOG_LEVEL" \ >> "$TRAINER_LOG_FILE" 2>&1 & + if [ "$ENABLE_WATCHDOG" = "true" ]; then + check_errors + fi sleep 8 else echo "Trainer config not found, skipping: $TRAIN_SRC" fi done + # Start background periodic check (every 30 seconds) + # The watchdog will automatically exit if the parent process ($PARENT_PID) dies + if [ "$ENABLE_WATCHDOG" = "true" ]; then + ( + while kill -0 $PARENT_PID 2>/dev/null; do # Checks if the parent script is still alive + check_errors + sleep 30 + done + ) & + WATCHDOG_PID=$! 
+ fi + wait + # Clean up watchdog on normal exit + kill $WATCHDOG_PID 2>/dev/null fi \ No newline at end of file diff --git a/lib/python/examples/fwdllm/trainer/forward_training/FedSgdTrainer.py b/lib/python/examples/fwdllm/trainer/forward_training/FedSgdTrainer.py index 7e32496d0..b5ae9271f 100755 --- a/lib/python/examples/fwdllm/trainer/forward_training/FedSgdTrainer.py +++ b/lib/python/examples/fwdllm/trainer/forward_training/FedSgdTrainer.py @@ -340,37 +340,37 @@ def update_model(self, weights): # logger.info(f"NRL: Updated model weights: {weights}") self.trainer.set_model_params(weights) - def update_dataset(self, client_index, round_idx=None): - logger.info(f"NRL: Updated client index: {client_index}, round: {round_idx}") - self.client_index = client_index - self.train_local = [self.train_data_local_dict[id] for id in client_index] - self.local_sample_number = self.train_data_local_num_dict[client_index[0]] - - self.test_local = self.test_data_local_dict[client_index[0]] - - self.train_local_list = [ - [data for data in self.train_local[i]] for i in range(len(self.train_local)) - ] - - # Write all training data for each client to separate files - # Only write if we haven't written during initialization - if not self.data_written_to_file: - logger.info( - f"Writing training data to files during update_dataset for clients {client_index}" - ) - for i, client_id in enumerate(client_index): - if i < len(self.train_local): - self._write_client_data_to_file( - client_id, self.train_local[i], round_idx - ) - self.data_written_to_file = True - logger.info( - "Successfully wrote training data for all clients during update_dataset" - ) - else: - logger.info( - "Training data already written to files during initialization, skipping update_dataset write" - ) + # def update_dataset(self, client_index, round_idx=None): + # logger.info(f"NRL: Updated client index: {client_index}, round: {round_idx}") + # self.client_index = client_index + # self.train_local = 
[self.train_data_local_dict[id] for id in client_index] + # self.local_sample_number = self.train_data_local_num_dict[client_index[0]] + + # self.test_local = self.test_data_local_dict[client_index[0]] + + # self.train_local_list = [ + # [data for data in self.train_local[i]] for i in range(len(self.train_local)) + # ] + + # # Write all training data for each client to separate files + # # Only write if we haven't written during initialization + # if not self.data_written_to_file: + # logger.info( + # f"Writing training data to files during update_dataset for clients {client_index}" + # ) + # for i, client_id in enumerate(client_index): + # if i < len(self.train_local): + # self._write_client_data_to_file( + # client_id, self.train_local[i], round_idx + # ) + # self.data_written_to_file = True + # logger.info( + # "Successfully wrote training data for all clients during update_dataset" + # ) + # else: + # logger.info( + # "Training data already written to files during initialization, skipping update_dataset write" + # ) def train(self, round_idx=None): logger.info("entered train where weights = params and not grad") @@ -383,18 +383,7 @@ def train(self, round_idx=None): return weights, self.local_sample_number @timer_decorator - def train_with_data_id(self): - # Create FwdLLMStage for timing/metrics logging - self.fwd_llm_stage = FwdLLMStage( - self._round, self.data_id, self.iteration_per_data_id, self.trainer_id - ) - - if self.abort_training == True: - logger.info( - f"Aborting training for trainer id: {self.trainer_id} because it has already sent updates for iteration_per_data_id: {self.iteration_per_data_id}" - ) - return - + def _check_availability(self): if self.avl_state != TrainerAvailState.AVL_TRAIN: if self.wait_until_next_avl: logger.info( @@ -409,7 +398,44 @@ def train_with_data_id(self): logger.info( f"Trainer id {self.trainer_id} is not available to train. Exiting training." 
) - return + return False + return True + + @timer_decorator + def _perform_training(self, list_index): + self.trainer.train( + [self.train_local_list[0][list_index]], self.device, self.args, + {"round_id": self._round, "data_id": self.data_id, "iteration": self.iteration_per_data_id} + ) + self.grad_for_var_check = self.trainer.model_trainer.grad_for_var_check + + @timer_decorator + def _emulate_training_delay(self): + if self.training_delay_enabled == "True": + # Eval is 3X faster than training on CPU + # Eval on NPUs is 10-50X is faster than training on CPUs. We could take 20X if we wanted to consider an all-NPU client cohort for Eval (NPUs don't support training) + eval_delay = self.training_delay_s / 3.0 + time.sleep(eval_delay / self.speedup_factor) + logger.info( + f"Delayed eval time for trainer " + f"{self.trainer_id} by {eval_delay}s. Sleeping for {eval_delay / self.speedup_factor}s." + ) + + @timer_decorator + def train_with_data_id(self): + # Create FwdLLMStage for timing/metrics logging + self.fwd_llm_stage = FwdLLMStage( + self._round, self.data_id, self.iteration_per_data_id, self.trainer_id + ) + + if self.abort_training == True: + logger.info( + f"Aborting training for trainer id: {self.trainer_id} because it has already sent updates for iteration_per_data_id: {self.iteration_per_data_id}" + ) + return + + if not self._check_availability(): + return logger.info( f"starting training for trainer id: {self.trainer_id}, data_id = {self.data_id}" @@ -426,25 +452,13 @@ def train_with_data_id(self): # list_index = self._model_version % self._round if self._model_version > self._round else self._model_version list_index = self.data_id # Which data bin to use for training logging.info(f"self._model_version: {self._model_version } - list-index/data-id = {list_index}") - self.trainer.train( - [self.train_local_list[0][list_index]], self.device, self.args, - {"round_id": self._round, "data_id": self.data_id, "iteration": self.iteration_per_data_id} - ) - - 
self.grad_for_var_check = self.trainer.model_trainer.grad_for_var_check - logger.debug(f"len of grad_for_var_check = {len(self.grad_for_var_check)}") + + + self._perform_training(list_index) # emulate delays in training (due to compute resource and/or # dataset size and/or network latency) - if self.training_delay_enabled == "True": - # Eval is 3X faster than training on CPU - # Eval on NPUs is 10-50X is faster than training on CPUs. We could take 20X if we wanted to consider an all-NPU client cohort for Eval (NPUs don't support training) - eval_delay = self.training_delay_s / 3.0 - time.sleep(eval_delay / self.speedup_factor) - logger.info( - f"Delayed eval time for trainer " - f"{self.trainer_id} by {eval_delay}s. Sleeping for {eval_delay / self.speedup_factor}s." - ) + self._emulate_training_delay() logger.info( f"completed training for trainer id: {self.trainer_id}, data_id = {self.data_id}" diff --git a/lib/python/examples/fwdllm/trainer/forward_training/fwdgrad_utils.py b/lib/python/examples/fwdllm/trainer/forward_training/fwdgrad_utils.py index 6a1fafb78..aa32b5f9d 100644 --- a/lib/python/examples/fwdllm/trainer/forward_training/fwdgrad_utils.py +++ b/lib/python/examples/fwdllm/trainer/forward_training/fwdgrad_utils.py @@ -61,6 +61,7 @@ def functional_get_loss( x (torch.Tensor): Input tensor for the PyTorch model. t (torch.Tensor): Targets. num_classes (int, optional): Maximum number of classes. Defaults to 10. + buffers (list): Model buffers. Returns: torch.Tensor: Cross-entropy loss. 
diff --git a/lib/python/examples/fwdllm/trainer/forward_training/tc_transformer_trainer_distribute.py b/lib/python/examples/fwdllm/trainer/forward_training/tc_transformer_trainer_distribute.py index 62587dfd7..e18af29ac 100755 --- a/lib/python/examples/fwdllm/trainer/forward_training/tc_transformer_trainer_distribute.py +++ b/lib/python/examples/fwdllm/trainer/forward_training/tc_transformer_trainer_distribute.py @@ -21,6 +21,7 @@ import functorch as fc import gc import os +import contextlib from flame.monitor.runtime import timer_decorator, FwdLLMStage logger = logging.getLogger(__name__) @@ -219,6 +220,12 @@ def __init__( self.total_rng_iter = 0 + # Optimization: cache fmodel, params, and buffers to avoid recreation + self.fmodel = None + self.params = None + self.buffers = None + self.grad_for_var_check = None + # def initialize(self) -> None: """Initialize role.""" self.device = # torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -289,19 +296,6 @@ def _select_optimal_perturbations(self, device, logging_state): index += 1 return v_buffer - @timer_decorator - def _compute_batch_stat_utility(self, device, x, labels): - with torch.no_grad(): - pred = self.model(x) - if hasattr(pred, "logits"): - logits = pred.logits - elif isinstance(pred, (tuple, list)): - logits = pred[0] - else: - logits = pred - loss = self.base_trainer.oort_loss(logits, labels.view(-1), epoch=0, batch_idx=0, reduction="mean") - logging.debug(f"stat_utility for trainerId: {self.trainer_id} is {self.base_trainer._stat_utility}, loss: {loss.mean().item()}") - @timer_decorator def _prepare_perturbation_tensors(self, device, v_buffer): if self.args.perturbation_sampling and v_buffer != {}: @@ -395,7 +389,51 @@ def train_model(self, device=None, logging_state=None): self._make_model_functional(device) self.log_memory("after_fmodel_setup", device) - global_step, tr_loss = 0, 0.0 + + + @timer_decorator + def _force_cuda_memory_cleanup(self, device, tag): + gc.collect() + 
torch.cuda.empty_cache() + self.log_memory(tag, device) + + @timer_decorator + def _setup_training_state(self, device, logging_state): + @timer_decorator + def _select_optimal_perturbations(self, device, logging_state): + if self.args.var_control: + self.grad = None if self.old_grad is None else [g.clone() for g in self.old_grad] + + v_buffer = {} + all_perturbations_hash = "" + selected_perturbation_hash = "" + index = 0 + for k, v in self.model.named_parameters(): + if self.grad is not None and v.requires_grad: + self.total_rng_iter += 1 + shape = v.shape + candidate_v = _randn_wrapper((1 * 10, *shape), device="cpu", generator=self.torch_rng, logging_state=logging_state, param_name=k) + # torch.randn((1 * 10, *shape), device="cpu", generator=self.torch_rng) + target_grad = self.grad[index] + + target_grad = torch.flatten(target_grad) + candidate_v = torch.flatten(candidate_v, start_dim=1) + + logging.debug(f"candidate_v for client_idx {self.args.client_idx} is {_calculate_hash(candidate_v)} for param_name {k}") + all_perturbations_hash = _calculate_rolling_hash(candidate_v, all_perturbations_hash) + + cos_sim = calculate_cos_sim(candidate_v, target_grad, device) + + sorted_values, sorted_indices = torch.sort(cos_sim, descending=True) + v_buffer[index] = [ + candidate_v[i].reshape(v.shape) for i in sorted_indices[:1] + ] + + del candidate_v, target_grad, cos_sim, sorted_indices, shape + index += 1 + return v_buffer + + self.log_memory("after_fmodel_setup", device) v_buffer = {} # Perturbation selection logic slightly differs from the vanilla FwdLLM implementation. 
Their logic has a flaw which cannot be used in a true-FL setting @@ -411,40 +449,153 @@ def train_model(self, device=None, logging_state=None): or self.grad is None or len(self.grad) != len(self.params) ): - self.grad = [torch.zeros_like(p, device="cpu") for p in self.params] + # Optimization: Initialize on device to avoid Host to Device transfer every batch + self.grad = [torch.zeros_like(p, device=device) for p in self.params] else: - for fg in self.grad: - fg.zero_() + # Ensure gradients are on the correct device (they might have been moved to CPU in a previous round) + self.grad = [fg.to(device).zero_() for fg in self.grad] + + return v_buffer - with torch.no_grad(): - for epoch in range(self.args.epochs): - logging.info(f"train_dl size: {len(self.train_dl)}") - for batch_idx, batch in enumerate(self.train_dl): - curr_client_idx = self.args.client_idx - self.log_memory( - f"epoch{epoch}_batch{batch_idx}_client{curr_client_idx}_start", - device, + @timer_decorator + def _train_one_batch(self, device, batch, epoch, batch_idx, v_buffer): + @timer_decorator + def _compute_batch_stat_utility(self, device, x, labels): + with torch.no_grad(): + pred = self.model(x) + if hasattr(pred, "logits"): + logits = pred.logits + elif isinstance(pred, (tuple, list)): + logits = pred[0] + else: + logits = pred + loss = self.base_trainer.oort_loss(logits, labels.view(-1), epoch=0, batch_idx=0, reduction="mean") + # Optimization: removed .item() to avoid GPU sync + logging.debug(f"stat_utility for trainerId: {self.trainer_id} is {self.base_trainer._stat_utility}, loss: {loss.mean()}") + + @timer_decorator + def _prepare_perturbation_tensors(self, device, v_buffer): + if self.args.perturbation_sampling and v_buffer != {}: + v_params = [ + ( + v_buffer[i][0].to(device) + if p.requires_grad + else torch.zeros_like(p).to(device) ) + for i, p in enumerate(self.params) + ] + else: + v_params = [ + ( + torch.randn_like(p, device=device) + if p.requires_grad + else torch.zeros_like(p, 
device=device) + ) + for p in self.params + ] + return v_params + + @timer_decorator + def _compute_forward_jvp(self, device, x, labels, v_params): + # def wrapped_func(p): + # return functional_get_loss( + # p, + # self.fmodel, + # x, + # labels, + # num_classes=self.num_labels, + # buffers=self.buffers, + # ) + # loss, jvp = calculate_jvp_experiment(wrapped_func, self.params, v_params) + + f = partial( + functional_get_loss, + model=self.fmodel, + buffers = self.buffers, + num_classes = self.num_labels, + x=x, + t=labels, + ) + + loss, jvp = calculate_jvp(f, self.params, v_params) + jvp = jvp.to(device) + return loss, jvp + + @timer_decorator + def _accumulate_and_extract_grads(self, device, jvp, v_params): + # Optimization: Accumulate on device. self.grad should be on device. + for j, fg in enumerate(self.grad): + updated = (jvp * v_params[j]) # Keep on device + fg.add_(updated) + if self.args.var_control and j == self.layer_id_for_check: + self.grad_for_var_check = updated.detach().cpu() # Move to CPU only for check + + curr_client_idx = self.args.client_idx + + # Zero-Sync Sequence Length Check on CPU (Prevents GPU Stalls) + # batch[2] is typically the attention_mask. Use it before moving to device. + max_seq_len_in_batch = (batch[2] != 0).sum(dim=1).max().item() + if max_seq_len_in_batch > self.args.max_seq_length: + logging.warning( + f"Trainer {self.trainer_id}: Batch sequence length ({max_seq_len_in_batch}) " + f"exceeds max_seq_length ({self.args.max_seq_length}). This may lead to truncated inputs or memory issues." 
+ ) + + if batch_idx == 0 and epoch == 0: + logging.debug(f"Max active sequence length in first batch: {max_seq_len_in_batch}") - x = batch[1].to(device, non_blocking=True) - labels = batch[4].to(device, non_blocking=True) + self.log_memory( + f"epoch{epoch}_batch{batch_idx}_client{curr_client_idx}_start", + device, + ) + + x = batch[1].to(device, non_blocking=True) + labels = batch[4].to(device, non_blocking=True) + + # Stat-utility calculation + _compute_batch_stat_utility(self, device, x, labels) - # Stat-utility calculation - self._compute_batch_stat_utility(device, x, labels) + v_params = _prepare_perturbation_tensors(self, device, v_buffer) + logging.debug(f"v_params hashes: {[(_calculate_hash(v), v.shape) for v in v_params if v.requires_grad]}") + logging.debug(f"params hashes: {[(_calculate_hash(p), p.shape) for p in self.params]}") - v_params = self._prepare_perturbation_tensors(device, v_buffer) - logging.debug(f"v_params hashes: {[(_calculate_hash(v), v.shape) for v in v_params if v.requires_grad]}") - logging.debug(f"params hashes: {[(_calculate_hash(p), p.shape) for p in self.params]}") + loss, jvp = _compute_forward_jvp(self, device, x, labels, v_params) - loss, jvp = self._compute_forward_jvp(device, x, labels, v_params) + _accumulate_and_extract_grads(self, device, jvp, v_params) - self._accumulate_and_extract_grads(device, jvp, v_params) + # Optimization: Remove GC & buffer flushes from the batch loop + # self._force_cuda_memory_cleanup(device, f"epoch{epoch}_batch{batch_idx}_end") + + if hasattr(self, "base_trainer"): + self.base_trainer.normalize_stat_utility(epoch) + logging.debug( + f"stat_utility - normalized for trainerId: {self.trainer_id} = {self.base_trainer._stat_utility}" + ) + + return loss + + @timer_decorator + def _training_loop(self, device, v_buffer): + global_step = 0 + # Optimization: Accumulate loss on GPU as a tensor to avoid frequent Host to Device syncs + tr_loss = torch.tensor(0.0, device=device) + + # Optimization: Use 
autocast for training loop if enabled + from torch.cuda.amp import autocast + autocast_cm = autocast() if self.args.fp16 else contextlib.nullcontext() + logging.debug(f"Autocast enabled: {self.args.fp16}") + + with torch.no_grad(), autocast_cm: + for epoch in range(self.args.epochs): + logging.info(f"train_dl size: {len(self.train_dl)}") + for batch_idx, batch in enumerate(self.train_dl): + loss = self._train_one_batch(device, batch, epoch, batch_idx, v_buffer) - current_loss = loss.item() - tr_loss += current_loss + # Optimization: Keep tr_loss on device. + tr_loss += loss global_step += 1 logging.info( - f"epoch = {epoch}, trainer_id = {self.trainer_id}, loss = {current_loss}" + f"epoch = {epoch}, trainer_id = {self.trainer_id}, loss = {loss}" ) if ( @@ -456,15 +607,11 @@ def train_model(self, device=None, logging_state=None): if self.args.is_debug_mode == 1 and global_step > 3: break - del x, labels, jvp, v_params, loss - self._force_cuda_memory_cleanup(device, f"epoch{epoch}_batch{batch_idx}_end") - - if hasattr(self, "base_trainer"): - self.base_trainer.normalize_stat_utility(epoch) - logging.debug( - f"stat_utility - normalized for trainerId: {self.trainer_id} = {self.base_trainer._stat_utility}" - ) + del batch, loss + return global_step, tr_loss + @timer_decorator + def _finalize_training(self, device, global_step, tr_loss): trainable_params = [p for p in self.model.parameters() if p.requires_grad] gradients = [p.grad for p in trainable_params if p.grad is not None] logging.info( @@ -477,39 +624,78 @@ def train_model(self, device=None, logging_state=None): f"Gradients: {len(gradients)} | Size: {human_readable_size(get_size_in_bytes(gradients))}" ) - # Final cleanup - del self.fmodel, self.params, self.buffers - self.fmodel, self.params, self.buffers = None, None, None - - if self.args.perturbation_sampling: - del v_buffer + # Optimization: Set p.grad only ONCE at the end of training + for p, fg in zip(self.model.parameters(), self.grad): + if 
p.requires_grad: + p.grad = fg.clone() # Already on device self.grad = [fg.detach().cpu() for fg in self.grad] - if hasattr(self, "grad_for_var_check"): + if self.grad_for_var_check is not None: self.grad_for_var_check = self.grad_for_var_check.detach().cpu() - gc.collect() - torch.cuda.empty_cache() - allocated_after = torch.cuda.memory_allocated(device) self.log_memory("end", device) logging.info( - f"[MEM] Allocated Before/After: {allocated_before/1e6:.2f}MB → {allocated_after/1e6:.2f}MB, Δ: {(allocated_after-allocated_before)/1e6:.2f}MB | trainer id: {self.trainer_id}" + f"[MEM] Allocated Before/After: {self.allocated_before/1e6:.2f}MB \u2192 {allocated_after/1e6:.2f}MB, \u0394: {(allocated_after-self.allocated_before)/1e6:.2f}MB | trainer id: {self.trainer_id}" ) - # self.model.train() # See comment about self.model.eval() above. TL;DR: This is used to make training deterministic - return global_step, tr_loss / global_step if global_step > 0 else 0.0 + return (tr_loss / global_step).item() if global_step > 0 else 0.0 + + @timer_decorator + def train_model(self, device=None, logging_state=None): + if not device: + device = self.device + + if logging_state: + self.fwd_llm_stage = FwdLLMStage( + logging_state.get("round_id"), + logging_state.get("data_id"), + logging_state.get("iteration"), + self.trainer_id + ) + + self.log_memory("train_model_start", device) + self.allocated_before = torch.cuda.memory_allocated(device) + + self._make_model_functional(device) + + v_buffer = self._setup_training_state(device, logging_state) + + global_step, tr_loss = self._training_loop(device, v_buffer) + + avg_loss = self._finalize_training(device, global_step, tr_loss) + + if self.args.perturbation_sampling: + del v_buffer + + return global_step, avg_loss + @timer_decorator def eval_model(self, epoch=0, global_step=0, device=None): + """ + Evaluate the model and compute metrics. + + As a future optimization, consider the following improvements mirroring the aggregator: + 1. 
Autocasting: Wrap the evaluation loop with `torch.cuda.amp.autocast()` to leverage TensorCores. + 2. Keep on GPU: Accumulate `eval_loss`, `preds`, and `out_label_ids` on GPU as `torch.tensor` + and only move to CPU at the end to avoid frequent pipeline flushes. + 3. One-time GPU transfer: Cache evaluation data in GPU memory if it doesn't change between calls. + """ if not device: device = self.device + # Ensure model is on the correct device + self.model.to(device) + # todo: Make sure that the model doesn't need to be put back into train mode using: `self.model.train()` before this method returns self.model.eval() + # Optimization: use cached functional model components self.fmodel, self.params, self.buffers = fc.make_functional_with_buffers( self.model ) + self.params = [p.to(device) for p in self.params] + self.buffers = [b.to(device) for b in self.buffers] eval_loss, nb_eval_steps = 0.0, 0 n_batches = len(self.test_dl) @@ -541,8 +727,9 @@ def eval_model(self, epoch=0, global_step=0, device=None): out_label_ids[start_index:end_index] = labels.detach().cpu().numpy() del x, labels, output, logits, loss - torch.cuda.empty_cache() - gc.collect() + # Optimization: Remove GC & buffer flushes from the batch loop + # torch.cuda.empty_cache() + # gc.collect() nb_eval_steps += 1 @@ -557,11 +744,11 @@ def eval_model(self, epoch=0, global_step=0, device=None): self.results.update(result) logging.info(self.results) - # Free memory - del self.fmodel, self.params, self.buffers - self.fmodel, self.params, self.buffers = None, None, None - gc.collect() - torch.cuda.empty_cache() + # Optimization: keep functional components + # del self.fmodel, self.params, self.buffers + # self.fmodel, self.params, self.buffers = None, None, None + + # Optimization: GC and buffer flushes moved to the framework level. 
return result, model_outputs, wrong diff --git a/lib/python/flame/mode/horizontal/syncfl/fwdllm_aggregator.py b/lib/python/flame/mode/horizontal/syncfl/fwdllm_aggregator.py index 55cb88868..f16ccb62f 100644 --- a/lib/python/flame/mode/horizontal/syncfl/fwdllm_aggregator.py +++ b/lib/python/flame/mode/horizontal/syncfl/fwdllm_aggregator.py @@ -71,13 +71,13 @@ SEND_TIMEOUT_WAIT_S = 90 # 90 seconds timeout -@timer_decorator -def recv_fifo_wrapper(channel, ends): - logger.debug("Entering recv_fifo_wrapper generator loop") - for msg, metadata in channel.recv_fifo(ends): - logger.debug(f"Yielding msg from {metadata}") - yield msg, metadata - logger.debug("Exiting recv_fifo_wrapper") +# @timer_decorator +# def recv_fifo_wrapper(channel, ends): +# logger.debug("Entering recv_fifo_wrapper generator loop") +# for msg, metadata in channel.recv_fifo(ends): +# logger.debug(f"Yielding msg from {metadata}") +# yield msg, metadata +# logger.debug("Exiting recv_fifo_wrapper") class TopAggregator(AsyncTopAgg): """Top level Aggregator implements an ML aggregation @@ -445,7 +445,6 @@ def _aggregate_grads_async(self, tag: str) -> None: """ Aggregate local model GRADIENTS asynchronously for FwdLLM. - This method is overridden from AsyncTopAgg. It receives gradients, aggregates them until _agg_goal is met, then performs FwdLLM variance check and model update. 
""" @@ -457,79 +456,95 @@ def _aggregate_grads_async(self, tag: str) -> None: if channel.ends(VAL_CH_STATE_RECV) is None: logger.info("no ends yet") return - time.sleep(0.1) # Slight delay to allow messages to arrive + # time.sleep(0.1) # Slight delay to allow messages to arrive msg, metadata = next(channel.recv_fifo(channel.ends(VAL_CH_STATE_RECV), 1)) - end, _ = metadata + end, timestamp = metadata if not msg: logger.debug(f"No data from {end}; skipping it") return + # Use new extracted helper + if not self._process_single_trainer_message(channel, msg, end, timestamp): + return + + logger.info(f"Received and processed grads from {end}.") + + if self._agg_goal_cnt < self._agg_goal: + logger.info(f"Agg goal not met. Have {self._agg_goal_cnt}/{self._agg_goal}") + channel.set_end_property( + end, PROP_UPDATE_COUNT, self._updates_recevied.get(end, 0) + 1 + ) + return + + if self._agg_goal_cnt == self._agg_goal: + self._process_aggregation_goal_met(tag, channel, is_async=True) + + + + @timer_decorator + def _process_single_trainer_message(self, channel, msg, end, timestamp): + if MessageType.MODEL_VERSION in msg: + version = msg[MessageType.MODEL_VERSION] + if version != self._model_version: + logger.info(f"Received grad with staleness={self._model_version-version}.") + if self.reject_stale_updates == True: + if version != self._model_version: + logger.info( + f"Rejecting trainer update from {end} of version {version}, " + f"agg self._model_version: {self._model_version}. Will return." 
+ ) + if self.is_async: + channel.cleanup_provided_ends(end) + else: + channel.cleanup_recvd_end(end) + return False + if MessageType.GRADIENTS in msg and MessageType.GRADIENTS_FOR_VAR_CHECK in msg: logger.info( - f"Received gradients from {end} " + f"received gradients from {end} " f"with model version {msg[MessageType.MODEL_VERSION]}" ) + self._agg_goal_cnt += 1 + channel.set_end_property( end, PROP_LAST_SELECTED_ROUND, msg[MessageType.MODEL_VERSION] ) channel.set_end_property( end, PROP_LAST_EVAL_ROUND, msg[MessageType.MODEL_VERSION] ) - # receiving stat_utility for every update from trainer - if MessageType.STAT_UTILITY in msg: - logger.info( - f"received stat_utility from {end} " - f"msg[MessageType.STAT_UTILITY] = {msg[MessageType.STAT_UTILITY]}" - ) - channel.set_end_property( - end, PROP_STAT_UTILITY, msg[MessageType.STAT_UTILITY] - ) - elif MessageType.STAT_UTILITY in msg: - logger.info( - f"Received eval-only message from {end}, " - f"stat_utility {msg[MessageType.STAT_UTILITY]}" + logger.debug( + f"Getting channel property {PROP_ROUND_START_TIME} for " + f"end {end}" ) - channel.set_end_property( - end, PROP_STAT_UTILITY, msg[MessageType.STAT_UTILITY] + round_start_time_tup = channel.get_end_property( + end, PROP_ROUND_START_TIME ) - channel.set_end_property( - end, PROP_LAST_EVAL_ROUND, msg[MessageType.MODEL_VERSION] + logger.debug( + f"Returned round_start_time_tup: {round_start_time_tup} for " + f"end {end} and timestamp {timestamp}" ) - channel._selector.trainer_eval_recv_ends.append(end) - channel._selector.remove_from_selected_ends(channel._ends, end) - channel._selector._cleanup_removed_ends(end) - return else: logger.error( f"Invalid message received from {end} in aggregate_weights: {msg}" ) - return + return False - # TODO: Check if we want to discard after putting in the queue - if self.reject_stale_updates == True: - logger.info("Check trainer model version, disallow stale updates") - if MessageType.MODEL_VERSION in msg: - version = 
msg[MessageType.MODEL_VERSION] - logger.info( - f"Model version aggregator: {self._model_version}, Model version trainer: {version}" - ) - - if version != self._model_version: - logger.info( - f"Rejecting stale update with staleness: {self._model_version-version}. Trainer update version: {version}, " - f" self._model_version: {self._model_version}" - ) - channel.cleanup_provided_ends(end) - return + logger.debug(f"received data from {end}") + channel.set_end_property(end, PROP_ROUND_END_TIME, (self._round, timestamp)) channel._selector.ordered_updates_recv_ends.append(end) self._updates_in_queue += 1 + self._per_round_update_list.append(end) + + if end not in self._updates_recevied.keys(): + self._updates_recevied[end] = 1 + else: + self._updates_recevied[end] += 1 if MessageType.GRADIENTS in msg: trainer_gradients = msg[MessageType.GRADIENTS] version_for_rate = msg[MessageType.MODEL_VERSION] - grad_for_var_check = ( msg[MessageType.GRADIENTS_FOR_VAR_CHECK] if MessageType.GRADIENTS_FOR_VAR_CHECK in msg @@ -543,89 +558,109 @@ def _aggregate_grads_async(self, tag: str) -> None: grad_for_var_check=grad_for_var_check, ) - # del trainer_gradients # Free memory - if MessageType.GRADIENTS_FOR_VAR_CHECK in msg: - self.grad_for_var_check_list.append( - msg[MessageType.GRADIENTS_FOR_VAR_CHECK] - ) + # We already append in aggregate_grads_from_trainers if provided, + # but syncing original logic which had a duplicate check: + if grad_for_var_check is None: # prevent double append if we just did it + self.grad_for_var_check_list.append( + msg[MessageType.GRADIENTS_FOR_VAR_CHECK] + ) + count = 0 if MessageType.DATASET_SIZE in msg: count = msg[MessageType.DATASET_SIZE] - channel.set_end_property(end, PROP_DATASET_SIZE, count) - - logger.info(f"Received and processed grads from {end}.") - - self._agg_goal_cnt += 1 - - if self._agg_goal_cnt < self._agg_goal: - logger.info(f"Agg goal not met. 
Have {self._agg_goal_cnt}/{self._agg_goal}") channel.set_end_property( - end, PROP_UPDATE_COUNT, self._updates_recevied.get(end, 0) + 1 + end, PROP_DATASET_SIZE, count ) - return - if self._agg_goal_cnt == self._agg_goal: + if MessageType.STAT_UTILITY in msg: logger.info( - f"Aggregation goal {self._agg_goal} reached. Performing FwdLLM aggregation." + f"received stat_utility from {end} " + f"msg[MessageType.STAT_UTILITY] {msg[MessageType.STAT_UTILITY]}" + ) + channel.set_end_property( + end, PROP_STAT_UTILITY, msg[MessageType.STAT_UTILITY] ) - self.grad_pool.append(self.grad) - self.add_local_trained_result( - 0, self.grad, self._agg_goal_cnt - ) # Assuming 0 is ok + version = msg.get(MessageType.MODEL_VERSION, "unknown") + logger.info( + f"Received grads from {end}. It was trained on model version {version}, with {count} samples" + ) + return True - self.fmodel, self.params, self.buffers = fc.make_functional_with_buffers( - self.model - ) - self.grad = [torch.zeros_like(p) for p in self.params] - self.aggregate(self._round) # This sets self.var and self.var_good_enough + @timer_decorator + def _process_aggregation_goal_met(self, tag, channel, is_async=False): + logger.info( + f"Aggregation goal {self._agg_goal} reached. Performing FwdLLM aggregation." + ) - if self.var_good_enough: + self.grad_pool.append(self.grad) + self.add_local_trained_result( + 0, self.grad, self._agg_goal_cnt + ) - logger.info( - f"Variance check PASSED. Evaluating model and advancing data_id." - ) - self.iteration_per_data_id += 1 # This is iter 1 for the new data_id. 
- result, _, _ = self.eval_model() - logger.info( - f"Round {self._round}, Data ID {self.data_id} Eval Loss: {result['eval_loss']}" - ) - self.data_id += 1 - self.iteration_per_data_id = 0 # Reset iteration count - self._is_model_updated = True - self._model_version += 1 + self.fmodel, self.params, self.buffers = fc.make_functional_with_buffers( + self.model + ) + self.grad = [torch.zeros_like(p) for p in self.params] - if self.data_id == self.total_data_bins: - logger.info( - f"All data bins complete. Incrementing round to {self._round + 1}" - ) - self._round += 1 - self.data_id = 0 - channel.set_property( - "round", self._round - ) # Update channel property + self.aggregate(self._round) + if self.var_good_enough: + logger.info( + f"Variance check PASSED. Evaluating model and advancing data_id." + ) + self.iteration_per_data_id += 1 + result, _, _ = self.eval_model() + logger.info( + f"Round {self._round}, Data ID {self.data_id} Eval Loss: {result['eval_loss']}" + ) + self.data_id += 1 + self.iteration_per_data_id = 0 + self._is_model_updated = True + + if self.config.hyperparameters.inc_model_version_per_data_id: + self._model_version += 1 else: + self._model_version = self._round + + if self.data_id == self.total_data_bins: logger.info( - f"Variance check FAILED. Retrying on same data_id {self.data_id}." + f"All data bins complete. Incrementing round to {self._round + 1}" ) - self.iteration_per_data_id += 1 - self._is_model_updated = False + self._round += 1 + self.data_id = 0 + channel.set_property( + "round", self._round + ) + + else: + logger.info( + f"Variance check FAILED. Retrying on same data_id {self.data_id}." 
+ ) + self.iteration_per_data_id += 1 + self._is_model_updated = False + + self._updates_in_queue -= self._agg_goal + self._agg_goal_cnt = 0 + + self.fwd_llm_stage = FwdLLMStage( + self._round, self.data_id, self.iteration_per_data_id + ) - self._updates_in_queue -= self._agg_goal - self._agg_goal_cnt = 0 # Reset for the next batch - # ASYNC: Clean up ends that just sent data + if is_async: logger.debug( "Agg goal reached, so resetting trainer end states in the channel" ) - channel.cleanup_recvd_ends() + channel.cleanup_recvd_ends() + + # Centralized cleanup + self._force_cuda_memory_cleanup() @timer_decorator def collect_and_accumulate_grads(self, tag, channel): """Aggregate trainer gradients synchronously, with timing and stage metadata.""" - # Create FwdLLMStage for timing/metrics logging self.fwd_llm_stage = FwdLLMStage(self._round, self.data_id, self.iteration_per_data_id, trainer_id=None) recv_ends = channel.ends() @@ -637,12 +672,6 @@ def collect_and_accumulate_grads(self, tag, channel): logger.info(f"Total ends: {len(recv_ends)}, required : {num_min_req}") num_min_req = min(num_min_req, len(recv_ends)) if self.ends_not_selected_yet: - # this is inefficient, but it will work - # can improve this by tracking how many clients need to be freed up - # If weights were not distributed in this iteration, async read messages from - # one trainer until required trainers are available to distribute weights to - # while maintaining concurrency. - # This is best effort sync aggregation, if agg goal is not met, we default to async. 
logger.info(f"We are waiting to clear up queue") num_min_req = min(num_min_req, 1) @@ -651,256 +680,38 @@ def collect_and_accumulate_grads(self, tag, channel): if not msg: logger.info(f"No data from {end}; skipping it") continue + + self._process_single_trainer_message(channel, msg, end, timestamp) - if MessageType.MODEL_VERSION in msg: - version = msg[MessageType.MODEL_VERSION] - - if self.reject_stale_updates == True: - if version != self._model_version: - logger.info( - f"Rejecting trainer update from {end} of version {version}, " - f"agg self._model_version: {self._model_version}. Will return." - ) - channel.cleanup_recvd_end(end) - # channel._selector.ordered_updates_recv_ends.append(end) - continue - - if ( - MessageType.GRADIENTS in msg - and MessageType.GRADIENTS_FOR_VAR_CHECK in msg - ): - logger.info( - f"received gradients from {end} " - f"with model version {msg[MessageType.MODEL_VERSION]}" - ) - self._agg_goal_cnt += 1 - - # For OORT selector NOTE: (DG) Last selected round should have - # ideally been set in distribute weights. But it was here in the - # old oort code and ive kept it. Instead of - # PROP_LAST_SELECTED_ROUND, it should have been - # PROP_LAST_UPDATE_RECVD_ROUND. - channel.set_end_property( - end, PROP_LAST_SELECTED_ROUND, msg[MessageType.MODEL_VERSION] - ) - - # Set last eval round for the trainer since training also means - # that eval was done for the same round. 
- channel.set_end_property( - end, PROP_LAST_EVAL_ROUND, msg[MessageType.MODEL_VERSION] - ) - # calculate round duration for this end, if the round number - # information is identical with round_start_time - logger.debug( - f"Getting channel property {PROP_ROUND_START_TIME} for " - f"end {end}" - ) - round_start_time_tup = channel.get_end_property( - end, PROP_ROUND_START_TIME - ) - end = metadata[0] - timestamp = metadata[1] - logger.debug( - f"Returned round_start_time_tup: {round_start_time_tup} for " - f"end {end} and timestamp {timestamp}" - ) - - # TODO: (DG) Also set the end property for task=eval done at - # timestamp=current. - - else: - logger.error( - f"Invalid message received from {end} in aggregate_weights: {msg}" - ) - return - - logger.debug(f"received data from {end}") - channel.set_end_property(end, PROP_ROUND_END_TIME, (round, timestamp)) - - # logger.debug(f"received message in agg_grads_sync {msg} from {end}") - # capture telemetry on trainer participation in rounds - channel._selector.ordered_updates_recv_ends.append(end) - self._updates_in_queue += 1 - self._per_round_update_list.append(end) - - if end not in self._updates_recevied.keys(): - self._updates_recevied[end] = 1 - else: - self._updates_recevied[end] += 1 - - # Process the gradients - if MessageType.GRADIENTS in msg: - trainer_gradients = msg[MessageType.GRADIENTS] - version_for_rate = msg[MessageType.MODEL_VERSION] - - grad_for_var_check = ( - msg[MessageType.GRADIENTS_FOR_VAR_CHECK] - if MessageType.GRADIENTS_FOR_VAR_CHECK in msg - else None - ) - - self.aggregate_grads_from_trainers( - trainer_gradients, - version_for_rate=version_for_rate, - stat_utility=channel.get_end_property(end, PROP_STAT_UTILITY), - grad_for_var_check=grad_for_var_check, - ) - - if MessageType.DATASET_SIZE in msg: - count = msg[MessageType.DATASET_SIZE] - channel.set_end_property( - end, PROP_DATASET_SIZE, msg[MessageType.DATASET_SIZE] - ) - - if MessageType.STAT_UTILITY in msg: - logger.info( - f"received 
stat_utility from {end} " - f"msg[MessageType.STAT_UTILITY] {msg[MessageType.STAT_UTILITY]}" - ) - channel.set_end_property( - end, PROP_STAT_UTILITY, msg[MessageType.STAT_UTILITY] - ) - - logger.info( - f"Received grads from {end}. It was trained on model version {version}, with {count} samples" - ) - - if self._agg_goal_cnt == self._agg_goal: + if self._agg_goal_cnt >= self._agg_goal: logger.info( f"Reached agg_goal of {self._agg_goal} since agg_goal_count is {self._agg_goal_cnt}. Breaking from for loop, proceeding to aggregate." ) break - # second loop to poll more if needed - # TODO(Aishwwarya): This stalls indefinitely when we have distributed weights but there are no more update to read. - num_freed = 0 # if at least one is freed, exit loop + # Second loop while self._agg_goal_cnt < self._agg_goal and not self.ends_not_selected_yet: for msg, metadata in channel.recv_fifo(channel.ends(), 1): end, timestamp = metadata if not msg: - logger.info(f"No data from {end}; skipping it") continue + + self._process_single_trainer_message(channel, msg, end, timestamp) - if MessageType.MODEL_VERSION in msg: - version = msg[MessageType.MODEL_VERSION] - - if self.reject_stale_updates == True: - if version != self._model_version: - logger.info( - f"Rejecting trainer update from {end} of version {version}, " - f"agg self._model_version: {self._model_version}. Will return." - ) - # num_freed += 1 - channel.cleanup_recvd_end(end) - # channel._selector.ordered_updates_recv_ends.append(end) - continue - - if ( - MessageType.GRADIENTS in msg - and MessageType.GRADIENTS_FOR_VAR_CHECK in msg - ): - logger.info( - f"received gradients from {end} " - f"with model version {msg[MessageType.MODEL_VERSION]}" - ) - self._agg_goal_cnt += 1 - - # For OORT selector NOTE: (DG) Last selected round should have - # ideally been set in distribute weights. But it was here in the - # old oort code and ive kept it. 
Instead of - # PROP_LAST_SELECTED_ROUND, it should have been - # PROP_LAST_UPDATE_RECVD_ROUND. - channel.set_end_property( - end, PROP_LAST_SELECTED_ROUND, msg[MessageType.MODEL_VERSION] - ) - - # Set last eval round for the trainer since training also means - # that eval was done for the same round. - channel.set_end_property( - end, PROP_LAST_EVAL_ROUND, msg[MessageType.MODEL_VERSION] - ) - # calculate round duration for this end, if the round number - # information is identical with round_start_time - logger.debug( - f"Getting channel property {PROP_ROUND_START_TIME} for " - f"end {end}" - ) - round_start_time_tup = channel.get_end_property( - end, PROP_ROUND_START_TIME - ) - end = metadata[0] - timestamp = metadata[1] - logger.debug( - f"Returned round_start_time_tup: {round_start_time_tup} for " - f"end {end} and timestamp {timestamp}" - ) - - # TODO: (DG) Also set the end property for task=eval done at - # timestamp=current. - - else: - logger.error( - f"Invalid message received from {end} in aggregate_weights: {msg}" - ) - return - - logger.debug(f"received data from {end}") - channel.set_end_property(end, PROP_ROUND_END_TIME, (round, timestamp)) - - # logger.debug(f"received message in agg_grads_sync {msg} from {end}") - # capture telemetry on trainer participation in rounds - channel._selector.ordered_updates_recv_ends.append(end) - self._updates_in_queue += 1 - self._per_round_update_list.append(end) - - if end not in self._updates_recevied.keys(): - self._updates_recevied[end] = 1 - else: - self._updates_recevied[end] += 1 - - # Process the gradients - if MessageType.GRADIENTS in msg: - # weights = weights_to_model_device(msg[MessageType.WEIGHTS], - # self.model) - trainer_gradients = msg[MessageType.GRADIENTS] - self.aggregate_grads_from_trainers(trainer_gradients) - - if MessageType.GRADIENTS_FOR_VAR_CHECK in msg: - logger.info( - f"received GRADIENTS_FOR_VAR_CHECK, {len(msg[MessageType.GRADIENTS_FOR_VAR_CHECK])}" - ) - 
self.grad_for_var_check_list.append( - msg[MessageType.GRADIENTS_FOR_VAR_CHECK] - ) - - if MessageType.DATASET_SIZE in msg: - count = msg[MessageType.DATASET_SIZE] - channel.set_end_property( - end, PROP_DATASET_SIZE, msg[MessageType.DATASET_SIZE] - ) - - if MessageType.STAT_UTILITY in msg: - channel.set_end_property( - end, PROP_STAT_UTILITY, msg[MessageType.STAT_UTILITY] - ) - stat_utility = msg[MessageType.STAT_UTILITY] - - logger.info( - f"Received grads from {end}. It was trained on model version {version}, with {count} samples" - ) - - if self._agg_goal_cnt == self._agg_goal: + if self._agg_goal_cnt >= self._agg_goal: logger.info( f"Reached agg_goal of {self._agg_goal} since agg_goal_count is {self._agg_goal_cnt}. Breaking from for loop, proceeding to aggregate." ) break + @timer_decorator def _aggregate_grads_sync(self, tag: str) -> None: """Aggregate trainer gradients synchronously.""" logger.info("starting aggregate_grads_sync") self.log_memory("start _aggregate_grads_sync", self.device) self.print_trainable_params_stats(location="[start,_aggregate_grads_sync()]") + if self.ends_not_selected_yet: logger.info("no ends selected yet") return @@ -910,9 +721,6 @@ def _aggregate_grads_sync(self, tag: str) -> None: return logger.debug(f"Channel {channel} found for tag {tag}") - # receive local model parameters from a trainer who arrives first NOTE: - # (DG) Right now, the leave notifications also cause a message to be - # processed and yield (None,None) from recv_fifo(). 
if channel.ends(VAL_CH_STATE_RECV) is None: logger.info("no ends yet") return @@ -922,89 +730,11 @@ def _aggregate_grads_sync(self, tag: str) -> None: logger.debug(f"received {len(self.cache)} trainer updates in cache") - # Proceed to aggregating gradients - # logger.info("calling aggregate for fwdllm (Sync)") - self.grad_pool.append(self.grad) - self.print_trainable_params_stats( - location="[agg_start,_aggregate_grads_sync()]" - ) - - self.add_local_trained_result(0, self.grad, self._agg_goal_cnt) - self.print_trainable_params_stats( - location="[after_add_local,_aggregate_grads_sync()]" - ) - self.fmodel, self.params, self.buffers = fc.make_functional_with_buffers( - self.model - ) - self.grad = [torch.zeros_like(p) for p in self.params] - if self._agg_goal_cnt < self._agg_goal: - # we enter this only if we have not distributed weights to enough clients - # and want to free up resources - - # skip the aggregation - # do not clean up the ends that we did not aggregate on yet - # we are already cleaning up the ends that gave back stale results - # channel.cleanup_recvd_ends() logger.info(f"did not reach agg goal, not aggregating") return - self.aggregate(self._round) - self.print_trainable_params_stats( - location="[after_aggregate(),_aggregate_grads_sync()]" - ) - self._agg_goal_cnt = 0 - # decrement counter since updates consumed from queue - self._updates_in_queue -= self._agg_goal - - round_to_print = self._round - data_id_to_print = self.data_id - - if self.var_good_enough: - # evaluate model to calculate loss - result, _, _ = self.eval_model() - logger.info(f"eval loss = {result['eval_loss']}") - self.data_id += 1 - self.iteration_per_data_id = 0 - self._is_model_updated = True - # TODO: need to replace it with per end property - if self.data_id == self.total_data_bins: - logger.info("incrementing round number now ") - self._round += 1 - self.data_id = 0 - channel.set_property("round", self._round) - - if 
self.config.hyperparameters.inc_model_version_per_data_id: - self._model_version += 1 - logger.info( - f"incrementing model version to {self._model_version} now, round id: {self._round}" - ) - else: - self._model_version = self._round - logger.info(f"Model version updated to: {self._model_version}") - - else: - self.iteration_per_data_id += 1 - self._is_model_updated = False - - logger.debug(f"aggregation finished for round {round_to_print}") - logger.info( - f"====== aggregation finished for round {round_to_print}, data id: {data_id_to_print}, " - f"self._agg_goal_cnt: {self._agg_goal_cnt}, self._updates_recevied: " - f"{self._updates_recevied}, self._trainer_participation_in_round_count: " - f"{self._trainer_participation_in_round_count}" - ) - - logger.info( - f"After round: {round_to_print}, remaining _updates_in_queue: " - f"{self._updates_in_queue}" - ) - - self.fwd_llm_stage = FwdLLMStage( - self._round, self.data_id, self.iteration_per_data_id - ) - channel.cleanup_recvd_ends() - + self._process_aggregation_goal_met(tag, channel, is_async=False) self.log_memory("end _aggregate_grads_sync", self.device) @timer_decorator @@ -1013,8 +743,7 @@ def _force_cuda_memory_cleanup(self): gc.collect() @timer_decorator - def invoke_gc(self, payload): - del payload + def invoke_gc(self): gc.collect() @timer_decorator @@ -1046,6 +775,9 @@ def eval_model(self, epoch=0, global_step=0, device=None): input_ids_all = self._cached_test_data[1] labels_all = self._cached_test_data[4] + # For Zero-Sync sequence length monitoring (CPU side source) + attention_mask_all_cpu = self.test_global.dataset.tensors[2] + # Accumulate predictions on GPU preds_gpu = torch.empty((test_sample_len, self.num_labels), device=device) out_label_ids_gpu = torch.empty(test_sample_len, dtype=labels_all.dtype, device=device) @@ -1056,6 +788,8 @@ def eval_model(self, epoch=0, global_step=0, device=None): from torch.cuda.amp import autocast import contextlib autocast_cm = autocast() if self.args.fp16 else 
contextlib.nullcontext() + if not self.args.fp16: logging.warning(f"Autocast is disabled: {self.args.fp16}") + with torch.no_grad(), autocast_cm: for batch_start_idx in range(0, test_sample_len, batch_size): batch_end_idx = min(batch_start_idx + batch_size, test_sample_len) @@ -1063,6 +797,15 @@ def eval_model(self, epoch=0, global_step=0, device=None): x = input_ids_all[batch_start_idx:batch_end_idx] labels = labels_all[batch_start_idx:batch_end_idx] + # Zero-Sync Sequence Length Check on CPU (May or may not prevent GPU Stalls) + mask_cpu = attention_mask_all_cpu[batch_start_idx:batch_end_idx] + max_seq_len_in_batch = (mask_cpu != 0).sum(dim=1).max().item() + if max_seq_len_in_batch > self.args.max_seq_length: + logger.warning( + f"Aggregator: Batch sequence length ({max_seq_len_in_batch}) " + f"exceeds max_seq_length ({self.args.max_seq_length}). This may lead to truncated inputs or memory issues." + ) + output = self.model(x) if hasattr(output, "logits"): logits = output.logits @@ -1134,55 +877,6 @@ def compute_metrics(self, preds, labels, eval_examples=None): wrong, ) - def oracular_trainer_avail_check(self, end: str) -> bool: - logger.debug("In oracular_trainer_avail_check") - - picked_trainer_is_available = True - - if end in self.trainer_unavail_durations.keys(): - # get aggregator seconds from start - agg_time_since_start_s = time.time() - self.agg_start_time_ts - - curr_trainer_unavail_list = self.trainer_unavail_durations[end] - - # iterate through unavailability list First, check if the current - # time is within any failure window - - for start_time, duration in curr_trainer_unavail_list: - if start_time <= agg_time_since_start_s < start_time + duration: - logger.debug( - f"### Trainer {end} attempted to be picked in failed " f"state." 
- ) - picked_trainer_is_available = False - return picked_trainer_is_available - else: - logger.debug(f"### Trainer {end} is available.") - picked_trainer_is_available = True - - # Remove entries that occurred in the past - updated_trainer_unavail_list = [ - (start_time, duration) - for start_time, duration in curr_trainer_unavail_list - if (start_time + duration) >= agg_time_since_start_s - ] - - # Remove end from trainer_unavail_durations if list is empty TODO: - # Check if deletion is happening properly - if len(updated_trainer_unavail_list) == 0: - logger.debug( - f"### Trainer {end} will no longer fail, removing from " - f"trainer_unavail_durations" - ) - del self.trainer_unavail_durations[end] - else: - self.trainer_unavail_durations[end] = updated_trainer_unavail_list - else: - logger.info( - f"No info on end {end} in self.trainer_unavail_durations" - f", returning TRUE (default)" - ) - return picked_trainer_is_available - def hearbeat_trainer_avail_check(self, end: str) -> bool: picked_trainer_is_available = True last_acceptable_heartbeat_ts = time.time() - ( @@ -1247,6 +941,69 @@ def check_trainer_availability(self, end: str) -> bool: return picked_trainer_is_available + + @timer_decorator + def _prepare_distribution_payload(self, task_to_perform: str): + if self.var: + logger.info( + f"self.var = {self.var}, self.var_threshold = {self.var_threshold}" + ) + + if not self.var_good_enough: + logger.info( + "Sending variance = bad to trainers since variance is greater than threshold" + ) + logger.info("Variance is BAD. Sending request for more samples.") + return { + MessageType.VAR: "bad", + MessageType.ROUND: self._round, + MessageType.MODEL_VERSION: self._model_version, + MessageType.TASK_TO_PERFORM: task_to_perform, + MessageType.DATA_ID: self.data_id, + MessageType.ITERATION_PER_DATA_ID: self.iteration_per_data_id, + } + + logger.info( + "Will send new weights to ends since variance is less than threshold" + ) + logger.info( + "Variance is GOOD. 
Preparing new model weights and grad_pool." + ) + + self.print_trainable_params_stats(location="[_prepare_distribution_payload]") + trainable_params = self.get_trainable_param_state_dict() + + shared_weights = weights_to_device(trainable_params, DeviceType.CPU) + + shared_grad_pool = self.aggregate_grad_pool(self.grad_pool) + shared_grad_pool_trainable = [] + if shared_grad_pool is None: + shared_grad_pool_trainable = None + else: + idx = 0 + for param in self.model.parameters(): + if param.requires_grad: + shared_grad_pool_trainable.append(shared_grad_pool[idx].clone()) + idx += 1 + + payload = { + MessageType.WEIGHTS: shared_weights, + MessageType.GRAD_POOL: shared_grad_pool_trainable, + MessageType.ROUND: self._round, + MessageType.MODEL_VERSION: self._model_version, + MessageType.TASK_TO_PERFORM: task_to_perform, + MessageType.DATA_ID: self.data_id, + MessageType.ITERATION_PER_DATA_ID: self.iteration_per_data_id, + } + + if self._is_model_updated: + self.grad_pool = [] + self.grad_for_var_check_list = [] + self._is_model_updated = False + + return payload + + @timer_decorator def _distribute_weights_sync( self, tag: str, task_to_perform: str = "train" ) -> None: @@ -1255,7 +1012,6 @@ def _distribute_weights_sync( trainers, not the actual model weights. This method is overridden from one in synchronous top aggregator - (..top_aggregator). """ logger.info(f"Device for agg: {next(self.model.parameters()).device}") @@ -1264,36 +1020,14 @@ def _distribute_weights_sync( logger.debug(f"channel not found for tag {tag}") return - # this call waits for at least one peer to join this channel channel.await_join() global_model_params = self.get_global_model_params() - self.weights = global_model_params # TODO: check this, not sure where self.weights is initialised - # before distributing weights, update it from global model - # self._update_weights() - - # busy wait for 0.1 seconds before proceeding. 
This is to wait on - # distribute_weights to let the system state get updated before selector - # is invoked again - + self.weights = global_model_params + logger.debug(f"Starting busy wait at time {time.time()}") time.sleep(0.1) logger.debug(f"Ended busy wait at time {time.time()}") - # before invoking channel.ends() to select, set the trainer_unavail if - # it isn't None if self.trainer_unavail_durations is not None: - # curr_unavail_trainer_list = self.get_curr_unavail_trainers() - # channel.set_curr_unavailable_trainers( - # trainer_unavail_list=curr_unavail_trainer_list ) - # logger.debug(f"Passed curr_unavail_trainer_list: " - # f"{curr_unavail_trainer_list} to channel") else: # Handling the - # case for oort's selector since it expects 3 # arguments - # channel.set_curr_unavailable_trainers(trainer_unavail_list=[]) - - # check if there are any ends to send weights to - - # logger.info( f"Sending weights to trainers with task_to_perform = - # {task_to_perform}" ) - if self.trainer_event_dict is not None: curr_unavail_trainer_list = self.get_curr_unavail_trainers() channel.set_curr_unavailable_trainers( @@ -1303,8 +1037,7 @@ def _distribute_weights_sync( f"Passed curr_unavail_trainer_list: " f"{curr_unavail_trainer_list} to channel" ) - - else: # Handling the case for oort's selector since it expects 3 # arguments + else: channel.set_curr_unavailable_trainers(trainer_unavail_list=[]) ends = channel.ends(VAL_CH_STATE_SEND, task_to_perform) @@ -1313,35 +1046,17 @@ def _distribute_weights_sync( self.ends_not_selected_yet = True else: self.ends_not_selected_yet = False - # NRL TODO: else will take care of randomly selecting x trainers for - # "eval only" operation + if not ends: logger.debug( f"No trainers found for tag {tag}, will " f"move to get() for fetch weights from trainers" ) return - if self.var: - logger.info( - f"self.var = {self.var}, self.var_threshold = {self.var_threshold}" - ) - if self.var_good_enough == True: - logger.info( - "Will send new 
weights to ends since variance is less than threshold" - ) - else: - logger.info( - "Sending variance = bad to trainers since variance is greater than threshold" - ) - - # send out global model parameters to trainers - self.print_trainable_params_stats(location="[populate_params, _distr_weights]") - trainable_params = self.get_trainable_param_state_dict() - self.print_param_dict_stats(trainable_params, location="After filtering") + + payload = self._prepare_distribution_payload(task_to_perform) for end in ends: - # setting start time for OORT TODO: (DG) round_start_time for all - # trainers in the same round may not be the same logger.debug( f"Setting channel property {PROP_ROUND_START_TIME} for " f"end {end}. For round {self._round} at time: {datetime.now()}" @@ -1350,116 +1065,24 @@ def _distribute_weights_sync( end, PROP_ROUND_START_TIME, (self._round, datetime.now()) ) - # we use _round to indicate a model version - # logger.info(f"sending data id: {self.data_id}") - payload = None - if self.var_good_enough == True: + if self.var_good_enough: logger.info( f"sending weights to {end} with model_version: {self._model_version}, data_id: {self.data_id} for task: {task_to_perform}" ) - - shared_weights = weights_to_device(trainable_params, DeviceType.CPU) - - shared_grad_pool = self.aggregate_grad_pool(self.grad_pool) - shared_grad_pool_trainable = [] - if shared_grad_pool == None: - shared_grad_pool_trainable = None - else: - idx = 0 - for param in self.model.parameters(): - if param.requires_grad: - shared_grad_pool_trainable.append(shared_grad_pool[idx].clone()) - idx += 1 - - payload = { - MessageType.WEIGHTS: shared_weights, - MessageType.GRAD_POOL: shared_grad_pool_trainable, - MessageType.ROUND: self._round, - MessageType.MODEL_VERSION: self._model_version, - MessageType.TASK_TO_PERFORM: task_to_perform, - MessageType.DATA_ID: self.data_id, - MessageType.ITERATION_PER_DATA_ID: self.iteration_per_data_id, - } - sizes_mb = { - key.name if hasattr(key, "name") 
else str(key): len( - pickle.dumps(value) - ) - / (1024 * 1024) - for key, value in payload.items() - } - total_size_mb = sum(sizes_mb.values()) - - logger.info( - f"[DEBUG] Payload size breakdown for {end}: " - + ", ".join([f"{k}: {v:.2f} MB" for k, v in sizes_mb.items()]) - + f", Total: {total_size_mb:.2f} MB" - ) - - channel.send(end, payload) - # Added a 1 second sleep so as to not overwhelm mqtt and cuda - time.sleep(1) - - self.grad_pool = [] - self.grad_for_var_check_list = [] else: logger.info( f"sending var = bad to {end} with model_version: {self._model_version}, round: {self._round}, data_id: {self.data_id} for task: {task_to_perform}" ) - payload = { - MessageType.VAR: "bad", - MessageType.ROUND: self._round, - MessageType.MODEL_VERSION: self._model_version, - MessageType.TASK_TO_PERFORM: task_to_perform, - MessageType.DATA_ID: self.data_id, - MessageType.ITERATION_PER_DATA_ID: self.iteration_per_data_id, - } - msg_bytes = pickle.dumps(payload) - logger.info( - f"[DEBUG] Payload size for {end}: {len(msg_bytes) / (1024 * 1024):.2f} MB" - ) - channel.send(end, payload) - # Added a 0.5 second sleep so as to not overwhelm mqtt - # time.sleep(0.5) - self.invoke_gc(payload) + + channel.send(end, payload) + logger.info(f"Sent weights to {end}") - # Update send_time in training_duration_s - if end not in self._track_trainer_version_duration_s.keys(): - logger.debug( - f"{end} not in _track_trainer_version_duration_s, " f"will add" - ) - self._track_trainer_version_duration_s[end] = dict() - self._track_trainer_version_duration_s[end]["last_send_wts_ts"] = -1 - - # sent_wts_version_ts, recv_wts_version_ts is a dict of version - # sent/recv and its timestamp. This will be primarily used by - # AsyncOORT selector since it needs round_duration times. TODO: - # (DG) Right now the dict maintains ALL sent/recv versions and - # timestamps for all trainers. For thousands of trainers it - # might incur memory-bloat. 
Can optimize to retain just the - # versions and timestamps of those that were sent but not - # received back for the trainer. - self._track_trainer_version_duration_s[end]["sent_wts_version_ts"] = {} - self._track_trainer_version_duration_s[end]["recv_wts_version_ts"] = {} - self._track_trainer_version_duration_s[end][ - "total_training_time_s" - ] = -1 - - # Update sent_wts_version_ts with version and timestamp - self._track_trainer_version_duration_s[end]["sent_wts_version_ts"][ - self._model_version - ] = datetime.now() + @timer_decorator def _distribute_weights_async( self, tag: str, task_to_perform: str = "train" ) -> None: - """ - Distribute a global model in asynchronous FL fashion - for FwdLLM. - This method actually sends either gradients or calc_more_var to - trainers, not the actual model weights. - - This method is overridden from one in asynchronous top aggregator - (..top_aggregator). - """ + """Distribute a global model in asynchronous FL fashion - for FwdLLM.""" channel = self.cm.get_by_tag(tag) if not channel: logger.debug(f"channel not found for tag {tag}") @@ -1490,7 +1113,6 @@ def _distribute_weights_async( f"Sending weights to trainers with task_to_perform = {task_to_perform}" ) - # check if there are any ends to send weights to self._curr_agg_version = ( self._model_version, self.data_id, @@ -1506,11 +1128,11 @@ def _distribute_weights_async( trainer_version_states=self._trainer_state_dict, ) logger.info(f"ends: {ends}") - # TODO: check in agg_weights if ends is None if ends is None: self.ends_not_selected_yet = True else: self.ends_not_selected_yet = False + if not ends: logger.debug( f"No trainers found for tag {tag}, will " @@ -1532,6 +1154,7 @@ def _distribute_weights_async( logger.info( "Sending variance = bad to trainers since variance is greater than threshold" ) + if self.var_good_enough: logger.info( f"sending weights to {ends} with model_version: {self._model_version}, round: {self._round}, data_id: {self.data_id} for task: 
{task_to_perform}" @@ -1539,49 +1162,19 @@ def _distribute_weights_async( logger.info( "Variance is GOOD. Preparing and sending new model weights and grad_pool." ) - self.print_trainable_params_stats( - location="[populate_params, _distr_weights]" - ) - trainable_params = self.get_trainable_param_state_dict() - shared_weights = weights_to_device(trainable_params, DeviceType.CPU) - - shared_grad_pool = self.aggregate_grad_pool(self.grad_pool) + + payload = self._prepare_distribution_payload(task_to_perform) - shared_grad_pool_trainable = [] - if shared_grad_pool is None: - shared_grad_pool_trainable = None - else: - idx = 0 - for param in self.model.parameters(): - if param.requires_grad: - shared_grad_pool_trainable.append(shared_grad_pool[idx].clone()) - idx += 1 - - # Clear pools after model has been updated and we move to the next round! if self._is_model_updated: - self.grad_pool = [] # update when model version is updated! - self.grad_for_var_check_list = [] # Update once agg goal met + self.grad_pool = [] + self.grad_for_var_check_list = [] self._is_model_updated = False - payload = { - MessageType.WEIGHTS: shared_weights, - MessageType.GRAD_POOL: shared_grad_pool_trainable, - MessageType.ROUND: self._round, - MessageType.MODEL_VERSION: self._model_version, - MessageType.TASK_TO_PERFORM: task_to_perform, - MessageType.DATA_ID: self.data_id, - MessageType.ITERATION_PER_DATA_ID: self.iteration_per_data_id, - } - # Clean up memory - # del shared_weights - # del shared_grad_pool - # del shared_grad_pool_trainable - else: logger.info( f"sending var = bad to {ends} with model_version: {self._model_version}, round: {self._round}, data_id: {self.data_id} for task: {task_to_perform}" ) - logger.info("Variance is BAD. Sending request for more variance checks.") + logger.info("Variance is BAD. 
Sending request for more samples.") payload = { MessageType.VAR: "bad", MessageType.ROUND: self._round, @@ -1591,43 +1184,16 @@ def _distribute_weights_async( MessageType.ITERATION_PER_DATA_ID: self.iteration_per_data_id, } - # ASYNC SEND LOOP (from AsyncTopAgg) for end in ends: - # Updated the trainer state dict - self._trainer_state_dict[end] = ( - self._model_version, - self.data_id, - self.iteration_per_data_id, - ) - - logger.info( - f"Sending payload to {end} with model_version: {self._model_version}, " - f"data_id: {self.data_id}, iter: {self.iteration_per_data_id}" + logger.debug( + f"Setting channel property {PROP_ROUND_START_TIME} for " + f"end {end}. For round {self._round} at time: {datetime.now()}" ) - - # Set OORT property (from AsyncTopAgg) channel.set_end_property( end, PROP_ROUND_START_TIME, (self._round, datetime.now()) ) - - # Send the payload channel.send(end, payload) - - # Track send time (from AsyncTopAgg) - if end not in self._track_trainer_version_duration_s.keys(): - self._track_trainer_version_duration_s[end] = { - "last_send_wts_ts": -1, - "sent_wts_version_ts": {}, - "recv_wts_version_ts": {}, - "total_training_time_s": 0, # Initialize to 0 - } - self._track_trainer_version_duration_s[end]["sent_wts_version_ts"][ - self._model_version - ] = datetime.now() - - # Clean up the large payload object - del payload - gc.collect() + logger.info(f"Sent weights to all ends") def _distribute_weights(self, tag: str, task_to_perform: str = "train") -> None: if self.is_async: @@ -1636,6 +1202,7 @@ def _distribute_weights(self, tag: str, task_to_perform: str = "train") -> None: else: logger.info("Inside distribute of sync") self._distribute_weights_sync(tag, task_to_perform) + def _aggregate_weights(self, tag: str) -> None: if self.is_async: diff --git a/lib/python/flame/mode/horizontal/syncfl/fwdllm_trainer.py b/lib/python/flame/mode/horizontal/syncfl/fwdllm_trainer.py index 9dfeb1542..54e087ec0 100644 --- 
a/lib/python/flame/mode/horizontal/syncfl/fwdllm_trainer.py +++ b/lib/python/flame/mode/horizontal/syncfl/fwdllm_trainer.py @@ -523,6 +523,12 @@ def _send_grads(self, tag: str) -> None: channel._selector._cleanup_send_ends() + # Optimization: Perform GC and CUDA memory cleanup after sending gradients. + # This moves the "stop the world" synchronous flushes out of the measured + # training/evaluation phases and into the idle time between rounds. + gc.collect() + torch.cuda.empty_cache() + def _perform_channel_leave(self, tag: str) -> None: logger.debug( f"In _perform_channel_leave for tag: {tag} " diff --git a/lib/python/flame/monitor/runtime.py b/lib/python/flame/monitor/runtime.py index cb01e706d..714079989 100644 --- a/lib/python/flame/monitor/runtime.py +++ b/lib/python/flame/monitor/runtime.py @@ -22,7 +22,8 @@ def timer_decorator(func): - """Decorator to time TopAggregator function and log round/data info.""" + """Decorator to time TopAggregator function and log round/data info. + Make sure to populate fwd_llm_stage within the function or in the same class for detailed logging.""" def wrapper(*args, **kwargs): logger.debug("Inside timer_decorator wrapper") diff --git a/scripts/calculate_seq_len_cdf.py b/scripts/calculate_seq_len_cdf.py new file mode 100644 index 000000000..1db15b564 --- /dev/null +++ b/scripts/calculate_seq_len_cdf.py @@ -0,0 +1,63 @@ +import h5py +import numpy as np +from transformers import DistilBertTokenizer +from tqdm import tqdm +import sys + +def calculate_stats(name, data): + print(f"\n--- {name} Sequence Length Stats ---") + print(f"Mean: {np.mean(data):.2f}") + print(f"Median: {np.median(data):.2f}") + print(f"P90: {np.percentile(data, 90):.2f}") + print(f"P95: {np.percentile(data, 95):.2f}") + print(f"P99: {np.percentile(data, 99):.2f}") + print(f"Max: {np.max(data)}") + return np.percentile(data, 95) + +def run_analysis(data_path, partition_path, model_name, method): + print(f"Initializing Tokenizer: {model_name}") + tokenizer = 
DistilBertTokenizer.from_pretrained(model_name) + + print(f"Loading Data: {data_path}") + data_file = h5py.File(data_path, "r") + x_group = data_file["X"] + + # Aggregator Analysis: Individual sequence lengths + agg_lengths = [] + for key in tqdm(x_group.keys(), desc="Aggregator (Global)"): + text = x_group[key][()].decode("utf-8") + agg_lengths.append(len(tokenizer.encode(text, add_special_tokens=True))) + + # Trainer Analysis: Max length per batch of 8 + print(f"Loading Partition: {partition_path}") + partition_file = h5py.File(partition_path, "r") + partition_data = partition_file[method]["partition_data"] + + trainer_batch_max_lengths = [] + batch_size = 8 + + for client_idx in tqdm(partition_data.keys(), desc="Trainer (Partitions)"): + indices = partition_data[client_idx]["train"][()] + for i in range(0, len(indices), batch_size): + batch_indices = indices[i:i + batch_size] + batch_lengths = [len(tokenizer.encode(x_group[str(idx)][()].decode("utf-8"), add_special_tokens=True)) for idx in batch_indices] + trainer_batch_max_lengths.append(max(batch_lengths)) + + data_file.close() + partition_file.close() + + agg_p95 = calculate_stats("Aggregator (Individual)", agg_lengths) + trainer_p95 = calculate_stats("Trainer (Max per Batch of 8)", trainer_batch_max_lengths) + + print(f"\nRecommended max_seq_length for 95% coverage:") + print(f"Aggregator: {int(agg_p95)}") + print(f"Trainer: {int(trainer_p95)}") + +if __name__ == "__main__": + # Parameters based on your input + DATA = "/Users/gaurav/Projects/fednlp_data/data_files/agnews_data.h5" + PARTITION = "/Users/gaurav/Projects/fednlp_data/partition_files/agnews_partition.h5" + MODEL = "distilbert-base-uncased" + METHOD = "uniform" + + run_analysis(DATA, PARTITION, MODEL, METHOD) diff --git a/scripts/plotters/cdf_plot.py b/scripts/plotters/cdf_plot.py index 07ba3e3e1..27ef21d98 100644 --- a/scripts/plotters/cdf_plot.py +++ b/scripts/plotters/cdf_plot.py @@ -14,7 +14,7 @@ 'input_csv': lambda: 
f'output/{CONSTANTS['file_prefix']}-train_latency.csv', 'latency_column': 'train_time_sec', 'output_filename': lambda: f'{CONSTANTS['file_prefix']}-train_latency_cdf.png', - 'x_label': 'Train Latency (seconds)', + 'x_label': 'Overall Train Latency (seconds)', 'y_label': 'CDF', 'title': lambda: f'Train Latency ({CONSTANTS['file_prefix']})' }, @@ -107,6 +107,87 @@ 'x_label': 'Force CUDA Memory Cleanup Latency (seconds)', 'y_label': 'CDF', 'title': lambda: f'Force CUDA Memory Cleanup Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'setup_training_state_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-setup_training_state_latency.csv', + 'latency_column': 'setup_training_state_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-setup_training_state_latency_cdf.png', + 'x_label': 'Setup Training State Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Setup Training State Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'train_one_batch_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-train_one_batch_latency.csv', + 'latency_column': 'train_one_batch_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-train_one_batch_latency_cdf.png', + 'x_label': 'Train One Batch Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Train One Batch Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'training_loop_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-training_loop_latency.csv', + 'latency_column': 'training_loop_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-training_loop_latency_cdf.png', + 'x_label': 'Training Loop Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Training Loop Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'finalize_training_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-finalize_training_latency.csv', + 'latency_column': 
'finalize_training_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-finalize_training_latency_cdf.png', + 'x_label': 'Finalize Training Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Finalize Training Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'trainer_train_model_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-trainer_train_model_latency.csv', + 'latency_column': 'trainer_train_model_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-trainer_train_model_latency_cdf.png', + 'x_label': 'Trainer Train Model Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Trainer Train Model Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'trainer_eval_model_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-trainer_eval_model_latency.csv', + 'latency_column': 'trainer_eval_model_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-trainer_eval_model_latency_cdf.png', + 'x_label': 'Trainer Eval Model Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Trainer Eval Model Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'check_availability_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-check_availability_latency.csv', + 'latency_column': 'check_availability_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-check_availability_latency_cdf.png', + 'x_label': 'Check Availability Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Check Availability Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'perform_training_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-perform_training_latency.csv', + 'latency_column': 'perform_training_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-perform_training_latency_cdf.png', + 'x_label': 'Perform Training Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Perform 
Training Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'emulate_training_delay_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-emulate_training_delay_latency.csv', + 'latency_column': 'emulate_training_delay_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-emulate_training_delay_latency_cdf.png', + 'x_label': 'Emulate Training Delay Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'Emulate Training Delay Latency ({CONSTANTS["file_prefix"]})' } ], 'directory_name': 'plots/', @@ -161,6 +242,69 @@ 'y_label': 'CDF', 'title': lambda: f'Aggregate Runtime Latency ({CONSTANTS["file_prefix"]})' }, + { + 'latency_type': 'aggregate_grads_sync_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-aggregate_grads_sync_latency.csv', + 'latency_column': 'aggregate_grads_sync_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-aggregate_grads_sync_latency_cdf.png', + 'x_label': '_aggregate_grads_sync Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'_aggregate_grads_sync Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'process_single_trainer_message_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-process_single_trainer_message_latency.csv', + 'latency_column': 'process_single_trainer_message_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-process_single_trainer_message_latency_cdf.png', + 'x_label': '_process_single_trainer_message Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'_process_single_trainer_message Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'process_aggregation_goal_met_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-process_aggregation_goal_met_latency.csv', + 'latency_column': 'process_aggregation_goal_met_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-process_aggregation_goal_met_latency_cdf.png', + 'x_label': 
'_process_aggregation_goal_met Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'_process_aggregation_goal_met Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'agg_force_cuda_memory_cleanup_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-agg_force_cuda_memory_cleanup_latency.csv', + 'latency_column': 'agg_force_cuda_memory_cleanup_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-agg_force_cuda_memory_cleanup_latency_cdf.png', + 'x_label': 'agg_force_cuda_memory_cleanup Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'agg_force_cuda_memory_cleanup Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'prepare_distribution_payload_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-prepare_distribution_payload_latency.csv', + 'latency_column': 'prepare_distribution_payload_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-prepare_distribution_payload_latency_cdf.png', + 'x_label': '_prepare_distribution_payload Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'_prepare_distribution_payload Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'distribute_weights_sync_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-distribute_weights_sync_latency.csv', + 'latency_column': 'distribute_weights_sync_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-distribute_weights_sync_latency_cdf.png', + 'x_label': '_distribute_weights_sync Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'_distribute_weights_sync Latency ({CONSTANTS["file_prefix"]})' + }, + { + 'latency_type': 'distribute_weights_async_latency', + 'input_csv': lambda: f'output/{CONSTANTS["file_prefix"]}-distribute_weights_async_latency.csv', + 'latency_column': 'distribute_weights_async_latency', + 'output_filename': lambda: f'{CONSTANTS["file_prefix"]}-distribute_weights_async_latency_cdf.png', + 'x_label': 
'_distribute_weights_async Latency (seconds)', + 'y_label': 'CDF', + 'title': lambda: f'_distribute_weights_async Latency ({CONSTANTS["file_prefix"]})' + }, ], 'directory_name': 'plots/', 'x_lim_left': 0, diff --git a/scripts/plotters/configs.py b/scripts/plotters/configs.py index 650315848..80dff09aa 100644 --- a/scripts/plotters/configs.py +++ b/scripts/plotters/configs.py @@ -169,6 +169,108 @@ def handle_stat_utility(parser: "LogParser", match: re.Match) -> Dict[str, Any]: 'trainer_id': ('trainer_id', None) } }, + { + 'name': 'process_single_trainer_message_latency', + 'regex': re.compile( + r"^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}).*?" + r"\[decorator\]\sRuntime of _process_single_trainer_message:\s(?P<runtime>[\d\.]+)s\s" + r"\(Round=(?P<round_id>\d+),\sDataId=(?P<data_id>\d+),\sIter=(?P<iter_id>\d+),\sTrainerId=(?P<trainer_id>\w+|None)\)" + ), + 'type': 'EXTRACT', + 'group_to_columns': { + 'timestamp': ('timestamp', lambda ts_str: datetime.strptime(ts_str, '%Y-%m-%d %H:%M:%S,%f')), + 'runtime': ('process_single_trainer_message_latency', float), + 'round_id': ('round_id', int), + 'data_id': ('data_id', int), + 'iter_id': ('iteration_id', int), + 'trainer_id': ('trainer_id', None) + } + }, + { + 'name': 'process_aggregation_goal_met_latency', + 'regex': re.compile( + r"^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}).*?" + r"\[decorator\]\sRuntime of _process_aggregation_goal_met:\s(?P<runtime>[\d\.]+)s\s" + r"\(Round=(?P<round_id>\d+),\sDataId=(?P<data_id>\d+),\sIter=(?P<iter_id>\d+),\sTrainerId=(?P<trainer_id>\w+|None)\)" + ), + 'type': 'EXTRACT', + 'group_to_columns': { + 'timestamp': ('timestamp', lambda ts_str: datetime.strptime(ts_str, '%Y-%m-%d %H:%M:%S,%f')), + 'runtime': ('process_aggregation_goal_met_latency', float), + 'round_id': ('round_id', int), + 'data_id': ('data_id', int), + 'iter_id': ('iteration_id', int), + 'trainer_id': ('trainer_id', None) + } + }, + { + 'name': 'agg_force_cuda_memory_cleanup_latency', + 'regex': re.compile( + r"^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}).*?" 
+ r"\[decorator\]\sRuntime of _force_cuda_memory_cleanup:\s(?P<runtime>[\d\.]+)s\s" + r"\(Round=(?P<round_id>None|\d+),\sDataId=(?P<data_id>None|\d+),\sIter=(?P<iter_id>None|\d+),\sTrainerId=(?P<trainer_id>\w+|None)\)" + ), + 'type': 'EXTRACT', + 'group_to_columns': { + 'timestamp': ('timestamp', lambda ts_str: datetime.strptime(ts_str, '%Y-%m-%d %H:%M:%S,%f')), + 'runtime': ('agg_force_cuda_memory_cleanup_latency', float), + 'round_id': ('round_id', lambda x: None if x == 'None' else int(x)), + 'data_id': ('data_id', lambda x: None if x == 'None' else int(x)), + 'iter_id': ('iteration_id', lambda x: None if x == 'None' else int(x)), + 'trainer_id': ('trainer_id', None) + } + }, + { + 'name': 'prepare_distribution_payload_latency', + 'regex': re.compile( + r"^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}).*?" + r"\[decorator\]\sRuntime of _prepare_distribution_payload:\s(?P<runtime>[\d\.]+)s\s" + r"\(Round=(?P<round_id>\d+),\sDataId=(?P<data_id>\d+),\sIter=(?P<iter_id>\d+),\sTrainerId=(?P<trainer_id>\w+|None)\)" + ), + 'type': 'EXTRACT', + 'group_to_columns': { + 'timestamp': ('timestamp', lambda ts_str: datetime.strptime(ts_str, '%Y-%m-%d %H:%M:%S,%f')), + 'runtime': ('prepare_distribution_payload_latency', float), + 'round_id': ('round_id', int), + 'data_id': ('data_id', int), + 'iter_id': ('iteration_id', int), + 'trainer_id': ('trainer_id', None) + } + }, + { + 'name': 'distribute_weights_sync_latency', + 'regex': re.compile( + r"^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}).*?" 
+ r"\[decorator\]\sRuntime of _distribute_weights_sync:\s(?P<runtime>[\d\.]+)s\s" + r"\(Round=(?P<round_id>\d+),\sDataId=(?P<data_id>\d+),\sIter=(?P<iter_id>\d+),\sTrainerId=(?P<trainer_id>\w+|None)\)" + ), + 'type': 'EXTRACT', + 'group_to_columns': { + 'timestamp': ('timestamp', lambda ts_str: datetime.strptime(ts_str, '%Y-%m-%d %H:%M:%S,%f')), + 'runtime': ('distribute_weights_sync_latency', float), + 'round_id': ('round_id', int), + 'data_id': ('data_id', int), + 'iter_id': ('iteration_id', int), + 'trainer_id': ('trainer_id', None) + } + }, + { + 'name': 'distribute_weights_async_latency', + 'regex': re.compile( + r"^(?P<timestamp>\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}).*?" + r"\[decorator\]\sRuntime of _distribute_weights_async:\s(?P<runtime>[\d\.]+)s\s" + r"\(Round=(?P<round_id>\d+),\sDataId=(?P<data_id>\d+),\sIter=(?P<iter_id>\d+),\sTrainerId=(?P<trainer_id>\w+|None)\)" + ), + 'type': 'EXTRACT', + 'group_to_columns': { + 'timestamp': ('timestamp', lambda ts_str: datetime.strptime(ts_str, '%Y-%m-%d %H:%M:%S,%f')), + 'runtime': ('distribute_weights_async_latency', float), + 'round_id': ('round_id', int), + 'data_id': ('data_id', int), + 'iter_id': ('iteration_id', int), + 'trainer_id': ('trainer_id', None) + } + }, ], "flame_fwdllm_trainer": [ { @@ -529,6 +631,41 @@ def handle_stat_utility(parser: "LogParser", match: re.Match) -> Dict[str, Any]: 'log_names': ['aggregate_runtime'], 'columns': ['timestamp', 'aggregate_runtime', 'round_id', 'data_id', 'iteration_id'] }, + 'aggregate_grads_sync_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-aggregate_grads_sync_latency.csv', + 'log_names': ['aggregate_grads_sync_latency'], + 'columns': ['timestamp', 'aggregate_grads_sync_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'process_single_trainer_message_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-process_single_trainer_message_latency.csv', + 'log_names': ['process_single_trainer_message_latency'], + 'columns': ['timestamp', 'process_single_trainer_message_latency', 'round_id', 'data_id', 
'iteration_id'] + }, + 'process_aggregation_goal_met_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-process_aggregation_goal_met_latency.csv', + 'log_names': ['process_aggregation_goal_met_latency'], + 'columns': ['timestamp', 'process_aggregation_goal_met_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'agg_force_cuda_memory_cleanup_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-agg_force_cuda_memory_cleanup_latency.csv', + 'log_names': ['agg_force_cuda_memory_cleanup_latency'], + 'columns': ['timestamp', 'agg_force_cuda_memory_cleanup_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'prepare_distribution_payload_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-prepare_distribution_payload_latency.csv', + 'log_names': ['prepare_distribution_payload_latency'], + 'columns': ['timestamp', 'prepare_distribution_payload_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'distribute_weights_sync_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-distribute_weights_sync_latency.csv', + 'log_names': ['distribute_weights_sync_latency'], + 'columns': ['timestamp', 'distribute_weights_sync_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'distribute_weights_async_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-distribute_weights_async_latency.csv', + 'log_names': ['distribute_weights_async_latency'], + 'columns': ['timestamp', 'distribute_weights_async_latency', 'round_id', 'data_id', 'iteration_id'] + }, }, 'flame_fwdllm_trainer': { # 'train_times': { @@ -601,6 +738,51 @@ def handle_stat_utility(parser: "LogParser", match: re.Match) -> Dict[str, Any]: 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-force_cuda_memory_cleanup_latency.csv', 'log_names': ['force_cuda_memory_cleanup_latency'], 'columns': ['timestamp', 'trainer_id', 'force_cuda_memory_cleanup_latency', 'round_id', 'data_id', 
'iteration_id'] + }, + 'setup_training_state_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-setup_training_state_latency.csv', + 'log_names': ['setup_training_state_latency'], + 'columns': ['timestamp', 'trainer_id', 'setup_training_state_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'train_one_batch_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-train_one_batch_latency.csv', + 'log_names': ['train_one_batch_latency'], + 'columns': ['timestamp', 'trainer_id', 'train_one_batch_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'training_loop_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-training_loop_latency.csv', + 'log_names': ['training_loop_latency'], + 'columns': ['timestamp', 'trainer_id', 'training_loop_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'finalize_training_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-finalize_training_latency.csv', + 'log_names': ['finalize_training_latency'], + 'columns': ['timestamp', 'trainer_id', 'finalize_training_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'trainer_train_model_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-trainer_train_model_latency.csv', + 'log_names': ['trainer_train_model_latency'], + 'columns': ['timestamp', 'trainer_id', 'trainer_train_model_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'trainer_eval_model_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-trainer_eval_model_latency.csv', + 'log_names': ['trainer_eval_model_latency'], + 'columns': ['timestamp', 'trainer_id', 'trainer_eval_model_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'check_availability_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-check_availability_latency.csv', + 'log_names': ['check_availability_latency'], + 'columns': ['timestamp', 'trainer_id', 
'check_availability_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'perform_training_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-perform_training_latency.csv', + 'log_names': ['perform_training_latency'], + 'columns': ['timestamp', 'trainer_id', 'perform_training_latency', 'round_id', 'data_id', 'iteration_id'] + }, + 'emulate_training_delay_latency': { + 'default_output_filename': lambda: f'{CONSTANTS["file_prefix"]}-emulate_training_delay_latency.csv', + 'log_names': ['emulate_training_delay_latency'], + 'columns': ['timestamp', 'trainer_id', 'emulate_training_delay_latency', 'round_id', 'data_id', 'iteration_id'] } }, "flame_fwdllm_trainer_old": {