Repository review. #106


Draft · wants to merge 4 commits into base: v0.0
2 changes: 2 additions & 0 deletions rllm/data/dataset_types.py
@@ -67,6 +67,7 @@ class DatasetConfig:
dataloader_batch_size: int = 8

def __post_init__(self):
# @note: if self.datasets is a string, it goes through both if branches (not that problematic).
# Handle single string input
if isinstance(self.datasets, str):
self.datasets = [self.datasets]
@@ -75,6 +76,7 @@ def __post_init__(self):
if isinstance(self.datasets[0], str):
converted_datasets = []
for dataset_name in self.datasets:
# !critical: the comment below doesn't match the code (the fallback it describes is missing).
# Try to match with TrainDataset first, then TestDataset
try:
dataset = TrainDataset(dataset_name)
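The behavior the `@note` describes, and the fallback the `!critical` comment says is missing, can be sketched with toy stand-ins (the enum members and the `DatasetConfig` fields here are invented for illustration; they are not the repo's real definitions). A bare string first gets wrapped in a list, then also flows through the string-to-enum branch:

```python
from dataclasses import dataclass
from enum import Enum

# Toy stand-ins for the real TrainDataset/TestDataset enums (members assumed).
class TrainDataset(Enum):
    AIME = "aime"

class TestDataset(Enum):
    MATH = "math"

@dataclass
class DatasetConfig:
    datasets: object

    def __post_init__(self):
        # A bare string is first wrapped in a list...
        if isinstance(self.datasets, str):
            self.datasets = [self.datasets]
        # ...and then also enters the string-to-enum branch, so string
        # input passes through both if cases (harmless, as the note says).
        if isinstance(self.datasets[0], str):
            converted = []
            for name in self.datasets:
                # Try TrainDataset first, then fall back to TestDataset —
                # the fallback the review comment says the real code lacks.
                try:
                    converted.append(TrainDataset(name))
                except ValueError:
                    converted.append(TestDataset(name))
            self.datasets = converted

cfg = DatasetConfig(datasets="aime")
# cfg.datasets is now [TrainDataset.AIME]
```

With the `except ValueError` fallback present, a name only found in `TestDataset` (e.g. `"math"` above) still resolves; without it, the lookup raises.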
1 change: 1 addition & 0 deletions rllm/data/preprocess/difficulty_judge.py
@@ -67,6 +67,7 @@ def difficulty_fn(idx, entry):

def batch_difficulty(dataset: str, split: str):

# !critical: neither if branch works, because the TrainDataset and TestDataset classes are not subscriptable.
# Figure out if we need a TrainDataset or TestDataset
if split == "train":
dataset_enum = TrainDataset[dataset.upper()]
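The failure mode this comment flags can be reproduced with a minimal sketch, assuming `TrainDataset` is a plain namespace class holding nested enums rather than an `Enum` itself (an assumed shape, not the repo's exact definition). `SomeClass[name]` lookup only works on `Enum` subclasses:

```python
from enum import Enum

# Assumed shape: a plain class grouping nested enums, not an Enum subclass.
class TrainDataset:
    class Math(Enum):
        AIME = "aime"

# Subscripting the outer class, as difficulty_judge.py does, raises TypeError.
try:
    TrainDataset["aime".upper()]
    lookup_raised = False
except TypeError:
    lookup_raised = True

# Name-based subscription works only on an actual Enum subclass.
member = TrainDataset.Math["AIME"]
```

So `TrainDataset[dataset.upper()]` would need to target one of the nested enums (or `TrainDataset` would need to be an `Enum` itself) for the lookup to succeed.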
1 change: 1 addition & 0 deletions rllm/rewards/reward_types.py
@@ -78,6 +78,7 @@ class RewardInput:
}
"""

# !critical: this class is never used.
@dataclass(slots=True, kw_only=True)
class LiveCodebenchInput:
"""Data structure for input required to calculate rewards.
6 changes: 3 additions & 3 deletions scripts/deepscaler/README.md
@@ -11,7 +11,7 @@ Our 8k context script runs on a single node with 8 A100-80GB GPUs:
export VLLM_ATTENTION_BACKEND=XFORMERS
# Run 8K context length training
export MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
./scripts/[deepscaler|deepcoder]/train/run_deepscaler_1.5b_8k.sh --model $MODEL_PATH
./scripts/[deepscaler|deepcoder]/train/deepscaler_1.5b_8k.sh --model $MODEL_PATH
```

## Multi-Node Training (32 GPUs)
@@ -37,13 +37,13 @@ ray start --address=[RAY_ADDRESS]
3. Finally, on the head node, run the training script:
```bash
# Run 16K or 24K context length training
./scripts/train/run_deepscaler_1.5b_[16k|24k].sh --model [CHECKPOINT_PATH]
./scripts/train/deepscaler_1.5b_[16k|24k].sh --model [CHECKPOINT_PATH]
```
We welcome the community to try out different models, context lengths, and RL parameters in the training scripts!

### Ablations

Finally, we provide ablations for the 2k/4k context runs in `scripts/ablation/`. To run:
```bash
./scripts/ablation/run_deepscaler_1.5b_[2k|4k].sh --model [CHECKPOINT_PATH]
./scripts/ablation/deepscaler_1.5b_[2k|4k].sh --model [CHECKPOINT_PATH]
```