diff --git a/rllm/data/dataset_types.py b/rllm/data/dataset_types.py
index 9847b24e7..c9b926f2d 100644
--- a/rllm/data/dataset_types.py
+++ b/rllm/data/dataset_types.py
@@ -67,6 +67,7 @@ class DatasetConfig:
     dataloader_batch_size: int = 8
 
     def __post_init__(self):
+        # @note : if self.datasets is a string, it goes through both if cases (not that problematic).
         # Handle single string input
         if isinstance(self.datasets, str):
             self.datasets = [self.datasets]
@@ -75,6 +76,7 @@ def __post_init__(self):
         if isinstance(self.datasets[0], str):
             converted_datasets = []
             for dataset_name in self.datasets:
+                # !critical : the comment below doesn't match the code that follows (code is missing).
                 # Try to match with TrainDataset first, then TestDataset
                 try:
                     dataset = TrainDataset(dataset_name)
diff --git a/rllm/data/preprocess/difficulty_judge.py b/rllm/data/preprocess/difficulty_judge.py
index 75a8d2ae0..2fec964d2 100644
--- a/rllm/data/preprocess/difficulty_judge.py
+++ b/rllm/data/preprocess/difficulty_judge.py
@@ -67,6 +67,7 @@ def difficulty_fn(idx, entry):
 
 
 def batch_difficulty(dataset: str, split: str):
+    # !critical : the two if cases do not work because the TrainDataset and TestDataset classes are not iterable.
     # Figure out if we need a TrainDataset or TestDataset
     if split == "train":
         dataset_enum = TrainDataset[dataset.upper()]
diff --git a/rllm/rewards/reward_types.py b/rllm/rewards/reward_types.py
index dffff084d..39ede419e 100644
--- a/rllm/rewards/reward_types.py
+++ b/rllm/rewards/reward_types.py
@@ -78,6 +78,7 @@ class RewardInput:
     }
     """
 
+# !critical : this class is never used.
 @dataclass(slots=True, kw_only=True)
 class LiveCodebenchInput:
     """Data structure for input required to calculate rewards.
diff --git a/scripts/deepscaler/README.md b/scripts/deepscaler/README.md
index e27e0f7a6..e5e16ba2e 100644
--- a/scripts/deepscaler/README.md
+++ b/scripts/deepscaler/README.md
@@ -11,7 +11,7 @@ Our 8k context script runs on a single node with 8 A100-80GB GPUs:
 export VLLM_ATTENTION_BACKEND=XFORMERS
 # Run 8K context length training
 export MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
-./scripts/[deepscaler|deepcoder]/train/run_deepscaler_1.5b_8k.sh --model $MODEL_PATH
+./scripts/[deepscaler|deepcoder]/train/deepscaler_1.5b_8k.sh --model $MODEL_PATH
 ```
 
 ## Multi-Node Training (32 GPUs)
@@ -37,7 +37,7 @@ ray start --address=[RAY_ADDRESS]
 3. Finally, on the head node, run the training script:
 ```bash
 # Run 16K or 24K context length training
-./scripts/train/run_deepscaler_1.5b_[16k|24k].sh --model [CHECKPOINT_PATH]
+./scripts/train/deepscaler_1.5b_[16k|24k].sh --model [CHECKPOINT_PATH]
 ```
 
 We welcome the community to try out different models, context legnths, and RL parameters in the training scripts!
@@ -45,5 +45,5 @@ We welcome the community to try out different models, context legnths, and RL pa
 
 Finally, we provide ablations for the 2k/4k context runs in `scripts/ablation/`. To run:
 ```bash
-./scripts/ablation/run_deepscaler_1.5b_[2k|4k].sh --model [CHECKPOINT_PATH]
+./scripts/ablation/deepscaler_1.5b_[2k|4k].sh --model [CHECKPOINT_PATH]
 ```
\ No newline at end of file
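
Supplementary sketch for the `@note` in `dataset_types.py`: a minimal, self-contained reproduction (with hypothetical enum members, not the real rllm definitions) showing that a bare string passes through both `if` cases in `__post_init__` and still ends up converted correctly, which is why the note calls it "not that problematic":

```python
from dataclasses import dataclass
from enum import Enum
from typing import Union


class TrainDataset(Enum):
    # Hypothetical members for illustration only; the real rllm enums differ.
    AIME = "aime"
    MATH = "math"


@dataclass
class DatasetConfig:
    datasets: Union[str, list]
    dataloader_batch_size: int = 8

    def __post_init__(self):
        # A bare string is first wrapped in a list here...
        if isinstance(self.datasets, str):
            self.datasets = [self.datasets]
        # ...and then this branch also fires, converting the string names to
        # enum members, so the double pass is harmless, as the @note observes.
        if isinstance(self.datasets[0], str):
            self.datasets = [TrainDataset(name) for name in self.datasets]


cfg = DatasetConfig(datasets="aime")
print(cfg.datasets)  # [<TrainDataset.AIME: 'aime'>]
```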
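
Supplementary sketch for the `!critical` in `difficulty_judge.py`: assuming `TrainDataset`/`TestDataset` are plain namespace classes that hold nested enums (an assumption taken from the review note, not verified against the repo), `TrainDataset[dataset.upper()]` raises a `TypeError`. One possible workaround, shown here as a hypothetical `resolve_dataset` helper, is to search the nested enums by member name:

```python
from enum import Enum


# Hedged assumption: TrainDataset modeled as a plain namespace class holding
# nested enums. This mirrors the review note's claim; it is not copied from rllm.
class TrainDataset:
    class Math(Enum):
        AIME = "aime"

    class Code(Enum):
        APPS = "apps"


def resolve_dataset(container: type, name: str):
    """Hypothetical helper: look the name up across the container's nested enums."""
    for group in vars(container).values():
        if isinstance(group, type) and issubclass(group, Enum):
            if name.upper() in group.__members__:
                return group[name.upper()]
    raise KeyError(f"{name!r} not found in {container.__name__}")


# TrainDataset[dataset.upper()] raises TypeError on a class like this, because
# a plain class is neither subscriptable nor iterable; the helper works instead.
print(resolve_dataset(TrainDataset, "aime"))  # Math.AIME
```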