From ca02300a41cdcd90be072e93363f14df1cecc474 Mon Sep 17 00:00:00 2001
From: Eldar Kurtic
Date: Mon, 3 Mar 2025 15:51:09 +0100
Subject: [PATCH] Extend max_model_length to prevent context truncation

The evals generate up to `max_new_tokens=32768`, but the prompt also
consumes context. Raising `max_model_length` to 38768 leaves 6000 tokens
of headroom for the prompt, so long completions are no longer truncated.
---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index b520ee022..a4cdc2928 100644
--- a/README.md
+++ b/README.md
@@ -272,7 +272,7 @@ We use `lighteval` to evaluate models, with custom tasks defined in `src/open_r1
 
 ```shell
 MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
-MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=32768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens:32768,temperature:0.6,top_p:0.95}"
+MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=38768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens:32768,temperature:0.6,top_p:0.95}"
 OUTPUT_DIR=data/evals/$MODEL
 
 # AIME 2024
@@ -303,14 +303,14 @@ lighteval vllm $MODEL_ARGS "extended|lcb:codegeneration|0|0" \
     --output-dir $OUTPUT_DIR
 ```
 
 > [!IMPORTANT]
-> You must set `max_model_length=32768` in the `vllm` command to align with the `max_new_tokens` we define per eval. Without this, `lighteval` will throw an error.
+> You must set `max_model_length=38768` in the `vllm` command so the context window covers both the prompt and the `max_new_tokens=32768` we define per eval. Without this, `lighteval` will throw an error.
 
 To increase throughput across multiple GPUs, use _data parallel_ as follows:
 
 ```shell
 NUM_GPUS=8
 MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
-MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,data_parallel_size=$NUM_GPUS,max_model_length=32768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens:32768,temperature:0.6,top_p:0.95}"
+MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,data_parallel_size=$NUM_GPUS,max_model_length=38768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens:32768,temperature:0.6,top_p:0.95}"
 TASK=aime24
 OUTPUT_DIR=data/evals/$MODEL
@@ -325,7 +325,7 @@ For large models which require sharding across GPUs, use _tensor parallel_ and r
 
 ```shell
 NUM_GPUS=8
 MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
-MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,tensor_parallel_size=$NUM_GPUS,max_model_length=32768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens:32768,temperature:0.6,top_p:0.95}"
+MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,tensor_parallel_size=$NUM_GPUS,max_model_length=38768,gpu_memory_utilization=0.8,generation_parameters={max_new_tokens:32768,temperature:0.6,top_p:0.95}"
 TASK=aime24
 OUTPUT_DIR=data/evals/$MODEL
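
For reviewers, a quick sanity check of the new limit (illustrative only, not part of the patch; `PROMPT_BUDGET` is an assumed name for the headroom implied by the diff, not a real lighteval or vllm parameter):

```shell
# Illustrative check, not part of the patch: the context window must hold
# the prompt plus the full generation budget.
MAX_NEW_TOKENS=32768   # generation budget from generation_parameters
PROMPT_BUDGET=6000     # assumed prompt headroom implied by the diff
echo "max_model_length=$((MAX_NEW_TOKENS + PROMPT_BUDGET))"   # prints max_model_length=38768
```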