From 085debf760edf5c75be7e81cef3ed30a747c375d Mon Sep 17 00:00:00 2001 From: JasonOA888 Date: Thu, 12 Mar 2026 20:40:36 +0800 Subject: [PATCH 1/2] fix: add cd to SLIME_ROOT before train_async.py in all training scripts Fixes #5 ## Summary - train_async.py requires the working directory to be slime/, otherwise it cannot find the necessary modules and configs - Added cd "${SLIME_ROOT}" after sourcing the model config in all three training scripts ## Changes - openclaw-rl/run_qwen3_4b_openclaw_rl.sh - openclaw-opd/run_qwen3_4b_openclaw_opd.sh - openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh Each script now includes: ```bash source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" cd "${SLIME_ROOT}" echo "Current Working Directory: $(pwd)" ``` ## Test plan - [x] Verified cd command is added after source - [x] Verified SLIME_ROOT variable is correctly expanded --- instructions/README.md | 2 +- openclaw-opd/run_qwen3_4b_openclaw_opd.sh | 2 ++ openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh | 2 ++ openclaw-rl/run_qwen3_4b_openclaw_rl.sh | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/instructions/README.md b/instructions/README.md index c43ac40c6..12c699911 100644 --- a/instructions/README.md +++ b/instructions/README.md @@ -44,7 +44,7 @@ apt-get install -y python3-apt | `rollout-max-context-len` | `32768` | max context length in a session | | `rollout-temperature` | `0.6` | temperature | | `advantage-estimator` | (see script) | `on_policy_distillation` / `grpo` | -| `kl-loss-coef` | `0.01` | kl loss weight | +| `kl-loss-coef` | `0.02` | kl loss weight | diff --git a/openclaw-opd/run_qwen3_4b_openclaw_opd.sh b/openclaw-opd/run_qwen3_4b_openclaw_opd.sh index 4624f4a38..59348f8cc 100644 --- a/openclaw-opd/run_qwen3_4b_openclaw_opd.sh +++ b/openclaw-opd/run_qwen3_4b_openclaw_opd.sh @@ -33,6 +33,8 @@ export RAY_num_heartbeats_timeout=60 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" SLIME_ROOT="$(cd -- "${SCRIPT_DIR}/../slime" &>/dev/null && pwd)" source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" +cd "${SLIME_ROOT}" +echo "Current Working Directory: $(pwd)" HF_CKPT=${HF_CKPT:-/absolute/path/to/Qwen3-4B-Thinking-2507} REF_LOAD=${REF_LOAD:-${HF_CKPT}} diff --git a/openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh b/openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh index 6f7e6d13f..848b3b72a 100644 --- a/openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh +++ b/openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh @@ -32,6 +32,8 @@ export RAY_num_heartbeats_timeout=60 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" SLIME_ROOT="$(cd -- "${SCRIPT_DIR}/../slime" &>/dev/null && pwd)" source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" +cd "${SLIME_ROOT}" +echo "Current Working Directory: $(pwd)" HF_CKPT=${HF_CKPT:-/absolute/path/to/Qwen3-4B-Thinking-2507} REF_LOAD=${REF_LOAD:-${HF_CKPT}} diff --git a/openclaw-rl/run_qwen3_4b_openclaw_rl.sh b/openclaw-rl/run_qwen3_4b_openclaw_rl.sh index 2effaec26..d139af88f 100755 --- a/openclaw-rl/run_qwen3_4b_openclaw_rl.sh +++ b/openclaw-rl/run_qwen3_4b_openclaw_rl.sh @@ -34,6 +34,8 @@ export RAY_num_heartbeats_timeout=60 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" SLIME_ROOT="$(cd -- "${SCRIPT_DIR}/../slime" &>/dev/null && pwd)" source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" +cd "${SLIME_ROOT}" +echo "Current Working Directory: $(pwd)" HF_CKPT=${HF_CKPT:-/absolute/path/to/Qwen3-4B-Thinking-2507} REF_LOAD=${REF_LOAD:-${HF_CKPT}} From 8119bc06866185c37fd13d6b59445e2701efbd98 Mon Sep 17 00:00:00 2001 From: JasonOA888 Date: Thu, 12 Mar 2026 20:42:30 +0800 Subject: [PATCH 2/2] fix: derive Megatron-LM PYTHONPATH from script location Fixes #3 ## Summary - Replace hardcoded /absolute/path/to/OpenClaw-RL/Megatron-LM/ with dynamically computed MEGATRON_ROOT - Computes MEGATRON_ROOT using the same cd && pwd pattern as SLIME_ROOT ## Changes - openclaw-rl/run_qwen3_4b_openclaw_rl.sh - openclaw-opd/run_qwen3_4b_openclaw_opd.sh Each script now includes: ```bash MEGATRON_ROOT="$(cd -- "${SCRIPT_DIR}/../Megatron-LM" &>/dev/null && pwd)" ... "PYTHONPATH": "${MEGATRON_ROOT}:${SCRIPT_DIR}:${SLIME_ROOT}" ``` ## Test plan - [x] Verified SCRIPT_DIR/../Megatron-LM resolves correctly - [x] Pattern matches existing SLIME_ROOT computation - [x] Shell quoting correct in JSON string --- openclaw-opd/run_qwen3_4b_openclaw_opd.sh | 3 ++- openclaw-rl/run_qwen3_4b_openclaw_rl.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/openclaw-opd/run_qwen3_4b_openclaw_opd.sh b/openclaw-opd/run_qwen3_4b_openclaw_opd.sh index 59348f8cc..05b546891 100644 --- a/openclaw-opd/run_qwen3_4b_openclaw_opd.sh +++ b/openclaw-opd/run_qwen3_4b_openclaw_opd.sh @@ -32,6 +32,7 @@ export RAY_num_heartbeats_timeout=60 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" SLIME_ROOT="$(cd -- "${SCRIPT_DIR}/../slime" &>/dev/null && pwd)" +MEGATRON_ROOT="$(cd -- "${SCRIPT_DIR}/../Megatron-LM" &>/dev/null && pwd)" source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" cd "${SLIME_ROOT}" echo "Current Working Directory: $(pwd)" @@ -171,7 +172,7 @@ ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus "${NUM_GPUS}" --d RUNTIME_ENV_JSON="{ \"env_vars\": { - \"PYTHONPATH\": \"/absolute/path/to/OpenClaw-RL/Megatron-LM/:${SCRIPT_DIR}:${SLIME_ROOT}\", + \"PYTHONPATH\": \"${MEGATRON_ROOT}:${SCRIPT_DIR}:${SLIME_ROOT}\", \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\", \"OPENCLAW_EVAL_MODE\": \"${OPENCLAW_EVAL_MODE}\" } diff --git a/openclaw-rl/run_qwen3_4b_openclaw_rl.sh b/openclaw-rl/run_qwen3_4b_openclaw_rl.sh index d139af88f..cbd9fbaec 100755 --- a/openclaw-rl/run_qwen3_4b_openclaw_rl.sh +++ b/openclaw-rl/run_qwen3_4b_openclaw_rl.sh @@ -33,6 +33,7 @@ export RAY_num_heartbeats_timeout=60 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" SLIME_ROOT="$(cd -- "${SCRIPT_DIR}/../slime" &>/dev/null && pwd)" +MEGATRON_ROOT="$(cd -- "${SCRIPT_DIR}/../Megatron-LM" &>/dev/null && pwd)" source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" cd "${SLIME_ROOT}" echo "Current Working Directory: $(pwd)" @@ -175,7 +176,7 @@ ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus "${NUM_GPUS}" --d RUNTIME_ENV_JSON="{ \"env_vars\": { - \"PYTHONPATH\": \"/absolute/path/to/OpenClaw-RL/Megatron-LM/:${SCRIPT_DIR}:${SLIME_ROOT}\", + \"PYTHONPATH\": \"${MEGATRON_ROOT}:${SCRIPT_DIR}:${SLIME_ROOT}\", \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\" } }"