diff --git a/instructions/README.md b/instructions/README.md index c43ac40c6..12c699911 100644 --- a/instructions/README.md +++ b/instructions/README.md @@ -44,7 +44,7 @@ apt-get install -y python3-apt | `rollout-max-context-len` | `32768` | max context length in a session | | `rollout-temperature` | `0.6` | temperature | | `advantage-estimator` | (see script) | `on_policy_distillation` / `grpo` | -| `kl-loss-coef` | `0.01` | kl loss weight | +| `kl-loss-coef` | `0.02` | kl loss weight | diff --git a/openclaw-opd/run_qwen3_4b_openclaw_opd.sh b/openclaw-opd/run_qwen3_4b_openclaw_opd.sh index 4624f4a38..05b546891 100644 --- a/openclaw-opd/run_qwen3_4b_openclaw_opd.sh +++ b/openclaw-opd/run_qwen3_4b_openclaw_opd.sh @@ -32,7 +32,10 @@ export RAY_num_heartbeats_timeout=60 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" SLIME_ROOT="$(cd -- "${SCRIPT_DIR}/../slime" &>/dev/null && pwd)" +MEGATRON_ROOT="$(cd -- "${SCRIPT_DIR}/../Megatron-LM" &>/dev/null && pwd)" source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" +cd "${SLIME_ROOT}" +echo "Current Working Directory: $(pwd)" HF_CKPT=${HF_CKPT:-/absolute/path/to/Qwen3-4B-Thinking-2507} REF_LOAD=${REF_LOAD:-${HF_CKPT}} @@ -169,7 +172,7 @@ ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus "${NUM_GPUS}" --d RUNTIME_ENV_JSON="{ \"env_vars\": { - \"PYTHONPATH\": \"/absolute/path/to/OpenClaw-RL/Megatron-LM/:${SCRIPT_DIR}:${SLIME_ROOT}\", + \"PYTHONPATH\": \"${MEGATRON_ROOT}:${SCRIPT_DIR}:${SLIME_ROOT}\", \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\", \"OPENCLAW_EVAL_MODE\": \"${OPENCLAW_EVAL_MODE}\" } diff --git a/openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh b/openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh index 6f7e6d13f..848b3b72a 100644 --- a/openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh +++ b/openclaw-opd/run_qwen3_4b_openclaw_opd_topk.sh @@ -32,6 +32,8 @@ export RAY_num_heartbeats_timeout=60 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" SLIME_ROOT="$(cd -- "${SCRIPT_DIR}/../slime" &>/dev/null && pwd)" source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" +cd "${SLIME_ROOT}" +echo "Current Working Directory: $(pwd)" HF_CKPT=${HF_CKPT:-/absolute/path/to/Qwen3-4B-Thinking-2507} REF_LOAD=${REF_LOAD:-${HF_CKPT}} diff --git a/openclaw-rl/run_qwen3_4b_openclaw_rl.sh b/openclaw-rl/run_qwen3_4b_openclaw_rl.sh index 2effaec26..cbd9fbaec 100755 --- a/openclaw-rl/run_qwen3_4b_openclaw_rl.sh +++ b/openclaw-rl/run_qwen3_4b_openclaw_rl.sh @@ -33,7 +33,10 @@ export RAY_num_heartbeats_timeout=60 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" SLIME_ROOT="$(cd -- "${SCRIPT_DIR}/../slime" &>/dev/null && pwd)" +MEGATRON_ROOT="$(cd -- "${SCRIPT_DIR}/../Megatron-LM" &>/dev/null && pwd)" source "${SLIME_ROOT}/scripts/models/qwen3-4B.sh" +cd "${SLIME_ROOT}" +echo "Current Working Directory: $(pwd)" HF_CKPT=${HF_CKPT:-/absolute/path/to/Qwen3-4B-Thinking-2507} REF_LOAD=${REF_LOAD:-${HF_CKPT}} @@ -173,7 +176,7 @@ ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus "${NUM_GPUS}" --d RUNTIME_ENV_JSON="{ \"env_vars\": { - \"PYTHONPATH\": \"/absolute/path/to/OpenClaw-RL/Megatron-LM/:${SCRIPT_DIR}:${SLIME_ROOT}\", + \"PYTHONPATH\": \"${MEGATRON_ROOT}:${SCRIPT_DIR}:${SLIME_ROOT}\", \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\" } }"