Update (base update)

vmoens · vmoens · commit ce4d189c6cf4 · 2025-10-23T10:10:17.000-07:00
[ghstack-poisoned]
diff --git a/.github/unittest/llm/scripts_llm/setup_env.sh b/.github/unittest/llm/scripts_llm/setup_env.sh
@@ -9,31 +9,16 @@ set -e
 export DEBIAN_FRONTEND=noninteractive
 export TZ=UTC
 apt-get update
-apt-get install -yq --no-install-recommends git cmake
+apt-get install -yq --no-install-recommends git wget unzip curl patchelf
 # Avoid error: "fatal: unsafe repository"
 git config --global --add safe.directory '*'
-apt-get install -yq --no-install-recommends wget \
-    gcc \
-    g++ \
-    unzip \
-    curl \
-    patchelf \
-    libosmesa6-dev \
-    libgl1-mesa-glx \
-    libglfw3 \
-    swig3.0 \
-    libglew-dev \
-    libglvnd0 \
-    libgl1 \
-    libglx0 \
-    libegl1 \
-    libgles2
+# The base PyTorch devel image provides compilers, CMake >= 3.22, and most build deps,
+# so only the minimal utilities not guaranteed to be present are installed above.
 
-# Upgrade specific package
-apt-get install -yq --no-install-recommends --only-upgrade libstdc++6
+# CMake available in the PyTorch devel image (Ubuntu 22.04) is sufficient.
 
-apt-get clean
-rm -rf /var/lib/apt/lists/*
+# Cleanup APT cache
+apt-get clean && rm -rf /var/lib/apt/lists/*
 
 this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 root_dir="$(git rev-parse --show-toplevel)"
diff --git a/.github/workflows/test-linux-llm.yml b/.github/workflows/test-linux-llm.yml
@@ -32,7 +32,7 @@ jobs:
       runner: "linux.g6.4xlarge.experimental.nvidia.gpu"
       # gpu-arch-type: cuda
       # gpu-arch-version: "11.7"
-      docker-image: "nvidia/cudagl:11.4.0-base"
+      docker-image: "pytorch/pytorch:2.8.0-cuda12.8-cudnn9-devel"
       timeout: 120
       script: |
         if [[ "${{ github.ref }}" =~ release/* ]]; then
@@ -45,7 +45,7 @@ jobs:
 
         set -euo pipefail
         export PYTHON_VERSION="3.9"
-        export CU_VERSION="cu117"
+        export CU_VERSION="cu128"
         export TAR_OPTIONS="--no-same-owner"
         export UPLOAD_CHANNEL="nightly"
         export TF_CPP_MIN_LOG_LEVEL=0
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -475,6 +475,18 @@ def update_policy_weights_(
                 # Apply to local policy
                 if hasattr(self, "policy") and isinstance(self.policy, nn.Module):
                     strategy.apply_weights(self.policy, weights)
+            elif (
+                hasattr(self, "_original_policy")
+                and isinstance(self._original_policy, nn.Module)
+                and hasattr(self, "policy")
+                and isinstance(self.policy, nn.Module)
+            ):
+                # If no weights were provided, mirror weights from the original (trainer) policy
+                from torchrl.weight_update.weight_sync_schemes import WeightStrategy
+
+                strategy = WeightStrategy(extract_as="tensordict")
+                weights = strategy.extract_weights(self._original_policy)
+                strategy.apply_weights(self.policy, weights)
             # Otherwise, no action needed - policy is local and changes are immediately visible
 
     def __iter__(self) -> Iterator[TensorDictBase]:
diff --git a/torchrl/modules/llm/backends/vllm/vllm_async.py b/torchrl/modules/llm/backends/vllm/vllm_async.py
@@ -20,7 +20,6 @@
 from concurrent.futures import ThreadPoolExecutor, wait
 from typing import Any, Literal, TYPE_CHECKING
 
-
 import torch
 
 from torchrl._utils import logger as torchrl_logger
@@ -58,6 +57,7 @@ def _get_ray():
             "ray is not installed. Please install it with `pip install ray`."
         ) from e
 
+
 class _AsyncvLLMWorker:
     """Async vLLM worker for Ray with weight update capabilities.