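Update: reuse computed output in test policy forward; cast weights to policy device in update_policy_weights_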

vmoens · vmoens · commit a6e1c7f5a1c5 · 2025-10-23T11:42:47.000-07:00
[ghstack-poisoned]
diff --git a/test/test_collector.py b/test/test_collector.py
@@ -162,7 +162,7 @@ def forward(self, observation):
         output = self.linear(observation)
         if self.multiple_outputs:
             return output, output.sum(), output.min(), output.max()
-        return self.linear(observation)
+        return output
 
 
 class UnwrappablePolicy(nn.Module):
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -486,6 +486,9 @@ def update_policy_weights_(
 
                 strategy = WeightStrategy(extract_as="tensordict")
                 weights = strategy.extract_weights(self._original_policy)
+                # Cast weights to the policy device before applying
+                if self.policy_device is not None:
+                    weights = weights.to(self.policy_device)
                 strategy.apply_weights(self.policy, weights)
             # Otherwise, no action needed - policy is local and changes are immediately visible