
Commit a1c6ee9

Add xla random generator. (#9539)
This is the first PR for #9159. It only adds the generator, without using it anywhere yet. The comment linked in #9159 (comment) outlines the steps for the entire change.
1 parent 4199865 commit a1c6ee9
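To make the scope concrete, here is a minimal sketch of how the new generator is exercised, mirroring the unit tests added in this commit (test/cpp/test_xla_generator.cpp). The standalone main() wrapper and the includes are assumptions for illustration, not part of the change.

```cpp
#include <torch/torch.h>

#include "torch_xla/csrc/xla_generator.h"

int main() {
  // Build a generator for XLA device 0, as the new tests do.
  at::Generator gen = at::make_generator<at::XLAGeneratorImpl>(0);

  // Seed and offset are plain 64-bit values held in XLAGeneratorState.
  gen.set_current_seed(12345);
  gen.set_offset(0);

  // get_state() serializes seed and offset into a 16-byte CPU byte tensor;
  // set_state() restores them on another generator.
  at::Tensor state = gen.get_state();
  at::Generator other = at::make_generator<at::XLAGeneratorImpl>(1);
  other.set_state(state);  // other.current_seed() is now 12345

  return 0;
}
```

Nothing in the runtime consumes the generator yet; this commit only adds the class, its build targets, and its tests.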

8 files changed: +266, -3 lines

.github/scripts/run_tests.sh

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@ function run_torch_xla_cpp_tests() {
     "test_tensor"
     # disable test_xla_backend_intf since it is flaky on upstream
     #"test_xla_backend_intf"
+    "test_xla_generator"
     "test_xla_sharding"
     "test_runtime"
     "test_status_dont_show_cpp_stacktraces"

BUILD

Lines changed: 4 additions & 3 deletions
@@ -72,15 +72,16 @@ test_suite(
         "//test/cpp:test_aten_xla_tensor_4",
         "//test/cpp:test_aten_xla_tensor_5",
         "//test/cpp:test_aten_xla_tensor_6",
+        "//test/cpp:test_debug_macros",
         "//test/cpp:test_ir",
         "//test/cpp:test_lazy",
         "//test/cpp:test_replication",
-        "//test/cpp:test_tensor",
-        "//test/cpp:test_xla_sharding",
         "//test/cpp:test_runtime",
         "//test/cpp:test_status_dont_show_cpp_stacktraces",
         "//test/cpp:test_status_show_cpp_stacktraces",
-        "//test/cpp:test_debug_macros",
+        "//test/cpp:test_tensor",
+        "//test/cpp:test_xla_generator",
+        "//test/cpp:test_xla_sharding",
         "//torch_xla/csrc/runtime:pjrt_computation_client_test",
         # "//torch_xla/csrc/runtime:ifrt_computation_client_test",
     ],

test/cpp/BUILD

Lines changed: 12 additions & 0 deletions
@@ -204,3 +204,15 @@ ptxla_cc_test(
         "@com_google_googletest//:gtest_main",
     ],
 )
+
+ptxla_cc_test(
+    name = "test_xla_generator",
+    srcs = ["test_xla_generator.cpp"],
+    deps = [
+        ":cpp_test_util",
+        ":torch_xla_test",
+        "//torch_xla/csrc:tensor",
+        "//torch_xla/csrc:aten_cuda_functions",
+        "@com_google_googletest//:gtest_main",
+    ],
+)

test/cpp/run_tests.sh

Lines changed: 1 addition & 0 deletions
@@ -100,6 +100,7 @@ if [[ "$RUN_CPP_TESTS" == "cpp_tests" ]]; then
     # disable test_xla_backend_intf since it is flaky on upstream
     #"test_xla_backend_intf"
     "test_xla_sharding"
+    "test_xla_generator"
     "test_runtime"
     "test_status_dont_show_cpp_stacktraces"
     "test_status_show_cpp_stacktraces"

test/cpp/test_xla_generator.cpp

Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
#include <gtest/gtest.h>
#include <torch/torch.h>

#include "test/cpp/torch_xla_test.h"
#include "torch_xla/csrc/xla_generator.h"

namespace torch_xla {
namespace cpp_test {

// Test fixture for XLAGenerator tests
class XLAGeneratorTest : public ::torch_xla::cpp_test::TorchXlaTest {
 protected:
  void SetUp() {
    // Create a generator for XLA device 0
    gen_ = at::make_generator<at::XLAGeneratorImpl>(0);
  }

  at::Generator gen_;
};

TEST_F(XLAGeneratorTest, Constructor) {
  // Check that the generator was created for the correct device
  ASSERT_EQ(gen_.device().type(), at::DeviceType::XLA);
  ASSERT_EQ(gen_.device().index(), 0);

  // Check that the initial seed is 0
  ASSERT_EQ(gen_.current_seed(), 0);
}

TEST_F(XLAGeneratorTest, Seed) {
  // Test setting and getting the current seed
  uint64_t seed_val = 12345;
  gen_.set_current_seed(seed_val);
  ASSERT_EQ(gen_.current_seed(), seed_val);

  // Test the seed() method, which should set a non-deterministic seed
  uint64_t old_seed = gen_.current_seed();
  uint64_t new_seed = gen_.seed();
  // The new seed should be different from the old one and set as the current
  // seed
  ASSERT_NE(new_seed, old_seed);
  ASSERT_EQ(gen_.current_seed(), new_seed);
}

TEST_F(XLAGeneratorTest, GetAndSetState) {
  uint64_t seed_val = 98765;
  uint64_t offset_val = 0;

  // Set seed and offset on the original generator
  gen_.set_current_seed(seed_val);
  gen_.set_offset(offset_val);

  // Get the state from the original generator
  at::Tensor state_tensor = gen_.get_state();

  // Create a new generator
  auto new_gen = at::make_generator<at::XLAGeneratorImpl>(1);
  ASSERT_NE(new_gen.current_seed(), seed_val);

  // Set the state of the new generator
  new_gen.set_state(state_tensor);

  // Verify the state of the new generator
  ASSERT_EQ(new_gen.current_seed(), seed_val);
  ASSERT_EQ(new_gen.get_offset(), offset_val);
}

TEST_F(XLAGeneratorTest, SetStateValidation) {
  // Test that set_state throws with incorrect tensor properties
  auto new_gen = at::make_generator<at::XLAGeneratorImpl>(0);

  // Incorrect size
  auto wrong_size_tensor = at::empty({10}, at::kByte);
  EXPECT_THROW(new_gen.set_state(wrong_size_tensor), c10::Error);

  // Incorrect dtype
  auto wrong_dtype_tensor = at::empty({16}, at::kInt);
  EXPECT_THROW(new_gen.set_state(wrong_dtype_tensor), c10::Error);
}

TEST_F(XLAGeneratorTest, Clone) {
  uint64_t seed_val = 1;
  uint64_t offset_val = 0;

  // Set state on the original generator
  gen_.set_current_seed(seed_val);
  gen_.set_offset(offset_val);

  // Clone the generator
  auto cloned_gen = gen_.clone();

  // Verify that the cloned generator has the same state but is a different
  // object
  ASSERT_NE(std::addressof(cloned_gen), std::addressof(gen_));
  ASSERT_EQ(cloned_gen.device(), gen_.device());
  ASSERT_EQ(cloned_gen.current_seed(), gen_.current_seed());
  ASSERT_EQ(cloned_gen.get_offset(), offset_val);

  // Modify the original generator's seed and check that the clone is unaffected
  gen_.set_current_seed(9999);
  ASSERT_EQ(cloned_gen.current_seed(), seed_val);
  ASSERT_NE(cloned_gen.current_seed(), gen_.current_seed());
}

}  // namespace cpp_test
}  // namespace torch_xla

torch_xla/csrc/BUILD

Lines changed: 2 additions & 0 deletions
@@ -64,6 +64,7 @@ ptxla_cc_library(
         "torch_util.cpp",
         "view.cpp",
         "xla_backend_impl.cpp",
+        "xla_generator.cpp",
         "xla_graph_executor.cpp",
         "xla_lower_util.cpp",
         "xla_op_builder.cpp",
@@ -107,6 +108,7 @@ ptxla_cc_library(
         "torch_util.h",
         "view.h",
         "xla_backend_impl.h",
+        "xla_generator.h",
         "xla_graph_executor.h",
         "xla_lower_util.h",
         "xla_op_builder.h",

torch_xla/csrc/xla_generator.cpp

Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
#include "xla_generator.h"

#include <ATen/Functions.h>
#include <ATen/core/ScalarType.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Device.h>
#include <c10/core/DeviceType.h>
#include <c10/core/TensorImpl.h>
#include <c10/util/intrusive_ptr.h>

#include <cstring>

namespace at {

XLAGeneratorImpl::XLAGeneratorImpl(DeviceIndex device_index)
    : c10::GeneratorImpl{Device(DeviceType::XLA, device_index),
                         DispatchKeySet(c10::DispatchKey::XLA)} {
  state_ = c10::make_intrusive<XLAGeneratorState>();
}

XLAGeneratorImpl::XLAGeneratorImpl(DeviceIndex device_index,
                                   c10::intrusive_ptr<XLAGeneratorState> state)
    : c10::GeneratorImpl{Device(DeviceType::XLA, device_index),
                         DispatchKeySet(c10::DispatchKey::XLA)},
      state_(std::move(state)) {}

DeviceType XLAGeneratorImpl::device_type() { return DeviceType::XLA; }

std::shared_ptr<XLAGeneratorImpl> XLAGeneratorImpl::clone() const {
  return std::shared_ptr<XLAGeneratorImpl>(clone_impl());
}

XLAGeneratorImpl* XLAGeneratorImpl::clone_impl() const {
  return new XLAGeneratorImpl(device_.index(), state_->clone());
}

void XLAGeneratorImpl::set_current_seed(uint64_t seed) { state_->seed_ = seed; }

uint64_t XLAGeneratorImpl::current_seed() const { return state_->seed_; }

uint64_t XLAGeneratorImpl::seed() {
  uint64_t random = c10::detail::getNonDeterministicRandom(true);
  set_current_seed(random);
  return random;
}

void XLAGeneratorImpl::set_offset(uint64_t offset) { state_->offset_ = offset; }

uint64_t XLAGeneratorImpl::get_offset() const { return state_->offset_; }

/* Serialize the generator state into a CPU tensor. */
c10::intrusive_ptr<c10::TensorImpl> XLAGeneratorImpl::get_state() const {
  static const size_t seed_size = sizeof(uint64_t);
  static const size_t offset_size = sizeof(uint64_t);
  static const size_t total_size = seed_size + offset_size;

  auto state_tensor =
      at::empty({(int64_t)total_size},
                at::TensorOptions().dtype(at::kByte).device(at::kCPU));
  uint8_t* data_ptr = state_tensor.data_ptr<uint8_t>();
  memcpy(data_ptr, &state_->seed_, seed_size);
  memcpy(data_ptr + seed_size, &state_->offset_, offset_size);
  return state_tensor.getIntrusivePtr();
}

void XLAGeneratorImpl::set_state(const c10::TensorImpl& new_state) {
  static const size_t seed_size = sizeof(uint64_t);
  static const size_t offset_size = sizeof(uint64_t);
  static const size_t total_size = seed_size + offset_size;

  TORCH_CHECK(new_state.numel() == total_size,
              "The given state must be a byte tensor of size ", total_size,
              ", but was size ", new_state.numel());
  TORCH_CHECK(new_state.dtype() == at::kByte,
              "The given state must be a byte tensor, but was ",
              new_state.dtype());
  TORCH_CHECK(new_state.is_cpu(), "The given state must be a CPU tensor");

  auto new_rng_state = new_state.data_dtype_initialized<uint8_t>();
  memcpy(&state_->seed_, new_rng_state, seed_size);
  memcpy(&state_->offset_, new_rng_state + seed_size, offset_size);
}

}  // namespace at
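For reference, the state layout that get_state() produces and set_state() expects is a 16-byte CPU byte tensor: bytes [0, 8) hold the seed and bytes [8, 16) hold the offset, both copied with memcpy in host byte order. The helper below shows one way to read such a tensor back; DecodeXlaGeneratorState is a hypothetical name for illustration, not something this commit defines.

```cpp
#include <cstdint>
#include <cstring>
#include <utility>

#include <torch/torch.h>

// Hypothetical helper: decode the 16-byte state tensor produced by
// XLAGeneratorImpl::get_state(). Bytes [0, 8) are the seed, bytes [8, 16)
// are the offset, both in host byte order.
std::pair<uint64_t, uint64_t> DecodeXlaGeneratorState(const at::Tensor& state) {
  TORCH_CHECK(state.numel() == 16, "expected a 16-byte state tensor");
  TORCH_CHECK(state.dtype() == at::kByte && state.is_cpu(),
              "expected a CPU byte tensor");
  uint64_t seed = 0;
  uint64_t offset = 0;
  const uint8_t* data = state.data_ptr<uint8_t>();
  std::memcpy(&seed, data, sizeof(seed));
  std::memcpy(&offset, data + sizeof(seed), sizeof(offset));
  return {seed, offset};
}
```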

torch_xla/csrc/xla_generator.h

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
#pragma once

#include <ATen/core/Generator.h>
#include <ATen/core/Tensor.h>
#include <c10/util/intrusive_ptr.h>

#include <cstdint>

namespace at {

// Holds the actual state variables for the XLA generator.
struct XLAGeneratorState : c10::intrusive_ptr_target {
  uint64_t seed_ = 0;
  uint64_t offset_ = 0;

  // Constructor
  XLAGeneratorState(uint64_t seed = 0, uint64_t offset = 0)
      : seed_(seed), offset_(offset) {}

  // Cloning method
  c10::intrusive_ptr<XLAGeneratorState> clone() {
    return c10::make_intrusive<XLAGeneratorState>(seed_, offset_);
  }
};

struct TORCH_API XLAGeneratorImpl : public c10::GeneratorImpl {
  // Constructors
  XLAGeneratorImpl(DeviceIndex device_index = -1);
  XLAGeneratorImpl(DeviceIndex device_index,
                   c10::intrusive_ptr<XLAGeneratorState> state);
  ~XLAGeneratorImpl() override = default;

  // Cloning support
  std::shared_ptr<XLAGeneratorImpl> clone() const;

  // --- Core Virtual Methods to Override ---
  void set_current_seed(uint64_t seed) override;
  uint64_t current_seed() const override;
  uint64_t seed() override;
  void set_offset(uint64_t offset) override;
  uint64_t get_offset() const override;
  c10::intrusive_ptr<c10::TensorImpl> get_state() const override;
  void set_state(const c10::TensorImpl& new_state) override;

  // --- Additional Methods ---
  static c10::DeviceType device_type();

 private:
  // Private clone implementation
  XLAGeneratorImpl* clone_impl() const override;

  // The actual state is held in a separate, cloneable object.
  c10::intrusive_ptr<XLAGeneratorState> state_;
};

}  // namespace at
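The header splits the generator into a thin XLAGeneratorImpl and a reference-counted XLAGeneratorState, so cloning only copies the two integers and a pre-built state can be handed to the two-argument constructor. A small sketch of that constructor in use follows; MakeSeededXlaGenerator is a hypothetical helper for illustration, not something this commit defines.

```cpp
#include <cstdint>
#include <utility>

#include <ATen/core/Generator.h>
#include <c10/util/intrusive_ptr.h>

#include "torch_xla/csrc/xla_generator.h"

// Hypothetical helper: create an XLA generator that starts from a known
// seed/offset by constructing the state object directly and passing it to
// the two-argument XLAGeneratorImpl constructor.
at::Generator MakeSeededXlaGenerator(c10::DeviceIndex device_index,
                                     uint64_t seed, uint64_t offset) {
  auto state = c10::make_intrusive<at::XLAGeneratorState>(seed, offset);
  return at::make_generator<at::XLAGeneratorImpl>(device_index,
                                                  std::move(state));
}
```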
