Commit 0c0ae2d

expand: improve error handling and error messages. (#9645)
This PR refactors the `expand` operation implementation by improving its error messages and returning a status-type value.

**Key Changes:**

- Make `tensor_methods::expand` return `StatusOr<XLATensorPtr>`.
- Improve error messages and error handling.
- Add a new `CheckExpandValidRank` function that checks the rank of the input against the rank of the given sizes.
- Modify `GetExpandDimensions` to call the check above and to verify that each corresponding dimension of the input and the given sizes is valid.
1 parent 6d755ee commit 0c0ae2d
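
For context, the sketch below (not part of this commit) shows the caller-side pattern the PR adopts: `tensor_methods::expand` now returns a `StatusOr`, and call sites at the ATen boundary unwrap it with `XLA_ASSIGN_OR_THROW`. The `ExpandToAten` helper name and the omitted status-macro header are assumptions for illustration; the macros and signatures mirror the hunks below.

```cpp
// Sketch only: caller-side use of the StatusOr-returning expand.
// `ExpandToAten` is a hypothetical helper; the header that defines the
// XLA_ASSIGN_OR_THROW macro is assumed to be included elsewhere.
#include "absl/types/span.h"
#include "torch_xla/csrc/aten_xla_bridge.h"
#include "torch_xla/csrc/tensor_methods.h"

namespace torch_xla {

at::Tensor ExpandToAten(const at::Tensor& self,
                        absl::Span<const int64_t> sizes) {
  // Both GetXlaTensor and tensor_methods::expand now return status values;
  // XLA_ASSIGN_OR_THROW unwraps them or throws, which surfaces in Python as
  // a RuntimeError carrying the improved message (see the new tests).
  XLA_ASSIGN_OR_THROW(XLATensorPtr xla_self, bridge::GetXlaTensor(self));
  XLA_ASSIGN_OR_THROW(XLATensorPtr output,
                      tensor_methods::expand(xla_self, sizes));
  return bridge::AtenFromXlaTensor(std::move(output));
}

}  // namespace torch_xla
```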

File tree: 7 files changed, +132 −48 lines changed


test/run_tests.sh

Lines changed: 1 addition & 0 deletions
```diff
@@ -151,6 +151,7 @@ function run_xla_op_tests1 {
   run_eager_debug "$_TEST_DIR/test_operations.py" "$@" --verbosity=$VERBOSITY
   run_test "$_TEST_DIR/test_operations.py" "$@" --verbosity=$VERBOSITY
   run_test "$_TEST_DIR/test_ops_error_message.py"
+  run_test "$_TEST_DIR/test_ops_error_message_functionalization_disabled.py"
   run_test "$_TEST_DIR/test_xla_graph_execution.py" "$@" --verbosity=$VERBOSITY
   run_pt_xla_debug_level2 "$_TEST_DIR/test_xla_graph_execution.py" "$@" --verbosity=$VERBOSITY
   run_test_without_functionalization "$_TEST_DIR/test_operations.py" "$@" --verbosity=$VERBOSITY
```

test/test_ops_error_message_functionalization_disabled.py (new file)

Lines changed: 43 additions & 0 deletions

```python
import os

os.environ["XLA_DISABLE_FUNCTIONALIZATION"] = "1"

import expecttest
import torch
import torch_xla
import unittest


class TestOpsErrorMessageFunctionalizationDisabled(expecttest.TestCase):

  def test_expand_raises_error_on_higher_rank_tensor(self):
    device = torch_xla.device()
    a = torch.rand(1, 1, 2, 3, device=device)
    sizes = [-1, 3]

    def test():
      return a.expand(sizes)

    self.assertExpectedRaisesInline(
        exc_type=RuntimeError,
        callable=test,
        expect="""expand(): expected the `input` tensor f32[1,1,2,3] (rank: 4) to have a rank smaller or equal to the given `sizes` [-1, 3] (rank: 2)."""
    )

  def test_expand_raises_error_on_size_mismatch(self):
    device = torch_xla.device()
    a = torch.rand(1, 1, 2, 3, device=device)
    sizes = [1, 1, 1, 3]

    def test():
      return a.expand(sizes)

    self.assertExpectedRaisesInline(
        exc_type=RuntimeError,
        callable=test,
        expect="""expand(): expected dimension 2 of the given `sizes` [1, 1, 1, 3] (1) to be -1, or equal to the size of the `input` tensor f32[1,1,2,3] at dimension 2 (2)."""
    )


if __name__ == "__main__":
  unittest.main()
```

torch_xla/csrc/aten_xla_type.cpp

Lines changed: 18 additions & 14 deletions
```diff
@@ -1786,19 +1786,21 @@ at::Tensor XLANativeFunctions::empty_strided_symint(
 }
 
 at::Tensor XLANativeFunctions::expand_copy_symint(const at::Tensor& self,
-                                                  at::SymIntArrayRef sym_size,
+                                                  at::SymIntArrayRef sym_sizes,
                                                   bool implicit) {
   TORCH_LAZY_FN_COUNTER_TIMED_TRACING("xla::");
-  std::optional<at::IntArrayRef> size = c10::asIntArrayRefSlowOpt(sym_size);
-  XLA_ASSIGN_OR_THROW(XLATensorPtr xla_self, bridge::GetXlaTensor(self));
-  if (size.has_value()) {
-    return bridge::AtenFromXlaTensor(tensor_methods::expand(
-        xla_self, torch::lazy::ToVector<int64_t>(*size)));
+  XLA_ASSIGN_OR_THROW(absl_nonnull XLATensorPtr xla_self,
+                      bridge::GetXlaTensor(self));
+  std::optional<at::IntArrayRef> sizes = c10::asIntArrayRefSlowOpt(sym_sizes);
+  if (sizes.has_value()) {
+    XLA_ASSIGN_OR_THROW(absl_nonnull XLATensorPtr output,
+                        tensor_methods::expand(xla_self, *sizes));
+    return bridge::AtenFromXlaTensor(std::move(output));
   } else {
     // at least one of the dimension is symbolic, use the sym_int version of the
     // node
     return bridge::AtenFromXlaTensor(
-        tensor_methods::expand_symint(xla_self, sym_size));
+        tensor_methods::expand_symint(xla_self, sym_sizes));
   }
 }
 
@@ -4563,19 +4565,21 @@ at::Tensor XLANativeFunctions::diagonal(const at::Tensor& self, int64_t offset,
 }
 
 at::Tensor XLANativeFunctions::expand_symint(const at::Tensor& self,
-                                             at::SymIntArrayRef sym_size,
+                                             at::SymIntArrayRef sym_sizes,
                                              bool implicit) {
   TORCH_LAZY_FN_COUNTER_TIMED_TRACING("xla::");
-  std::optional<at::IntArrayRef> size = c10::asIntArrayRefSlowOpt(sym_size);
-  XLA_ASSIGN_OR_THROW(XLATensorPtr xla_self, bridge::GetXlaTensor(self));
-  if (size.has_value()) {
-    return bridge::AtenFromXlaTensor(tensor_methods::expand(
-        xla_self, torch::lazy::ToVector<int64_t>(*size)));
+  XLA_ASSIGN_OR_THROW(absl_nonnull XLATensorPtr xla_self,
+                      bridge::GetXlaTensor(self));
+  std::optional<at::IntArrayRef> sizes = c10::asIntArrayRefSlowOpt(sym_sizes);
+  if (sizes.has_value()) {
+    XLA_ASSIGN_OR_THROW(absl_nonnull XLATensorPtr output,
+                        tensor_methods::expand(xla_self, *sizes));
+    return bridge::AtenFromXlaTensor(std::move(output));
   } else {
     // at least one of the dimension is symbolic, use the sym_int version of the
     // node
     return bridge::AtenFromXlaTensor(
-        tensor_methods::expand_symint(xla_self, sym_size));
+        tensor_methods::expand_symint(xla_self, sym_sizes));
   }
 }
 
```

torch_xla/csrc/init_python_bindings.cpp

Lines changed: 10 additions & 6 deletions
```diff
@@ -24,6 +24,7 @@
 #include <unordered_map>
 #include <vector>
 
+#include "absl/base/nullability.h"
 #include "absl/container/flat_hash_map.h"
 #include "absl/log/absl_check.h"
 #include "absl/strings/str_cat.h"
@@ -451,15 +452,18 @@ at::Tensor AllReduce(const std::string& reduce_type, const at::Tensor& input,
 }
 
 at::Tensor DynamicExpand(const at::Tensor& input,
-                         const std::vector<int64_t>& size,
+                         const std::vector<int64_t>& sizes,
                          const at::Tensor& src_tensor, int src_dim,
                          int target_dim) {
-  XLA_ASSIGN_OR_THROW(XLATensorPtr xla_input, bridge::GetXlaTensor(input));
-  XLA_ASSIGN_OR_THROW(XLATensorPtr xla_src_tensor,
+  XLA_ASSIGN_OR_THROW(absl_nonnull XLATensorPtr xla_input,
+                      bridge::GetXlaTensor(input));
+  XLA_ASSIGN_OR_THROW(absl_nonnull XLATensorPtr xla_src_tensor,
                       bridge::GetXlaTensor(src_tensor));
-  XLATensorPtr result = tensor_methods::dynamic_expand(
-      xla_input, size, xla_src_tensor, src_dim, target_dim);
-  return bridge::AtenFromXlaTensor(std::move(result));
+  XLA_ASSIGN_OR_THROW(
+      absl_nonnull XLATensorPtr output,
+      tensor_methods::dynamic_expand(xla_input, sizes, xla_src_tensor, src_dim,
+                                     target_dim));
+  return bridge::AtenFromXlaTensor(std::move(output));
 }
 
 at::Tensor DynamicView(const at::Tensor& input,
```

torch_xla/csrc/tensor_methods.cpp

Lines changed: 52 additions & 21 deletions
```diff
@@ -10,11 +10,13 @@
 #include <functional>
 #include <iterator>
 
+#include "absl/base/nullability.h"
 #include "absl/log/absl_check.h"
 #include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/str_split.h"
+#include "absl/types/span.h"
 #include "torch_xla/csrc/LazyIr.h"
 #include "torch_xla/csrc/aten_xla_bridge.h"
 #include "torch_xla/csrc/data_ops.h"
@@ -234,16 +236,45 @@ void CheckBmmDimension(const std::string& tag, const XLATensorPtr& batch1,
                        "batch2", 2);
 }
 
-std::vector<int64_t> GetExpandDimensions(const xla::Shape& shape,
-                                         std::vector<int64_t> dimensions) {
-  XLA_CHECK_GE(dimensions.size(), shape.dimensions_size()) << shape;
-  int64_t base = dimensions.size() - shape.dimensions_size();
-  for (size_t i = 0; i < shape.dimensions_size(); ++i) {
-    if (dimensions[base + i] == -1) {
-      dimensions[base + i] = shape.dimensions(i);
+absl::Status CheckExpandValidRank(const XLATensorPtr& input,
+                                  const absl::Span<const int64_t> sizes) {
+  xla::Shape shape = input->shape();
+  int64_t rank = shape.dimensions().size();
+  if (rank > sizes.size()) {
+    return XLA_ERROR_WITH_LOCATION(absl::InvalidArgumentError(absl::StrCat(
+        "expand(): expected the `input` tensor ", shape.ToString(), " (rank: ",
+        rank, ") to have a rank smaller or equal to the given `sizes` [",
+        absl::StrJoin(sizes, /* sep= */ ", "), "] (rank: ", sizes.size(),
+        ").")));
+  }
+  return absl::OkStatus();
+}
+
+absl::StatusOr<std::vector<int64_t>> GetExpandDimensions(
+    const XLATensorPtr& input, const absl::Span<const int64_t> sizes) {
+  XLA_RETURN_IF_ERROR(CheckExpandValidRank(input, sizes));
+
+  xla::Shape shape = input->shape();
+  const int64_t rank = shape.dimensions().size();
+  const int64_t base = sizes.size() - rank;
+
+  std::vector<int64_t> expanded_dimensions(sizes.begin(), sizes.end());
+  for (size_t i = 0; i < shape.dimensions().size(); ++i) {
+    const int64_t dim = base + i;
+    const int64_t size = sizes[dim];
+    if (size == -1) {
+      expanded_dimensions[dim] = shape.dimensions(i);
+    } else if (shape.dimensions(i) != 1 && size != shape.dimensions(i)) {
+      return XLA_ERROR_WITH_LOCATION(absl::InvalidArgumentError(absl::StrCat(
+          "expand(): expected dimension ", dim, " of the given `sizes` [",
+          absl::StrJoin(sizes, /* sep= */ ", "), "] (", size,
+          ") to be -1, or equal to the size of the `input` tensor ",
+          shape.ToString(), " at dimension ", i, " (", shape.dimensions(i),
+          ").")));
     }
   }
-  return dimensions;
+
+  return expanded_dimensions;
 }
 
 // Resizes and / or checks whether a list is of the given size. The list is only
@@ -1791,11 +1822,12 @@ XLATensorPtr exp(const XLATensorPtr& input) {
   return input->CreateFrom(Exp(input->GetIrValue()));
 }
 
-XLATensorPtr expand(const XLATensorPtr& input, std::vector<int64_t> size) {
-  auto input_shape = input->shape();
-  auto output = input->CreateFrom(torch_xla::MakeNode<Expand>(
-      input->GetIrValue(),
-      GetExpandDimensions(input_shape.get(), std::move(size))));
+absl::StatusOr<absl_nonnull XLATensorPtr> expand(
+    const XLATensorPtr& input, const absl::Span<const int64_t> sizes) {
+  XLA_ASSIGN_OR_RETURN(std::vector<int64_t> expanded_dimensions,
+                       GetExpandDimensions(input, sizes));
+  auto output = input->CreateFrom(
+      torch_xla::MakeNode<Expand>(input->GetIrValue(), expanded_dimensions));
   output->SetStorage(input->Storage());
   return output;
 }
@@ -2927,15 +2959,14 @@ XLATensorPtr cast_int4(const XLATensorPtr& weight,
 // Dynamic Reshape ops here.
 //////////////////////////////////////////////////////////////////////////////
 
-XLATensorPtr dynamic_expand(const XLATensorPtr& input,
-                            const std::vector<int64_t>& size,
-                            const XLATensorPtr& src_tensor, int src_dim,
-                            int target_dim) {
-  std::vector<int64_t> expanded_size =
-      GetExpandDimensions(input->shape().get(), size);
+absl::StatusOr<absl_nonnull XLATensorPtr> dynamic_expand(
+    const XLATensorPtr& input, const absl::Span<const int64_t> sizes,
+    const XLATensorPtr& src_tensor, int src_dim, int target_dim) {
+  XLA_ASSIGN_OR_RETURN(std::vector<int64_t> expanded_dimensions,
+                       GetExpandDimensions(input, sizes));
   torch::lazy::NodePtr node = torch_xla::MakeNode<DynamicExpand>(
-      input->GetIrValue(), expanded_size, src_tensor->GetIrValue(), src_dim,
-      target_dim);
+      input->GetIrValue(), expanded_dimensions, src_tensor->GetIrValue(),
+      src_dim, target_dim);
   return input->CreateFrom(torch::lazy::Value(node));
 }
```
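
To make the dimension-resolution rule in the new `GetExpandDimensions` easier to follow, here is a small self-contained sketch using plain containers instead of XLA types (`ResolveExpandSizes` is a hypothetical name, not part of the commit): trailing entries of `sizes` are matched against the input shape, `-1` keeps the input size, and any other value must equal the input size unless that input dimension is 1.

```cpp
// Standalone illustration of the expand size-resolution rule implemented by
// GetExpandDimensions above; returns std::nullopt where the real code
// returns an InvalidArgumentError.
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

std::optional<std::vector<int64_t>> ResolveExpandSizes(
    const std::vector<int64_t>& shape, std::vector<int64_t> sizes) {
  if (shape.size() > sizes.size()) return std::nullopt;  // rank error
  const size_t base = sizes.size() - shape.size();
  for (size_t i = 0; i < shape.size(); ++i) {
    int64_t& size = sizes[base + i];
    if (size == -1) {
      size = shape[i];  // -1 keeps the input size
    } else if (shape[i] != 1 && size != shape[i]) {
      return std::nullopt;  // size-mismatch error
    }
  }
  return sizes;
}

int main() {
  // f32[1,1,2,3] expanded with [5, -1, -1, 2, 3] resolves to [5, 1, 1, 2, 3].
  auto ok = ResolveExpandSizes({1, 1, 2, 3}, {5, -1, -1, 2, 3});
  // [1, 1, 1, 3] fails: dimension 2 asks for 1, but the input size there is 2
  // (the case covered by test_expand_raises_error_on_size_mismatch).
  auto bad = ResolveExpandSizes({1, 1, 2, 3}, {1, 1, 1, 3});
  std::cout << ok.has_value() << " " << bad.has_value() << "\n";  // prints: 1 0
}
```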

torch_xla/csrc/tensor_methods.h

Lines changed: 6 additions & 5 deletions
```diff
@@ -2,6 +2,7 @@
 #define XLA_TORCH_XLA_CSRC_TENSOR_METHODS_H_
 
 #include "absl/base/nullability.h"
+#include "absl/types/span.h"
 #include "torch_xla/csrc/cross_replica_reduces.h"
 #include "torch_xla/csrc/ops/custom_sharding.h"
 #include "torch_xla/csrc/runtime/computation_client.h"
@@ -158,10 +159,9 @@ XLATensorPtr cast_int4(const XLATensorPtr& weight,
 // Dynamic Reshape ops here.
 //////////////////////////////////////////////////////////////////////////////
 
-XLATensorPtr dynamic_expand(const XLATensorPtr& input,
-                            const std::vector<int64_t>& size,
-                            const XLATensorPtr& src_tensor, int src_dim,
-                            int target_dim);
+absl::StatusOr<absl_nonnull XLATensorPtr> dynamic_expand(
+    const XLATensorPtr& input, const absl::Span<const int64_t> sizes,
+    const XLATensorPtr& src_tensor, int src_dim, int target_dim);
 
 XLATensorPtr dynamic_view(const XLATensorPtr& input,
                           const std::vector<int64_t>& size,
@@ -427,7 +427,8 @@ XLATensorPtr eq(const XLATensorPtr& input, const XLATensorPtr& other);
 
 XLATensorPtr exp(const XLATensorPtr& input);
 
-XLATensorPtr expand(const XLATensorPtr& input, std::vector<int64_t> size);
+absl::StatusOr<absl_nonnull XLATensorPtr> expand(
+    const XLATensorPtr& input, const absl::Span<const int64_t> sizes);
 
 XLATensorPtr expand_symint(const XLATensorPtr& input,
                            c10::SymIntArrayRef sym_size);
```

torch_xla/csrc/tensor_ops.cpp

Lines changed: 2 additions & 2 deletions
```diff
@@ -240,9 +240,9 @@ XLATensorPtr EmbeddingDenseBackward(const XLATensorPtr& grad_output,
   // padding_idx.
   XLATensorPtr skip_padding = tensor_methods::unsqueeze(
       tensor_methods::ne(indices_rank1, padding_idx), 1);
-  skip_padding = tensor_methods::expand(
+  XLA_ASSIGN_OR_THROW(
       skip_padding,
-      torch::lazy::ToVector<int64_t>(grad->shape().get().dimensions()));
+      tensor_methods::expand(skip_padding, grad->shape().get().dimensions()));
   XLATensorPtr zero_grad =
       tensor_methods::full_like(grad, 0, grad->GetDevice(), grad->dtype());
   return tensor_methods::index_put(
```
