Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ third_party/
*~
bazel-*
.humanize
.codex

build_*
# clion workspace.
Expand Down
6 changes: 0 additions & 6 deletions paddle/phi/api/include/compat/ATen/core/TensorBody.h
Original file line number Diff line number Diff line change
Expand Up @@ -687,12 +687,6 @@ class Tensor : public TensorBase {
}

void record_stream(at::Stream s) const;
Comment on lines 687 to 689
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After removing the record_stream(at::cuda::CUDAStream) / record_stream(cudaStream_t) overloads, the earlier forward-declaration block for c10::cuda::CUDAStream in this header (and its accompanying comment about the overload) is now unused/misleading. Please remove or update that forward-declaration/comment block to reflect the new API surface.

Copilot uses AI. Check for mistakes.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void record_stream(at::cuda::CUDAStream s) const;
// TODO(youge325): Remove after DeepEP paddle branch is updated to use
// at::Stream
void record_stream(cudaStream_t s) const;
#endif

Tensor var(int dim) const { return var(at::IntArrayRef{dim}, true, false); }

Expand Down
47 changes: 47 additions & 0 deletions paddle/phi/api/include/compat/ATen/core/ivalue.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,16 @@ class IValue {
bool is_custom_class() const { return tag_ == TypeTag::CustomClass; }
bool is_tuple() const { return tag_ == TypeTag::Tuple; }

// PyTorch-style camelCase aliases for the snake_case type predicates above;
// each forwards directly to its is_* counterpart so both spellings stay in
// sync with the single tag_ check.
bool isNone() const { return is_none(); }
bool isBool() const { return is_bool(); }
bool isInt() const { return is_int(); }
bool isDouble() const { return is_double(); }
bool isString() const { return is_string(); }
bool isList() const { return is_list(); }
bool isTensor() const { return is_tensor(); }
bool isCustomClass() const { return is_custom_class(); }
bool isTuple() const { return is_tuple(); }

bool to_bool() const {
if (!is_bool()) throw std::runtime_error("Not a bool");
return std::get<bool>(value_);
Expand Down Expand Up @@ -280,6 +290,39 @@ class IValue {
return static_cast<at::ScalarType>(std::get<int64_t>(value_));
}

// camelCase conversion aliases mirroring the accessors above; each forwards
// to the corresponding to_* method (which throws std::runtime_error on a
// tag mismatch — see the to_* definitions).
bool toBool() const { return to_bool(); }
int64_t toInt() const { return to_int(); }
double toDouble() const { return to_double(); }
const std::string& toStringRef() const { return to_string(); }
std::string_view toStringView() const { return to_string_view(); }
at::Tensor toTensor() const { return to_tensor(); }
at::ScalarType toScalarType() const { return to_scalar_type(); }

// Returns a human-readable name for the value's current type tag; any
// unrecognized tag value yields "InvalidTag".
std::string tagKind() const {
  if (tag_ == TypeTag::None) return "None";
  if (tag_ == TypeTag::Bool) return "Bool";
  if (tag_ == TypeTag::Int) return "Int";
  if (tag_ == TypeTag::Double) return "Double";
  if (tag_ == TypeTag::String) return "String";
  if (tag_ == TypeTag::Tensor) return "Tensor";
  if (tag_ == TypeTag::GenericList) return "GenericList";
  if (tag_ == TypeTag::CustomClass) return "CustomClass";
  if (tag_ == TypeTag::Tuple) return "Tuple";
  return "InvalidTag";
}

template <typename T>
intrusive_ptr<T> to_custom_class() const {
if (!is_custom_class()) throw std::runtime_error("Not a custom class");
Expand Down Expand Up @@ -637,3 +680,7 @@ intrusive_ptr<T> generic_to(const IValue& ivalue,
}

} // namespace torch

// Compatibility alias: expose torch::IValue under the c10 namespace so code
// that refers to c10::IValue also compiles against this compat layer.
namespace c10 {
using IValue = ::torch::IValue;
}
155 changes: 71 additions & 84 deletions paddle/phi/api/include/compat/ATen/ops/arange.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,50 +25,48 @@

namespace at {

inline at::Tensor arange(const at::Scalar& end,
at::TensorOptions options = {}) {
if (options.pinned_memory()) {
// Pinning memory is only supported for CPU tensors
if (options.has_device() && !options.device().is_cpu()) {
PD_THROW(
"pin_memory=true requires device to be CPU, but got non-CPU device");
}
phi::Place base_place = options._PD_GetPlace();
phi::Place pinned_place = compat::_PD_GetCreatePinnedPlace(base_place);
auto dense = paddle::experimental::arange(
paddle::experimental::full({}, 0, phi::DataType::FLOAT64),
paddle::experimental::full(
{}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
phi::CPUPlace());
return dense.copy_to(pinned_place, /*blocking=*/true);
namespace detail {

// True when `scalar` carries a bool or integral payload. Used by
// _PD_ResolveArangeDtype below: when start/end/step are all integral the
// result dtype becomes at::kLong instead of the default floating dtype.
//
// Fix: dropped the stale statement that followed the switch — it returned a
// paddle Tensor from this bool function and referenced an undefined
// `options` variable (residue from a merged diff), making the block
// ill-formed.
inline bool _PD_IsIntegralArangeScalar(const at::Scalar& scalar) {
  switch (scalar.dtype()) {
    case phi::DataType::BOOL:
    case phi::DataType::UINT8:
    case phi::DataType::INT8:
    case phi::DataType::UINT16:
    case phi::DataType::INT16:
    case phi::DataType::UINT32:
    case phi::DataType::INT32:
    case phi::DataType::UINT64:
    case phi::DataType::INT64:
      return true;
    default:
      return false;
  }
}

inline at::Tensor arange(const at::Scalar& end,
::std::optional<at::ScalarType> dtype,
::std::optional<at::Layout> layout,
::std::optional<at::Device> device,
::std::optional<bool> pin_memory) {
auto options =
at::TensorOptions()
.dtype(dtype.value_or(c10::get_default_dtype_as_scalartype()))
.layout(layout)
.device(device.value_or(at::kCPU))
.pinned_memory(pin_memory);
return arange(end, options);
inline at::ScalarType _PD_ResolveArangeDtype(const at::Scalar& start,
const at::Scalar& end,
const at::Scalar& step,
const at::TensorOptions& options) {
if (options.has_dtype()) {
return options.dtype().toScalarType();
}
if (_PD_IsIntegralArangeScalar(start) && _PD_IsIntegralArangeScalar(end) &&
_PD_IsIntegralArangeScalar(step)) {
return at::kLong;
}
return c10::get_default_dtype_as_scalartype();
}

} // namespace detail

inline at::Tensor arange(const at::Scalar& start,
const at::Scalar& end,
const at::Scalar& step,
at::TensorOptions options = {}) {
// Match PyTorch: step must be non-zero and consistent with (end - start).
at::native::arange_check_bounds(start, end, step);
auto dtype = detail::_PD_ResolveArangeDtype(start, end, step, options);
if (options.pinned_memory()) {
// Pinning memory is only supported for CPU tensors
if (options.has_device() && !options.device().is_cpu()) {
Expand All @@ -82,67 +80,57 @@ inline at::Tensor arange(const at::Scalar& start,
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full(
{}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
paddle::experimental::full(
{}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(dtype),
phi::CPUPlace());
return dense.copy_to(pinned_place, /*blocking=*/true);
}
return paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
paddle::experimental::full({}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(dtype),
options._PD_GetPlace());
}

inline at::Tensor arange(const at::Scalar& start,
const at::Scalar& end,
inline at::Tensor arange(const at::Scalar& end,
at::TensorOptions options = {}) {
return arange(/*start=*/0, end, /*step=*/1, options);
}

inline at::Tensor arange(const at::Scalar& end,
::std::optional<at::ScalarType> dtype,
::std::optional<at::Layout> layout,
::std::optional<at::Device> device,
::std::optional<bool> pin_memory) {
auto options =
at::TensorOptions()
.dtype(dtype.value_or(c10::get_default_dtype_as_scalartype()))
.layout(layout)
.device(device.value_or(at::kCPU))
.pinned_memory(pin_memory);
return arange(start, end, options);
auto options = at::TensorOptions()
.dtype(dtype)
.layout(layout)
.device(device)
.pinned_memory(pin_memory);
return arange(/*start=*/0, end, /*step=*/1, options);
}

inline at::Tensor arange(const at::Scalar& start,
const at::Scalar& end,
const at::Scalar& step,
at::TensorOptions options = {}) {
// Match PyTorch: step must be non-zero and consistent with (end - start).
at::native::arange_check_bounds(start, end, step);
if (options.pinned_memory()) {
// Pinning memory is only supported for CPU tensors
if (options.has_device() && !options.device().is_cpu()) {
PD_THROW(
"pin_memory=true requires device to be CPU, but got non-CPU device");
}
phi::Place base_place = options._PD_GetPlace();
phi::Place pinned_place = compat::_PD_GetCreatePinnedPlace(base_place);
auto dense = paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full(
{}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full(
{}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
phi::CPUPlace());
return dense.copy_to(pinned_place, /*blocking=*/true);
}
return paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
options._PD_GetPlace());
return arange(start, end, /*step=*/1, options);
}

// Convenience overload taking the individual tensor properties: assembles a
// TensorOptions from them and defers to the (start, end, step, options)
// overload with an implicit step of 1.
inline at::Tensor arange(const at::Scalar& start,
                         const at::Scalar& end,
                         ::std::optional<at::ScalarType> dtype,
                         ::std::optional<at::Layout> layout,
                         ::std::optional<at::Device> device,
                         ::std::optional<bool> pin_memory) {
  at::TensorOptions packed;
  packed = packed.dtype(dtype);
  packed = packed.layout(layout);
  packed = packed.device(device);
  packed = packed.pinned_memory(pin_memory);
  return arange(start, end, /*step=*/1, packed);
}

inline at::Tensor arange(const at::Scalar& start,
Expand All @@ -152,12 +140,11 @@ inline at::Tensor arange(const at::Scalar& start,
::std::optional<at::Layout> layout,
::std::optional<at::Device> device,
::std::optional<bool> pin_memory) {
auto options =
at::TensorOptions()
.dtype(dtype.value_or(c10::get_default_dtype_as_scalartype()))
.layout(layout)
.device(device.value_or(at::kCPU))
.pinned_memory(pin_memory);
auto options = at::TensorOptions()
.dtype(dtype)
.layout(layout)
.device(device)
.pinned_memory(pin_memory);
return arange(start, end, step, options);
}

Expand Down
6 changes: 6 additions & 0 deletions paddle/phi/api/include/compat/ATen/ops/equal.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
namespace at {

inline bool equal(const at::Tensor& self, const at::Tensor& other) {
PD_CHECK(self.defined(),
"Expected a proper Tensor but got None (or an undefined Tensor in "
"C++)");
PD_CHECK(other.defined(),
"Expected a proper Tensor but got None (or an undefined Tensor in "
"C++)");
PD_CHECK(self.device() == other.device(),
"Cannot compare two tensors on "
"different devices. Got: ",
Expand Down
18 changes: 0 additions & 18 deletions paddle/phi/api/include/compat/ATen/ops/record_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,4 @@ inline void Tensor::record_stream(at::Stream s) const {
#endif
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
inline void Tensor::record_stream(at::cuda::CUDAStream s) const {
record_stream(static_cast<at::Stream>(s));
}

// TODO(youge325): Remove after DeepEP paddle branch is updated to use
// at::Stream
inline void Tensor::record_stream(cudaStream_t s) const {
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor_.impl());
PD_CHECK(dense_tensor != nullptr,
"record_stream only supports DenseTensor, but got a non-dense "
"tensor implementation.");
PD_CHECK(dense_tensor->place().GetType() != phi::AllocationType::CPU,
"record_stream is not supported for CPU tensors.");
paddle::memory::RecordStream(dense_tensor->Holder(), s);
}
#endif
} // namespace at
Comment on lines 49 to 52
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the CUDAStream/cudaStream_t record_stream overloads removed from this header, the conditional #include <c10/cuda/CUDAStream.h> at the top of this file appears to be unused now. Consider dropping that include to reduce unnecessary dependencies/compile time.

Copilot uses AI. Check for mistakes.
26 changes: 22 additions & 4 deletions paddle/phi/api/include/compat/ATen/ops/resize.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,31 @@

namespace at {

// resize_ - in-place resize using reshape
// resize_ - use reshape for same-numel cases and set_ for storage-changing
// cases so repeated resize_ calls stay stable.
// resize_ - use reshape for same-numel cases and set_ for storage-changing
// cases so repeated resize_ calls stay stable.
//
// Fix: removed the stale leading statements (diff residue) that
// unconditionally reshaped to `size` and assigned the result before the
// memory-format validation ran — that duplicated work and made the
// same-numel fast path below dead code.
inline const at::Tensor& Tensor::resize_(
    at::IntArrayRef size,
    ::std::optional<at::MemoryFormat> memory_format) const {
  // Only the contiguous layout is supported by this compat shim.
  if (memory_format.has_value()) {
    TORCH_CHECK(*memory_format == at::MemoryFormat::Contiguous,
                "resize_ only supports contiguous memory format, but got ",
                static_cast<int>(*memory_format));
  }

  std::vector<int64_t> dims(size.begin(), size.end());
  int64_t new_numel = 1;
  for (auto dim : dims) {
    new_numel *= dim;
  }

  // Same element count: a metadata-only reshape suffices.
  if (tensor_.numel() == new_numel) {
    const_cast<Tensor*>(this)->tensor_ =
        paddle::experimental::reshape(tensor_, phi::IntArray(dims));
    return *this;
  }

  // Element count changes: rebuild storage via set_ from a blocking copy so
  // the tensor object keeps a stable identity across repeated resize_ calls.
  auto source = tensor_.copy_to(tensor_.place(), /*blocking=*/true);
  paddle::experimental::set_(const_cast<Tensor*>(this)->tensor_, source, dims);
  return *this;
}

Expand Down
28 changes: 25 additions & 3 deletions paddle/phi/api/include/compat/ATen/ops/select.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,35 @@
namespace at {

inline at::Tensor select(const at::Tensor& self, int64_t dim, int64_t index) {
// Normalize dim to positive value for error messages
int64_t orig_dim = dim;
if (dim < 0) {
dim += self.dim();
}
// Handle negative indexing
// Check dim is valid
if (dim < 0 || dim >= self.dim()) {
PD_CHECK(false,
"select(): index ",
orig_dim,
" out of range for tensor of size ",
self.sizes(),
" at dimension ",
orig_dim);
}
// Handle negative index
int64_t orig_index = index;
if (index < 0) {
int64_t dim_size = self.size(dim);
index = dim_size + index;
index = self.size(dim) + index;
}
// Check index is valid
if (index < 0 || index >= self.size(dim)) {
PD_CHECK(false,
"select(): index ",
orig_index,
" out of range for tensor of size ",
self.sizes(),
" at dimension ",
orig_dim < 0 ? orig_dim + self.dim() : orig_dim);
}

return Tensor(
Expand Down
Loading
Loading