Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ third_party/
*~
bazel-*
.humanize
.codex

build_*
# clion workspace.
Expand Down
6 changes: 0 additions & 6 deletions paddle/phi/api/include/compat/ATen/core/TensorBody.h
Original file line number Diff line number Diff line change
Expand Up @@ -687,12 +687,6 @@ class Tensor : public TensorBase {
}

void record_stream(at::Stream s) const;
Comment on lines 687 to 689
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After removing the record_stream(at::cuda::CUDAStream) / record_stream(cudaStream_t) overloads, the earlier forward-declaration block for c10::cuda::CUDAStream in this header (and its accompanying comment about the overload) is now unused/misleading. Please remove or update that forward-declaration/comment block to reflect the new API surface.

Copilot uses AI. Check for mistakes.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void record_stream(at::cuda::CUDAStream s) const;
// TODO(youge325): Remove after DeepEP paddle branch is updated to use
// at::Stream
void record_stream(cudaStream_t s) const;
#endif

Tensor var(int dim) const { return var(at::IntArrayRef{dim}, true, false); }

Expand Down
47 changes: 47 additions & 0 deletions paddle/phi/api/include/compat/ATen/core/ivalue.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,16 @@ class IValue {
bool is_custom_class() const { return tag_ == TypeTag::CustomClass; }
bool is_tuple() const { return tag_ == TypeTag::Tuple; }

// PyTorch-style camelCase aliases for the snake_case type predicates above;
// each forwards directly to its is_* counterpart so both spellings stay in
// sync with the single tag_ check.
bool isNone() const { return is_none(); }
bool isBool() const { return is_bool(); }
bool isInt() const { return is_int(); }
bool isDouble() const { return is_double(); }
bool isString() const { return is_string(); }
bool isList() const { return is_list(); }
bool isTensor() const { return is_tensor(); }
bool isCustomClass() const { return is_custom_class(); }
bool isTuple() const { return is_tuple(); }

bool to_bool() const {
if (!is_bool()) throw std::runtime_error("Not a bool");
return std::get<bool>(value_);
Expand Down Expand Up @@ -280,6 +290,39 @@ class IValue {
return static_cast<at::ScalarType>(std::get<int64_t>(value_));
}

// camelCase conversion aliases mirroring the accessors above; each forwards
// to the corresponding to_* method (which throws std::runtime_error on a
// tag mismatch — see the to_* definitions).
bool toBool() const { return to_bool(); }
int64_t toInt() const { return to_int(); }
double toDouble() const { return to_double(); }
const std::string& toStringRef() const { return to_string(); }
std::string_view toStringView() const { return to_string_view(); }
at::Tensor toTensor() const { return to_tensor(); }
at::ScalarType toScalarType() const { return to_scalar_type(); }

// Returns a human-readable name for the value's current type tag; any
// unrecognized tag value yields "InvalidTag".
std::string tagKind() const {
  if (tag_ == TypeTag::None) return "None";
  if (tag_ == TypeTag::Bool) return "Bool";
  if (tag_ == TypeTag::Int) return "Int";
  if (tag_ == TypeTag::Double) return "Double";
  if (tag_ == TypeTag::String) return "String";
  if (tag_ == TypeTag::Tensor) return "Tensor";
  if (tag_ == TypeTag::GenericList) return "GenericList";
  if (tag_ == TypeTag::CustomClass) return "CustomClass";
  if (tag_ == TypeTag::Tuple) return "Tuple";
  return "InvalidTag";
}

template <typename T>
intrusive_ptr<T> to_custom_class() const {
if (!is_custom_class()) throw std::runtime_error("Not a custom class");
Expand Down Expand Up @@ -637,3 +680,7 @@ intrusive_ptr<T> generic_to(const IValue& ivalue,
}

} // namespace torch

// Compatibility alias: expose torch::IValue under the c10 namespace so code
// that refers to c10::IValue also compiles against this compat layer.
namespace c10 {
using IValue = ::torch::IValue;
}
155 changes: 71 additions & 84 deletions paddle/phi/api/include/compat/ATen/ops/arange.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,50 +25,48 @@

namespace at {

inline at::Tensor arange(const at::Scalar& end,
at::TensorOptions options = {}) {
if (options.pinned_memory()) {
// Pinning memory is only supported for CPU tensors
if (options.has_device() && !options.device().is_cpu()) {
PD_THROW(
"pin_memory=true requires device to be CPU, but got non-CPU device");
}
phi::Place base_place = options._PD_GetPlace();
phi::Place pinned_place = compat::_PD_GetCreatePinnedPlace(base_place);
auto dense = paddle::experimental::arange(
paddle::experimental::full({}, 0, phi::DataType::FLOAT64),
paddle::experimental::full(
{}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
phi::CPUPlace());
return dense.copy_to(pinned_place, /*blocking=*/true);
namespace detail {

// True when `scalar` carries a bool or integral payload. Used by
// _PD_ResolveArangeDtype below: when start/end/step are all integral the
// result dtype becomes at::kLong instead of the default floating dtype.
//
// Fix: dropped the stale statement that followed the switch — it returned a
// paddle Tensor from this bool function and referenced an undefined
// `options` variable (residue from a merged diff), making the block
// ill-formed.
inline bool _PD_IsIntegralArangeScalar(const at::Scalar& scalar) {
  switch (scalar.dtype()) {
    case phi::DataType::BOOL:
    case phi::DataType::UINT8:
    case phi::DataType::INT8:
    case phi::DataType::UINT16:
    case phi::DataType::INT16:
    case phi::DataType::UINT32:
    case phi::DataType::INT32:
    case phi::DataType::UINT64:
    case phi::DataType::INT64:
      return true;
    default:
      return false;
  }
}

inline at::Tensor arange(const at::Scalar& end,
::std::optional<at::ScalarType> dtype,
::std::optional<at::Layout> layout,
::std::optional<at::Device> device,
::std::optional<bool> pin_memory) {
auto options =
at::TensorOptions()
.dtype(dtype.value_or(c10::get_default_dtype_as_scalartype()))
.layout(layout)
.device(device.value_or(at::kCPU))
.pinned_memory(pin_memory);
return arange(end, options);
inline at::ScalarType _PD_ResolveArangeDtype(const at::Scalar& start,
const at::Scalar& end,
const at::Scalar& step,
const at::TensorOptions& options) {
if (options.has_dtype()) {
return options.dtype().toScalarType();
}
if (_PD_IsIntegralArangeScalar(start) && _PD_IsIntegralArangeScalar(end) &&
_PD_IsIntegralArangeScalar(step)) {
return at::kLong;
}
return c10::get_default_dtype_as_scalartype();
}

} // namespace detail

inline at::Tensor arange(const at::Scalar& start,
const at::Scalar& end,
const at::Scalar& step,
at::TensorOptions options = {}) {
// Match PyTorch: step must be non-zero and consistent with (end - start).
at::native::arange_check_bounds(start, end, step);
auto dtype = detail::_PD_ResolveArangeDtype(start, end, step, options);
if (options.pinned_memory()) {
// Pinning memory is only supported for CPU tensors
if (options.has_device() && !options.device().is_cpu()) {
Expand All @@ -82,67 +80,57 @@ inline at::Tensor arange(const at::Scalar& start,
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full(
{}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
paddle::experimental::full(
{}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(dtype),
phi::CPUPlace());
return dense.copy_to(pinned_place, /*blocking=*/true);
}
return paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, 1, phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
paddle::experimental::full({}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(dtype),
options._PD_GetPlace());
}

inline at::Tensor arange(const at::Scalar& start,
const at::Scalar& end,
inline at::Tensor arange(const at::Scalar& end,
at::TensorOptions options = {}) {
return arange(/*start=*/0, end, /*step=*/1, options);
}

inline at::Tensor arange(const at::Scalar& end,
::std::optional<at::ScalarType> dtype,
::std::optional<at::Layout> layout,
::std::optional<at::Device> device,
::std::optional<bool> pin_memory) {
auto options =
at::TensorOptions()
.dtype(dtype.value_or(c10::get_default_dtype_as_scalartype()))
.layout(layout)
.device(device.value_or(at::kCPU))
.pinned_memory(pin_memory);
return arange(start, end, options);
auto options = at::TensorOptions()
.dtype(dtype)
.layout(layout)
.device(device)
.pinned_memory(pin_memory);
return arange(/*start=*/0, end, /*step=*/1, options);
}

inline at::Tensor arange(const at::Scalar& start,
const at::Scalar& end,
const at::Scalar& step,
at::TensorOptions options = {}) {
// Match PyTorch: step must be non-zero and consistent with (end - start).
at::native::arange_check_bounds(start, end, step);
if (options.pinned_memory()) {
// Pinning memory is only supported for CPU tensors
if (options.has_device() && !options.device().is_cpu()) {
PD_THROW(
"pin_memory=true requires device to be CPU, but got non-CPU device");
}
phi::Place base_place = options._PD_GetPlace();
phi::Place pinned_place = compat::_PD_GetCreatePinnedPlace(base_place);
auto dense = paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full(
{}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full(
{}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
phi::CPUPlace());
return dense.copy_to(pinned_place, /*blocking=*/true);
}
return paddle::experimental::arange(
paddle::experimental::full(
{}, start.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, end.to<double>(), phi::DataType::FLOAT64),
paddle::experimental::full({}, step.to<double>(), phi::DataType::FLOAT64),
compat::_PD_AtenScalarTypeToPhiDataType(options.dtype()),
options._PD_GetPlace());
return arange(start, end, /*step=*/1, options);
}

// Convenience overload taking the individual tensor properties: assembles a
// TensorOptions from them and defers to the (start, end, step, options)
// overload with an implicit step of 1.
inline at::Tensor arange(const at::Scalar& start,
                         const at::Scalar& end,
                         ::std::optional<at::ScalarType> dtype,
                         ::std::optional<at::Layout> layout,
                         ::std::optional<at::Device> device,
                         ::std::optional<bool> pin_memory) {
  at::TensorOptions packed;
  packed = packed.dtype(dtype);
  packed = packed.layout(layout);
  packed = packed.device(device);
  packed = packed.pinned_memory(pin_memory);
  return arange(start, end, /*step=*/1, packed);
}

inline at::Tensor arange(const at::Scalar& start,
Expand All @@ -152,12 +140,11 @@ inline at::Tensor arange(const at::Scalar& start,
::std::optional<at::Layout> layout,
::std::optional<at::Device> device,
::std::optional<bool> pin_memory) {
auto options =
at::TensorOptions()
.dtype(dtype.value_or(c10::get_default_dtype_as_scalartype()))
.layout(layout)
.device(device.value_or(at::kCPU))
.pinned_memory(pin_memory);
auto options = at::TensorOptions()
.dtype(dtype)
.layout(layout)
.device(device)
.pinned_memory(pin_memory);
return arange(start, end, step, options);
}

Expand Down
6 changes: 6 additions & 0 deletions paddle/phi/api/include/compat/ATen/ops/equal.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
namespace at {

inline bool equal(const at::Tensor& self, const at::Tensor& other) {
PD_CHECK(self.defined(),
"Expected a proper Tensor but got None (or an undefined Tensor in "
"C++)");
PD_CHECK(other.defined(),
"Expected a proper Tensor but got None (or an undefined Tensor in "
"C++)");
PD_CHECK(self.device() == other.device(),
"Cannot compare two tensors on "
"different devices. Got: ",
Expand Down
18 changes: 0 additions & 18 deletions paddle/phi/api/include/compat/ATen/ops/record_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,4 @@ inline void Tensor::record_stream(at::Stream s) const {
#endif
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
inline void Tensor::record_stream(at::cuda::CUDAStream s) const {
record_stream(static_cast<at::Stream>(s));
}

// TODO(youge325): Remove after DeepEP paddle branch is updated to use
// at::Stream
inline void Tensor::record_stream(cudaStream_t s) const {
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor_.impl());
PD_CHECK(dense_tensor != nullptr,
"record_stream only supports DenseTensor, but got a non-dense "
"tensor implementation.");
PD_CHECK(dense_tensor->place().GetType() != phi::AllocationType::CPU,
"record_stream is not supported for CPU tensors.");
paddle::memory::RecordStream(dense_tensor->Holder(), s);
}
#endif
} // namespace at
Comment on lines 49 to 52
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the CUDAStream/cudaStream_t record_stream overloads removed from this header, the conditional #include <c10/cuda/CUDAStream.h> at the top of this file appears to be unused now. Consider dropping that include to reduce unnecessary dependencies/compile time.

Copilot uses AI. Check for mistakes.
26 changes: 22 additions & 4 deletions paddle/phi/api/include/compat/ATen/ops/resize.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,31 @@

namespace at {

// resize_ - in-place resize using reshape
// resize_ - use reshape for same-numel cases and set_ for storage-changing
// cases so repeated resize_ calls stay stable.
// resize_ - use reshape for same-numel cases and set_ for storage-changing
// cases so repeated resize_ calls stay stable.
//
// Fix: removed the stale leading statements (diff residue) that
// unconditionally reshaped to `size` and assigned the result before the
// memory-format validation ran — that duplicated work and made the
// same-numel fast path below dead code.
inline const at::Tensor& Tensor::resize_(
    at::IntArrayRef size,
    ::std::optional<at::MemoryFormat> memory_format) const {
  // Only the contiguous layout is supported by this compat shim.
  if (memory_format.has_value()) {
    TORCH_CHECK(*memory_format == at::MemoryFormat::Contiguous,
                "resize_ only supports contiguous memory format, but got ",
                static_cast<int>(*memory_format));
  }

  std::vector<int64_t> dims(size.begin(), size.end());
  int64_t new_numel = 1;
  for (auto dim : dims) {
    new_numel *= dim;
  }

  // Same element count: a metadata-only reshape suffices.
  if (tensor_.numel() == new_numel) {
    const_cast<Tensor*>(this)->tensor_ =
        paddle::experimental::reshape(tensor_, phi::IntArray(dims));
    return *this;
  }

  // Element count changes: rebuild storage via set_ from a blocking copy so
  // the tensor object keeps a stable identity across repeated resize_ calls.
  auto source = tensor_.copy_to(tensor_.place(), /*blocking=*/true);
  paddle::experimental::set_(const_cast<Tensor*>(this)->tensor_, source, dims);
  return *this;
}

Expand Down
28 changes: 25 additions & 3 deletions paddle/phi/api/include/compat/ATen/ops/select.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,35 @@
namespace at {

inline at::Tensor select(const at::Tensor& self, int64_t dim, int64_t index) {
// Normalize dim to positive value for error messages
int64_t orig_dim = dim;
if (dim < 0) {
dim += self.dim();
}
// Handle negative indexing
// Check dim is valid
if (dim < 0 || dim >= self.dim()) {
PD_CHECK(false,
"select(): index ",
orig_dim,
" out of range for tensor of size ",
self.sizes(),
" at dimension ",
orig_dim);
}
// Handle negative index
int64_t orig_index = index;
if (index < 0) {
int64_t dim_size = self.size(dim);
index = dim_size + index;
index = self.size(dim) + index;
}
// Check index is valid
if (index < 0 || index >= self.size(dim)) {
PD_CHECK(false,
"select(): index ",
orig_index,
" out of range for tensor of size ",
self.sizes(),
" at dimension ",
orig_dim < 0 ? orig_dim + self.dim() : orig_dim);
}

return Tensor(
Expand Down
Loading
Loading