Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions paddle/phi/api/include/compat/ATen/core/TensorBody.h
Original file line number Diff line number Diff line change
Expand Up @@ -689,9 +689,6 @@ class Tensor : public TensorBase {
void record_stream(at::Stream s) const;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void record_stream(at::cuda::CUDAStream s) const;
// TODO(youge325): Remove after DeepEP paddle branch is updated to use
// at::Stream
void record_stream(cudaStream_t s) const;
#endif

// Single-dimension convenience overload: wraps `dim` in an at::IntArrayRef and
// forwards to the var overload taking (IntArrayRef, bool, bool), passing
// `true` and `false` for the two flags.
// NOTE(review): the flags presumably correspond to PyTorch's `unbiased` and
// `keepdim` — confirm against the callee's declaration.
Tensor var(int dim) const { return var(at::IntArrayRef{dim}, true, false); }
Expand Down
16 changes: 2 additions & 14 deletions paddle/phi/api/include/compat/ATen/ops/record_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,8 @@ inline void Tensor::record_stream(at::Stream s) const {

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
inline void Tensor::record_stream(at::cuda::CUDAStream s) const {
record_stream(static_cast<at::Stream>(s));
}

// TODO(youge325): Remove after DeepEP paddle branch is updated to use
// at::Stream
inline void Tensor::record_stream(cudaStream_t s) const {
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(tensor_.impl());
PD_CHECK(dense_tensor != nullptr,
"record_stream only supports DenseTensor, but got a non-dense "
"tensor implementation.");
PD_CHECK(dense_tensor->place().GetType() != phi::AllocationType::CPU,
"record_stream is not supported for CPU tensors.");
paddle::memory::RecordStream(dense_tensor->Holder(), s);
record_stream(s.unwrap());
}
#endif

} // namespace at
11 changes: 0 additions & 11 deletions paddle/phi/api/include/compat/c10/core/Event.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,6 @@ struct Event final {
// Records this event on a CUDA stream wrapper by delegating to the record()
// overload that accepts the result of stream.unwrap().
void record(const c10::cuda::CUDAStream& stream) {
  record(stream.unwrap());
}
#endif

#ifdef PADDLE_WITH_CUDA
// TODO(youge325): Remove after DeepEP paddle branch is updated to use
// c10::Stream
//
// Compatibility overload that records this event on a raw cudaStream_t.
// The TORCH_CHECK fails with the message below unless device_type_ is
// DeviceType::CUDA; otherwise the stream and the id returned by
// phi::backends::gpu::GetCurrentDeviceId() are passed to RecordBackendEvent.
void record(const cudaStream_t& stream) {
TORCH_CHECK(
device_type_ == DeviceType::CUDA,
"Raw cudaStream_t recording is only supported for CUDA events.");
RecordBackendEvent(stream, phi::backends::gpu::GetCurrentDeviceId());
}
#endif

void block(const Stream& stream) const {
if (!was_marked_for_recording_) {
return;
Expand Down
4 changes: 4 additions & 0 deletions paddle/phi/api/include/compat/c10/core/Stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,7 @@ struct hash<c10::Stream> {
}
};
} // namespace std

// Makes c10::Stream reachable under the name at::Stream.
// NOTE(review): the review thread on this change reports a second at::Stream
// alias in ATen/core/TensorBody.h — confirm the two declarations can coexist
// in one translation unit before merging.
namespace at {
using c10::Stream;
}  // namespace at
Comment on lines +108 to +111
Copy link

Copilot AI Apr 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding `namespace at { using c10::Stream; }` here conflicts with an existing `at::Stream` definition in `paddle/phi/api/include/compat/ATen/core/TensorBody.h` (currently declared as `using Stream = c10::Stream;`). Since `TensorBody.h` includes `<c10/core/Stream.h>`, this will trigger a redefinition error when compiling any TU that includes `ATen/core/TensorBody.h`.

To fix: keep a single canonical `at::Stream` definition (either remove the `Stream` alias from `TensorBody.h`, or drop this new `at::Stream` export and rely on the existing one).

Suggested change
namespace at {
using c10::Stream;
}

Copilot uses AI. Check for mistakes.
3 changes: 0 additions & 3 deletions paddle/phi/api/include/compat/c10/cuda/CUDAStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,6 @@ class CUDAStream {

// Builds the Device for this stream: DeviceType::CUDA paired with the value
// returned by device_index().
Device device() const {
  return Device(DeviceType::CUDA, device_index());
}

// Returns the result of stream() as a cudaStream_t; a plain forwarding
// accessor with no additional logic.
cudaStream_t raw_stream() const {
  return stream();
}

// Produces a c10::StreamData3 for this stream by delegating to
// stream_.pack3().
struct c10::StreamData3 pack3() const { return stream_.pack3(); }
Expand Down Expand Up @@ -219,7 +217,6 @@ inline CUDAStream getStreamFromPool(const int priority,
});

cudaStream_t raw;

// Keep parity with PyTorch API shape: negative priority selects the
// high-priority pool, non-negative selects the low-priority pool.
if (priority < 0) {
Expand Down
9 changes: 0 additions & 9 deletions test/cpp/compat/ATen_record_stream_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ using RecordCudaStreamMethod = void (at::Tensor::*)(at::cuda::CUDAStream) const;
// Compile-time signature check: initializing a RecordCudaStreamMethod from
// &at::Tensor::record_stream only compiles if an overload with exactly that
// member-function type exists. The variable is otherwise unused, hence
// [[maybe_unused]].
[[maybe_unused]] static RecordCudaStreamMethod g_record_cuda_stream_method =
&at::Tensor::record_stream;

// Member-function-pointer type for the record_stream overload that takes a
// raw cudaStream_t.
using RecordRawCudaStreamMethod = void (at::Tensor::*)(cudaStream_t) const;
// Compile-time signature check: this initialization only compiles if
// at::Tensor::record_stream has an overload matching the type above.
[[maybe_unused]] static RecordRawCudaStreamMethod
g_record_raw_cuda_stream_method = &at::Tensor::record_stream;

TEST_F(RecordStreamTest, CudaTensorCurrentCudaStream) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
auto stream = at::cuda::getCurrentCUDAStream();
Expand All @@ -69,11 +65,6 @@ TEST_F(RecordStreamTest, CudaTensorDefaultCudaStream) {
EXPECT_NO_THROW(cuda_tensor.record_stream(default_stream));
}

// Calls record_stream on cuda_tensor with the raw handle (raw_stream()) of the
// stream returned by at::cuda::getCurrentCUDAStream() and expects the call
// not to throw.
TEST_F(RecordStreamTest, CudaTensorRawCudaStream) {
// Presumably skips the test when no CUDA runtime is present — confirm
// against the macro definition.
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
auto stream = at::cuda::getCurrentCUDAStream();
EXPECT_NO_THROW(cuda_tensor.record_stream(stream.raw_stream()));
}
#endif // PADDLE_WITH_CUDA || PADDLE_WITH_HIP

// --- Error path: CPU tensor + CPU stream (record_stream does not support CPU
Expand Down
17 changes: 0 additions & 17 deletions test/cpp/compat/c10_Event_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,6 @@ TEST(EventTest, CpuEventRecordThrows) {
EXPECT_THROW(event.recordOnce(stream), std::exception);
}

#ifdef PADDLE_WITH_CUDA
// Member-function-pointer type for the c10::Event::record overload that takes
// a raw cudaStream_t by const reference.
using RawEventRecordMethod = void (c10::Event::*)(const cudaStream_t&);
// Compile-time signature check: this initialization only compiles if
// c10::Event::record has an overload matching the type above.
[[maybe_unused]] static RawEventRecordMethod g_raw_event_record_method =
&c10::Event::record;
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST(EventTest, CudaEventLazyCreateAndRecord) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
Expand Down Expand Up @@ -93,17 +87,6 @@ TEST(EventTest, CudaEventElapsedTimeWithTimingEnabled) {
EXPECT_GE(elapsed_ms, 0.0);
}

#ifdef PADDLE_WITH_CUDA
// Records a CUDA c10::Event on the raw handle of the current CUDA stream and
// checks that (1) the call does not throw, (2) the event reports the same
// device index as the stream, and (3) the event is marked for recording.
TEST(EventTest, CudaEventRawStreamRecordCompatibility) {
// Presumably skips the test when no CUDA runtime is present — confirm
// against the macro definition.
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
auto stream = c10::cuda::getCurrentCUDAStream();
c10::Event event(c10::DeviceType::CUDA);
EXPECT_NO_THROW(event.record(stream.raw_stream()));
EXPECT_EQ(event.device_index(), stream.device_index());
EXPECT_TRUE(event.was_marked_for_recording());
}
#endif

TEST(EventTest, CudaEventRejectsDifferentDeviceRecord) {
SKIP_IF_CUDA_RUNTIME_UNAVAILABLE();
if (c10::cuda::device_count() < 2) {
Expand Down
Loading