From 9571fecee4600bfea0afa1c87bac511b7da5ba61 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Thu, 14 Aug 2025 13:38:20 +0200 Subject: [PATCH 01/35] [SYCL] Add structure to pass references to ranges This allows to have single entry point for all dimentions and keep reference to user-provided range data, not copy them. --- .../ext/oneapi/experimental/rangesref.hpp | 56 +++++++++ sycl/include/sycl/nd_range.hpp | 6 + sycl/unittests/scheduler/CMakeLists.txt | 1 + sycl/unittests/scheduler/RangesRefUsage.cpp | 112 ++++++++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp create mode 100644 sycl/unittests/scheduler/RangesRefUsage.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp b/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp new file mode 100644 index 0000000000000..fa0389dd7acfc --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp @@ -0,0 +1,56 @@ +//==-------- rangesref.hpp --- SYCL iteration with reference to ranges --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +// The structure to keep references to ranges and dimension unified for +// all dimensions. +class RangesRefT { + +public: + RangesRefT() = default; + RangesRefT(const RangesRefT &Desc) = default; + RangesRefT(RangesRefT &&Desc) = default; + + template + RangesRefT(sycl::range &GlobalSizes, sycl::range &LocalSizes) + : GlobalSize(&(GlobalSizes[0])), + LocalSize(&(LocalSizes[0])), + Dims{size_t(Dims_)} {} + + // to support usage in sycl::ext::oneapi::experimental::submit_with_event() + template + RangesRefT(sycl::nd_range &ExecutionRange) + : GlobalSize(&ExecutionRange.globalSize[0]), + LocalSize(&ExecutionRange.localSize[0]), + GlobalOffset(&ExecutionRange.offset[0]), + Dims{size_t(Dims_)} {} + + template + RangesRefT(sycl::range &Range) + : GlobalSize(&(Range[0])), + Dims{size_t(Dims_)} {} + + RangesRefT &operator=(const RangesRefT &Desc) = default; + RangesRefT &operator=(RangesRefT &&Desc) = default; + + const size_t *GlobalSize = nullptr; + const size_t *LocalSize = nullptr; + const size_t *GlobalOffset = nullptr; + const size_t Dims = 0; +}; + +} // namespace ext::oneapi::experimental +} // inline namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/nd_range.hpp b/sycl/include/sycl/nd_range.hpp index e4ff4881be17a..71a550679a49a 100644 --- a/sycl/include/sycl/nd_range.hpp +++ b/sycl/include/sycl/nd_range.hpp @@ -15,6 +15,10 @@ namespace sycl { inline namespace _V1 { +namespace ext::oneapi::experimental { + class RangesRefT; +} + /// Defines the iteration domain of both the work-groups and the overall /// dispatch. /// @@ -65,6 +69,8 @@ template class nd_range { bool operator!=(const nd_range &rhs) const { return !(*this == rhs); } + + friend class sycl::_V1::ext::oneapi::experimental::RangesRefT; }; } // namespace _V1 diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index afc0e185eb7c0..bc2a2e4c2f420 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -22,4 +22,5 @@ add_sycl_unittest(SchedulerTests OBJECT AccessorDefaultCtor.cpp HostTaskAndBarrier.cpp BarrierDependencies.cpp + RangesRefUsage.cpp ) diff --git a/sycl/unittests/scheduler/RangesRefUsage.cpp b/sycl/unittests/scheduler/RangesRefUsage.cpp new file mode 100644 index 0000000000000..5bf1442a9c8fa --- /dev/null +++ b/sycl/unittests/scheduler/RangesRefUsage.cpp @@ -0,0 +1,112 @@ +//==---- RangesRefUsage.cpp --- Check RangesRefT --------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include + +#include + +TEST(RangesRefUsage, RangesRefUsage) { + sycl::ext::oneapi::experimental::RangesRefT r0; + ASSERT_EQ(r0.Dims, size_t{0}); + + { + sycl::range<1> global_range{1024}; + sycl::range<1> local_range{64}; + sycl::id<1> offset{10}; + sycl::nd_range<1> nd_range{global_range, local_range, offset}; + + { + sycl::ext::oneapi::experimental::RangesRefT r{nd_range}; + ASSERT_EQ(r.Dims, size_t{1}); + ASSERT_EQ(*r.GlobalSize, global_range[0]); + ASSERT_EQ(*r.LocalSize, local_range[0]); + ASSERT_EQ(*r.GlobalOffset, offset[0]); + } + { + sycl::ext::oneapi::experimental::RangesRefT r{global_range, local_range}; + ASSERT_EQ(r.Dims, size_t{1}); + ASSERT_EQ(*r.GlobalSize, global_range[0]); + ASSERT_EQ(*r.LocalSize, local_range[0]); + } + { + sycl::ext::oneapi::experimental::RangesRefT r{global_range}; + ASSERT_EQ(r.Dims, size_t{1}); + ASSERT_EQ(*r.GlobalSize, global_range[0]); + ASSERT_EQ(r.LocalSize, nullptr); + } + } + { + sycl::range<2> global_range{1024, 512}; + sycl::range<2> local_range{64, 32}; + sycl::id<2> offset{10, 20}; + sycl::nd_range<2> nd_range{global_range, local_range, offset}; + + { + sycl::ext::oneapi::experimental::RangesRefT r{nd_range}; + ASSERT_EQ(r.Dims, size_t{2}); + ASSERT_EQ(r.GlobalSize[0], global_range[0]); + ASSERT_EQ(r.GlobalSize[1], global_range[1]); + ASSERT_EQ(r.LocalSize[0], local_range[0]); + ASSERT_EQ(r.LocalSize[1], local_range[1]); + ASSERT_EQ(r.GlobalOffset[0], offset[0]); + ASSERT_EQ(r.GlobalOffset[1], offset[1]); + } + { + sycl::ext::oneapi::experimental::RangesRefT r{global_range, local_range}; + ASSERT_EQ(r.Dims, size_t{2}); + ASSERT_EQ(r.GlobalSize[0], global_range[0]); + ASSERT_EQ(r.GlobalSize[1], global_range[1]); + ASSERT_EQ(r.LocalSize[0], local_range[0]); + ASSERT_EQ(r.LocalSize[1], local_range[1]); + } + { + sycl::ext::oneapi::experimental::RangesRefT r{global_range}; + ASSERT_EQ(r.Dims, size_t{2}); + ASSERT_EQ(r.GlobalSize[0], global_range[0]); + ASSERT_EQ(r.GlobalSize[1], global_range[1]); + ASSERT_EQ(r.LocalSize, nullptr); + } + } + { + sycl::range<3> global_range{1024, 512, 256}; + sycl::range<3> local_range{64, 32, 16}; + sycl::id<3> offset{10, 20, 30}; + sycl::nd_range<3> nd_range{global_range, local_range, offset}; + + { + sycl::ext::oneapi::experimental::RangesRefT r{nd_range}; + ASSERT_EQ(r.Dims, size_t{3}); + ASSERT_EQ(r.GlobalSize[0], global_range[0]); + ASSERT_EQ(r.GlobalSize[1], global_range[1]); + ASSERT_EQ(r.GlobalSize[2], global_range[2]); + ASSERT_EQ(r.LocalSize[0], local_range[0]); + ASSERT_EQ(r.LocalSize[1], local_range[1]); + ASSERT_EQ(r.LocalSize[2], local_range[2]); + ASSERT_EQ(r.GlobalOffset[0], offset[0]); + ASSERT_EQ(r.GlobalOffset[1], offset[1]); + ASSERT_EQ(r.GlobalOffset[2], offset[2]); + } + { + sycl::ext::oneapi::experimental::RangesRefT r{global_range, local_range}; + ASSERT_EQ(r.Dims, size_t{3}); + ASSERT_EQ(r.GlobalSize[0], global_range[0]); + ASSERT_EQ(r.GlobalSize[1], global_range[1]); + ASSERT_EQ(r.GlobalSize[2], global_range[2]); + ASSERT_EQ(r.LocalSize[0], local_range[0]); + ASSERT_EQ(r.LocalSize[1], local_range[1]); + ASSERT_EQ(r.LocalSize[2], local_range[2]); + } + { + sycl::ext::oneapi::experimental::RangesRefT r{global_range}; + ASSERT_EQ(r.Dims, size_t{3}); + ASSERT_EQ(r.GlobalSize[0], global_range[0]); + ASSERT_EQ(r.GlobalSize[1], global_range[1]); + ASSERT_EQ(r.GlobalSize[2], global_range[2]); + ASSERT_EQ(r.LocalSize, nullptr); + } + } +} From 2521805710421b97ade5f8087cc2dfac071a0f07 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Thu, 14 Aug 2025 13:55:09 +0200 Subject: [PATCH 02/35] Fix formatting. --- .../sycl/ext/oneapi/experimental/rangesref.hpp | 17 +++++++---------- sycl/include/sycl/nd_range.hpp | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp b/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp index fa0389dd7acfc..d16d6a0ad71b6 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp @@ -1,4 +1,4 @@ -//==-------- rangesref.hpp --- SYCL iteration with reference to ranges --------==// +//==-------- rangesref.hpp --- SYCL iteration with reference to ranges -----==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -25,22 +25,19 @@ class RangesRefT { template RangesRefT(sycl::range &GlobalSizes, sycl::range &LocalSizes) - : GlobalSize(&(GlobalSizes[0])), - LocalSize(&(LocalSizes[0])), - Dims{size_t(Dims_)} {} + : GlobalSize(&(GlobalSizes[0])), LocalSize(&(LocalSizes[0])), + Dims{size_t(Dims_)} {} // to support usage in sycl::ext::oneapi::experimental::submit_with_event() template RangesRefT(sycl::nd_range &ExecutionRange) : GlobalSize(&ExecutionRange.globalSize[0]), - LocalSize(&ExecutionRange.localSize[0]), - GlobalOffset(&ExecutionRange.offset[0]), - Dims{size_t(Dims_)} {} + LocalSize(&ExecutionRange.localSize[0]), + GlobalOffset(&ExecutionRange.offset[0]), Dims{size_t(Dims_)} {} template RangesRefT(sycl::range &Range) - : GlobalSize(&(Range[0])), - Dims{size_t(Dims_)} {} + : GlobalSize(&(Range[0])), Dims{size_t(Dims_)} {} RangesRefT &operator=(const RangesRefT &Desc) = default; RangesRefT &operator=(RangesRefT &&Desc) = default; @@ -52,5 +49,5 @@ class RangesRefT { }; } // namespace ext::oneapi::experimental -} // inline namespace _V1 +} // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/nd_range.hpp b/sycl/include/sycl/nd_range.hpp index 71a550679a49a..34c81fa1620e9 100644 --- a/sycl/include/sycl/nd_range.hpp +++ b/sycl/include/sycl/nd_range.hpp @@ -16,7 +16,7 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { - class RangesRefT; +class RangesRefT; } /// Defines the iteration domain of both the work-groups and the overall From 1a90e4822c6a84bcdf68fc1b4092cfc6df64d275 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Thu, 14 Aug 2025 15:35:37 +0200 Subject: [PATCH 03/35] Fix const. --- sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp b/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp index d16d6a0ad71b6..65ad2b8934afa 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp @@ -45,7 +45,7 @@ class RangesRefT { const size_t *GlobalSize = nullptr; const size_t *LocalSize = nullptr; const size_t *GlobalOffset = nullptr; - const size_t Dims = 0; + size_t Dims = 0; }; } // namespace ext::oneapi::experimental From 38945f3c0434e14a736bdcc80f1bc179e61a2679 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Mon, 18 Aug 2025 13:54:41 +0200 Subject: [PATCH 04/35] Rename RangesRefT to ranges_ref_view and move to sycl::detail namespace. --- .../ranges_ref_view.hpp} | 26 +++++++++---------- sycl/include/sycl/nd_range.hpp | 6 ++--- sycl/unittests/scheduler/CMakeLists.txt | 2 +- ...gesRefUsage.cpp => RangesRefViewUsage.cpp} | 24 ++++++++--------- 4 files changed, 29 insertions(+), 29 deletions(-) rename sycl/include/sycl/{ext/oneapi/experimental/rangesref.hpp => detail/ranges_ref_view.hpp} (61%) rename sycl/unittests/scheduler/{RangesRefUsage.cpp => RangesRefViewUsage.cpp} (80%) diff --git a/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp b/sycl/include/sycl/detail/ranges_ref_view.hpp similarity index 61% rename from sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp rename to sycl/include/sycl/detail/ranges_ref_view.hpp index 65ad2b8934afa..5f0eccbcc2fed 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/rangesref.hpp +++ b/sycl/include/sycl/detail/ranges_ref_view.hpp @@ -1,4 +1,4 @@ -//==-------- rangesref.hpp --- SYCL iteration with reference to ranges -----==// +//==---- ranges_ref_view.hpp --- SYCL iteration with reference to ranges ---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,35 +12,35 @@ namespace sycl { inline namespace _V1 { -namespace ext::oneapi::experimental { +namespace detail { -// The structure to keep references to ranges and dimension unified for +// The structure to keep dimension and references to ranges unified for // all dimensions. -class RangesRefT { +class ranges_ref_view { public: - RangesRefT() = default; - RangesRefT(const RangesRefT &Desc) = default; - RangesRefT(RangesRefT &&Desc) = default; + ranges_ref_view() = default; + ranges_ref_view(const ranges_ref_view &Desc) = default; + ranges_ref_view(ranges_ref_view &&Desc) = default; template - RangesRefT(sycl::range &GlobalSizes, sycl::range &LocalSizes) + ranges_ref_view(sycl::range &GlobalSizes, sycl::range &LocalSizes) : GlobalSize(&(GlobalSizes[0])), LocalSize(&(LocalSizes[0])), Dims{size_t(Dims_)} {} // to support usage in sycl::ext::oneapi::experimental::submit_with_event() template - RangesRefT(sycl::nd_range &ExecutionRange) + ranges_ref_view(sycl::nd_range &ExecutionRange) : GlobalSize(&ExecutionRange.globalSize[0]), LocalSize(&ExecutionRange.localSize[0]), GlobalOffset(&ExecutionRange.offset[0]), Dims{size_t(Dims_)} {} template - RangesRefT(sycl::range &Range) + ranges_ref_view(sycl::range &Range) : GlobalSize(&(Range[0])), Dims{size_t(Dims_)} {} - RangesRefT &operator=(const RangesRefT &Desc) = default; - RangesRefT &operator=(RangesRefT &&Desc) = default; + ranges_ref_view &operator=(const ranges_ref_view &Desc) = default; + ranges_ref_view &operator=(ranges_ref_view &&Desc) = default; const size_t *GlobalSize = nullptr; const size_t *LocalSize = nullptr; @@ -48,6 +48,6 @@ class RangesRefT { size_t Dims = 0; }; -} // namespace ext::oneapi::experimental +} // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/nd_range.hpp b/sycl/include/sycl/nd_range.hpp index 34c81fa1620e9..4ba352910b522 100644 --- a/sycl/include/sycl/nd_range.hpp +++ b/sycl/include/sycl/nd_range.hpp @@ -15,8 +15,8 @@ namespace sycl { inline namespace _V1 { -namespace ext::oneapi::experimental { -class RangesRefT; +namespace detail { +class ranges_ref_view; } /// Defines the iteration domain of both the work-groups and the overall @@ -70,7 +70,7 @@ template class nd_range { return !(*this == rhs); } - friend class sycl::_V1::ext::oneapi::experimental::RangesRefT; + friend class sycl::_V1::detail::ranges_ref_view; }; } // namespace _V1 diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index bc2a2e4c2f420..3d44f1d81a57e 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -22,5 +22,5 @@ add_sycl_unittest(SchedulerTests OBJECT AccessorDefaultCtor.cpp HostTaskAndBarrier.cpp BarrierDependencies.cpp - RangesRefUsage.cpp + RangesRefViewUsage.cpp ) diff --git a/sycl/unittests/scheduler/RangesRefUsage.cpp b/sycl/unittests/scheduler/RangesRefViewUsage.cpp similarity index 80% rename from sycl/unittests/scheduler/RangesRefUsage.cpp rename to sycl/unittests/scheduler/RangesRefViewUsage.cpp index 5bf1442a9c8fa..a705e0f01f4eb 100644 --- a/sycl/unittests/scheduler/RangesRefUsage.cpp +++ b/sycl/unittests/scheduler/RangesRefViewUsage.cpp @@ -1,16 +1,16 @@ -//==---- RangesRefUsage.cpp --- Check RangesRefT --------------------------==// +//==---- RangesRefViewUsage.cpp --- Check ranges_ref_view ------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include +#include #include TEST(RangesRefUsage, RangesRefUsage) { - sycl::ext::oneapi::experimental::RangesRefT r0; + sycl::detail::ranges_ref_view r0; ASSERT_EQ(r0.Dims, size_t{0}); { @@ -20,20 +20,20 @@ TEST(RangesRefUsage, RangesRefUsage) { sycl::nd_range<1> nd_range{global_range, local_range, offset}; { - sycl::ext::oneapi::experimental::RangesRefT r{nd_range}; + sycl::detail::ranges_ref_view r{nd_range}; ASSERT_EQ(r.Dims, size_t{1}); ASSERT_EQ(*r.GlobalSize, global_range[0]); ASSERT_EQ(*r.LocalSize, local_range[0]); ASSERT_EQ(*r.GlobalOffset, offset[0]); } { - sycl::ext::oneapi::experimental::RangesRefT r{global_range, local_range}; + sycl::detail::ranges_ref_view r{global_range, local_range}; ASSERT_EQ(r.Dims, size_t{1}); ASSERT_EQ(*r.GlobalSize, global_range[0]); ASSERT_EQ(*r.LocalSize, local_range[0]); } { - sycl::ext::oneapi::experimental::RangesRefT r{global_range}; + sycl::detail::ranges_ref_view r{global_range}; ASSERT_EQ(r.Dims, size_t{1}); ASSERT_EQ(*r.GlobalSize, global_range[0]); ASSERT_EQ(r.LocalSize, nullptr); @@ -46,7 +46,7 @@ TEST(RangesRefUsage, RangesRefUsage) { sycl::nd_range<2> nd_range{global_range, local_range, offset}; { - sycl::ext::oneapi::experimental::RangesRefT r{nd_range}; + sycl::detail::ranges_ref_view r{nd_range}; ASSERT_EQ(r.Dims, size_t{2}); ASSERT_EQ(r.GlobalSize[0], global_range[0]); ASSERT_EQ(r.GlobalSize[1], global_range[1]); @@ -56,7 +56,7 @@ TEST(RangesRefUsage, RangesRefUsage) { ASSERT_EQ(r.GlobalOffset[1], offset[1]); } { - sycl::ext::oneapi::experimental::RangesRefT r{global_range, local_range}; + sycl::detail::ranges_ref_view r{global_range, local_range}; ASSERT_EQ(r.Dims, size_t{2}); ASSERT_EQ(r.GlobalSize[0], global_range[0]); ASSERT_EQ(r.GlobalSize[1], global_range[1]); @@ -64,7 +64,7 @@ TEST(RangesRefUsage, RangesRefUsage) { ASSERT_EQ(r.LocalSize[1], local_range[1]); } { - sycl::ext::oneapi::experimental::RangesRefT r{global_range}; + sycl::detail::ranges_ref_view r{global_range}; ASSERT_EQ(r.Dims, size_t{2}); ASSERT_EQ(r.GlobalSize[0], global_range[0]); ASSERT_EQ(r.GlobalSize[1], global_range[1]); @@ -78,7 +78,7 @@ TEST(RangesRefUsage, RangesRefUsage) { sycl::nd_range<3> nd_range{global_range, local_range, offset}; { - sycl::ext::oneapi::experimental::RangesRefT r{nd_range}; + sycl::detail::ranges_ref_view r{nd_range}; ASSERT_EQ(r.Dims, size_t{3}); ASSERT_EQ(r.GlobalSize[0], global_range[0]); ASSERT_EQ(r.GlobalSize[1], global_range[1]); @@ -91,7 +91,7 @@ TEST(RangesRefUsage, RangesRefUsage) { ASSERT_EQ(r.GlobalOffset[2], offset[2]); } { - sycl::ext::oneapi::experimental::RangesRefT r{global_range, local_range}; + sycl::detail::ranges_ref_view r{global_range, local_range}; ASSERT_EQ(r.Dims, size_t{3}); ASSERT_EQ(r.GlobalSize[0], global_range[0]); ASSERT_EQ(r.GlobalSize[1], global_range[1]); @@ -101,7 +101,7 @@ TEST(RangesRefUsage, RangesRefUsage) { ASSERT_EQ(r.LocalSize[2], local_range[2]); } { - sycl::ext::oneapi::experimental::RangesRefT r{global_range}; + sycl::detail::ranges_ref_view r{global_range}; ASSERT_EQ(r.Dims, size_t{3}); ASSERT_EQ(r.GlobalSize[0], global_range[0]); ASSERT_EQ(r.GlobalSize[1], global_range[1]); From d4c267e5fa38c3e5d448413200e10d72feecfe93 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Mon, 18 Aug 2025 14:04:41 +0200 Subject: [PATCH 05/35] Fix formatting. --- sycl/include/sycl/detail/ranges_ref_view.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/detail/ranges_ref_view.hpp b/sycl/include/sycl/detail/ranges_ref_view.hpp index 5f0eccbcc2fed..661c62c4ac10c 100644 --- a/sycl/include/sycl/detail/ranges_ref_view.hpp +++ b/sycl/include/sycl/detail/ranges_ref_view.hpp @@ -24,7 +24,8 @@ class ranges_ref_view { ranges_ref_view(ranges_ref_view &&Desc) = default; template - ranges_ref_view(sycl::range &GlobalSizes, sycl::range &LocalSizes) + ranges_ref_view(sycl::range &GlobalSizes, + sycl::range &LocalSizes) : GlobalSize(&(GlobalSizes[0])), LocalSize(&(LocalSizes[0])), Dims{size_t(Dims_)} {} From 0d76be90a0489a758aa3be6aef43410b0c906f2a Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Tue, 19 Aug 2025 18:19:08 +0200 Subject: [PATCH 06/35] Move assignment near ctors. --- sycl/include/sycl/detail/ranges_ref_view.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/detail/ranges_ref_view.hpp b/sycl/include/sycl/detail/ranges_ref_view.hpp index 661c62c4ac10c..e81a989b2ed16 100644 --- a/sycl/include/sycl/detail/ranges_ref_view.hpp +++ b/sycl/include/sycl/detail/ranges_ref_view.hpp @@ -22,6 +22,8 @@ class ranges_ref_view { ranges_ref_view() = default; ranges_ref_view(const ranges_ref_view &Desc) = default; ranges_ref_view(ranges_ref_view &&Desc) = default; + ranges_ref_view &operator=(const ranges_ref_view &Desc) = default; + ranges_ref_view &operator=(ranges_ref_view &&Desc) = default; template ranges_ref_view(sycl::range &GlobalSizes, @@ -40,9 +42,6 @@ class ranges_ref_view { ranges_ref_view(sycl::range &Range) : GlobalSize(&(Range[0])), Dims{size_t(Dims_)} {} - ranges_ref_view &operator=(const ranges_ref_view &Desc) = default; - ranges_ref_view &operator=(ranges_ref_view &&Desc) = default; - const size_t *GlobalSize = nullptr; const size_t *LocalSize = nullptr; const size_t *GlobalOffset = nullptr; From 858bf21676caf7a04aed08e849e7ae2518b949ec Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Tue, 19 Aug 2025 18:25:16 +0200 Subject: [PATCH 07/35] Decrease dumpilcation is the test. --- .../scheduler/RangesRefViewUsage.cpp | 124 +++++------------- 1 file changed, 33 insertions(+), 91 deletions(-) diff --git a/sycl/unittests/scheduler/RangesRefViewUsage.cpp b/sycl/unittests/scheduler/RangesRefViewUsage.cpp index a705e0f01f4eb..85cb4c505e1cb 100644 --- a/sycl/unittests/scheduler/RangesRefViewUsage.cpp +++ b/sycl/unittests/scheduler/RangesRefViewUsage.cpp @@ -9,104 +9,46 @@ #include -TEST(RangesRefUsage, RangesRefUsage) { - sycl::detail::ranges_ref_view r0; - ASSERT_EQ(r0.Dims, size_t{0}); - +template +void TestNDRangesRefView(sycl::range global, sycl::range local, + sycl::id offset) { { - sycl::range<1> global_range{1024}; - sycl::range<1> local_range{64}; - sycl::id<1> offset{10}; - sycl::nd_range<1> nd_range{global_range, local_range, offset}; - - { - sycl::detail::ranges_ref_view r{nd_range}; - ASSERT_EQ(r.Dims, size_t{1}); - ASSERT_EQ(*r.GlobalSize, global_range[0]); - ASSERT_EQ(*r.LocalSize, local_range[0]); - ASSERT_EQ(*r.GlobalOffset, offset[0]); - } - { - sycl::detail::ranges_ref_view r{global_range, local_range}; - ASSERT_EQ(r.Dims, size_t{1}); - ASSERT_EQ(*r.GlobalSize, global_range[0]); - ASSERT_EQ(*r.LocalSize, local_range[0]); - } - { - sycl::detail::ranges_ref_view r{global_range}; - ASSERT_EQ(r.Dims, size_t{1}); - ASSERT_EQ(*r.GlobalSize, global_range[0]); - ASSERT_EQ(r.LocalSize, nullptr); + sycl::nd_range nd_range{global, local, offset}; + sycl::detail::ranges_ref_view r{nd_range}; + ASSERT_EQ(r.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(r.GlobalSize[d], global[d]); + ASSERT_EQ(r.LocalSize[d], local[d]); + ASSERT_EQ(r.GlobalOffset[d], offset[d]); } } { - sycl::range<2> global_range{1024, 512}; - sycl::range<2> local_range{64, 32}; - sycl::id<2> offset{10, 20}; - sycl::nd_range<2> nd_range{global_range, local_range, offset}; - - { - sycl::detail::ranges_ref_view r{nd_range}; - ASSERT_EQ(r.Dims, size_t{2}); - ASSERT_EQ(r.GlobalSize[0], global_range[0]); - ASSERT_EQ(r.GlobalSize[1], global_range[1]); - ASSERT_EQ(r.LocalSize[0], local_range[0]); - ASSERT_EQ(r.LocalSize[1], local_range[1]); - ASSERT_EQ(r.GlobalOffset[0], offset[0]); - ASSERT_EQ(r.GlobalOffset[1], offset[1]); - } - { - sycl::detail::ranges_ref_view r{global_range, local_range}; - ASSERT_EQ(r.Dims, size_t{2}); - ASSERT_EQ(r.GlobalSize[0], global_range[0]); - ASSERT_EQ(r.GlobalSize[1], global_range[1]); - ASSERT_EQ(r.LocalSize[0], local_range[0]); - ASSERT_EQ(r.LocalSize[1], local_range[1]); - } - { - sycl::detail::ranges_ref_view r{global_range}; - ASSERT_EQ(r.Dims, size_t{2}); - ASSERT_EQ(r.GlobalSize[0], global_range[0]); - ASSERT_EQ(r.GlobalSize[1], global_range[1]); - ASSERT_EQ(r.LocalSize, nullptr); + sycl::detail::ranges_ref_view r{global, local}; + ASSERT_EQ(r.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(r.GlobalSize[d], global[d]); + ASSERT_EQ(r.LocalSize[d], local[d]); } + ASSERT_EQ(r.GlobalOffset, nullptr); } { - sycl::range<3> global_range{1024, 512, 256}; - sycl::range<3> local_range{64, 32, 16}; - sycl::id<3> offset{10, 20, 30}; - sycl::nd_range<3> nd_range{global_range, local_range, offset}; - - { - sycl::detail::ranges_ref_view r{nd_range}; - ASSERT_EQ(r.Dims, size_t{3}); - ASSERT_EQ(r.GlobalSize[0], global_range[0]); - ASSERT_EQ(r.GlobalSize[1], global_range[1]); - ASSERT_EQ(r.GlobalSize[2], global_range[2]); - ASSERT_EQ(r.LocalSize[0], local_range[0]); - ASSERT_EQ(r.LocalSize[1], local_range[1]); - ASSERT_EQ(r.LocalSize[2], local_range[2]); - ASSERT_EQ(r.GlobalOffset[0], offset[0]); - ASSERT_EQ(r.GlobalOffset[1], offset[1]); - ASSERT_EQ(r.GlobalOffset[2], offset[2]); - } - { - sycl::detail::ranges_ref_view r{global_range, local_range}; - ASSERT_EQ(r.Dims, size_t{3}); - ASSERT_EQ(r.GlobalSize[0], global_range[0]); - ASSERT_EQ(r.GlobalSize[1], global_range[1]); - ASSERT_EQ(r.GlobalSize[2], global_range[2]); - ASSERT_EQ(r.LocalSize[0], local_range[0]); - ASSERT_EQ(r.LocalSize[1], local_range[1]); - ASSERT_EQ(r.LocalSize[2], local_range[2]); - } - { - sycl::detail::ranges_ref_view r{global_range}; - ASSERT_EQ(r.Dims, size_t{3}); - ASSERT_EQ(r.GlobalSize[0], global_range[0]); - ASSERT_EQ(r.GlobalSize[1], global_range[1]); - ASSERT_EQ(r.GlobalSize[2], global_range[2]); - ASSERT_EQ(r.LocalSize, nullptr); + sycl::detail::ranges_ref_view r{global}; + ASSERT_EQ(r.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(r.GlobalSize[d], global[d]); } + ASSERT_EQ(r.LocalSize, nullptr); + ASSERT_EQ(r.GlobalOffset, nullptr); } } + + +TEST(RangesRefUsage, RangesRefUsage) { + TestNDRangesRefView(sycl::range<1>{1024}, sycl::range<1>{64}, + sycl::id<1>{10}); + TestNDRangesRefView( + sycl::range<2>{1024, 512}, sycl::range<2>{64, 32}, sycl::id<2>{10, 5}); + TestNDRangesRefView( + sycl::range<3>{1024, 512, 256}, sycl::range<3>{64, 32, 16}, + sycl::id<3>{10, 5, 2}); +} From 95beb6a98c821ab47c3d6947dd7a90beeb3177d0 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Tue, 19 Aug 2025 18:30:59 +0200 Subject: [PATCH 08/35] Fix formatting. --- sycl/unittests/scheduler/RangesRefViewUsage.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sycl/unittests/scheduler/RangesRefViewUsage.cpp b/sycl/unittests/scheduler/RangesRefViewUsage.cpp index 85cb4c505e1cb..6f18a26915af3 100644 --- a/sycl/unittests/scheduler/RangesRefViewUsage.cpp +++ b/sycl/unittests/scheduler/RangesRefViewUsage.cpp @@ -45,10 +45,9 @@ void TestNDRangesRefView(sycl::range global, sycl::range local, TEST(RangesRefUsage, RangesRefUsage) { TestNDRangesRefView(sycl::range<1>{1024}, sycl::range<1>{64}, - sycl::id<1>{10}); - TestNDRangesRefView( - sycl::range<2>{1024, 512}, sycl::range<2>{64, 32}, sycl::id<2>{10, 5}); - TestNDRangesRefView( - sycl::range<3>{1024, 512, 256}, sycl::range<3>{64, 32, 16}, - sycl::id<3>{10, 5, 2}); + sycl::id<1>{10}); + TestNDRangesRefView(sycl::range<2>{1024, 512}, sycl::range<2>{64, 32}, + sycl::id<2>{10, 5}); + TestNDRangesRefView(sycl::range<3>{1024, 512, 256}, + sycl::range<3>{64, 32, 16}, sycl::id<3>{10, 5, 2}); } From f2e347dae4d491213115d2c6fb6d1616f6707688 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Tue, 19 Aug 2025 18:52:29 +0200 Subject: [PATCH 09/35] Fix formatting. --- sycl/unittests/scheduler/RangesRefViewUsage.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/unittests/scheduler/RangesRefViewUsage.cpp b/sycl/unittests/scheduler/RangesRefViewUsage.cpp index 6f18a26915af3..37d82511dadff 100644 --- a/sycl/unittests/scheduler/RangesRefViewUsage.cpp +++ b/sycl/unittests/scheduler/RangesRefViewUsage.cpp @@ -42,7 +42,6 @@ void TestNDRangesRefView(sycl::range global, sycl::range local, } } - TEST(RangesRefUsage, RangesRefUsage) { TestNDRangesRefView(sycl::range<1>{1024}, sycl::range<1>{64}, sycl::id<1>{10}); From 71e2eee620f6cd446c13aa1c244de72b1cbcee46 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Fri, 22 Aug 2025 16:51:40 +0200 Subject: [PATCH 10/35] Add export to sycl::detail::NDRDescT. --- sycl/include/sycl/detail/ranges_ref_view.hpp | 4 +++ sycl/source/detail/queue_impl.cpp | 19 ++++++++++ .../scheduler/RangesRefViewUsage.cpp | 36 +++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/sycl/include/sycl/detail/ranges_ref_view.hpp b/sycl/include/sycl/detail/ranges_ref_view.hpp index e81a989b2ed16..5d6c5eb7a24d5 100644 --- a/sycl/include/sycl/detail/ranges_ref_view.hpp +++ b/sycl/include/sycl/detail/ranges_ref_view.hpp @@ -14,6 +14,8 @@ namespace sycl { inline namespace _V1 { namespace detail { +class NDRDescT; + // The structure to keep dimension and references to ranges unified for // all dimensions. class ranges_ref_view { @@ -42,6 +44,8 @@ class ranges_ref_view { ranges_ref_view(sycl::range &Range) : GlobalSize(&(Range[0])), Dims{size_t(Dims_)} {} + sycl::detail::NDRDescT toNDRDescT() const; + const size_t *GlobalSize = nullptr; const size_t *LocalSize = nullptr; const size_t *GlobalOffset = nullptr; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 24ed44b219e3a..696476a678c9a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -126,6 +127,24 @@ prepareSYCLEventAssociatedWithQueue(detail::queue_impl &QueueImpl) { return detail::createSyclObjFromImpl(EventImpl); } +sycl::detail::NDRDescT ranges_ref_view::toNDRDescT() const { + NDRDescT NDRDesc; + + NDRDesc.Dims = Dims; + for (size_t i = 0; i < Dims; ++i) { + NDRDesc.GlobalSize[i] = GlobalSize[i]; + } + if (LocalSize) + for (size_t i = 0; i < Dims; ++i) { + NDRDesc.LocalSize[i] = LocalSize[i]; + } + if (GlobalOffset) + for (size_t i = 0; i < Dims; ++i) { + NDRDesc.GlobalOffset[i] = GlobalOffset[i]; + } + return NDRDesc; +} + const std::vector & queue_impl::getExtendDependencyList(const std::vector &DepEvents, std::vector &MutableVec, diff --git a/sycl/unittests/scheduler/RangesRefViewUsage.cpp b/sycl/unittests/scheduler/RangesRefViewUsage.cpp index 37d82511dadff..152b486709ec3 100644 --- a/sycl/unittests/scheduler/RangesRefViewUsage.cpp +++ b/sycl/unittests/scheduler/RangesRefViewUsage.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// #include +#include #include @@ -21,6 +22,14 @@ void TestNDRangesRefView(sycl::range global, sycl::range local, ASSERT_EQ(r.LocalSize[d], local[d]); ASSERT_EQ(r.GlobalOffset[d], offset[d]); } + + sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); + ASSERT_EQ(NDRDesc.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], global[d]); + ASSERT_EQ(NDRDesc.LocalSize[d], local[d]); + ASSERT_EQ(NDRDesc.GlobalOffset[d], offset[d]); + } } { sycl::detail::ranges_ref_view r{global, local}; @@ -30,6 +39,20 @@ void TestNDRangesRefView(sycl::range global, sycl::range local, ASSERT_EQ(r.LocalSize[d], local[d]); } ASSERT_EQ(r.GlobalOffset, nullptr); + + sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); + ASSERT_EQ(NDRDesc.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], global[d]); + ASSERT_EQ(NDRDesc.LocalSize[d], local[d]); + } + for (int d = dims; d < 3; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], 0UL); + ASSERT_EQ(NDRDesc.LocalSize[d], 0UL); + } + for (int d = 0; d < 3; d++) { + ASSERT_EQ(NDRDesc.GlobalOffset[d], 0UL); + } } { sycl::detail::ranges_ref_view r{global}; @@ -39,6 +62,19 @@ void TestNDRangesRefView(sycl::range global, sycl::range local, } ASSERT_EQ(r.LocalSize, nullptr); ASSERT_EQ(r.GlobalOffset, nullptr); + + sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); + ASSERT_EQ(NDRDesc.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], global[d]); + } + for (int d = dims; d < 3; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], 0UL); + } + for (int d = 0; d < 3; d++) { + ASSERT_EQ(NDRDesc.LocalSize[d], 0UL); + ASSERT_EQ(NDRDesc.GlobalOffset[d], 0UL); + } } } From 7a3f2699105352161d44f5bc11e5d322231b66c4 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Fri, 22 Aug 2025 17:32:26 +0200 Subject: [PATCH 11/35] Fix formatting. --- sycl/unittests/scheduler/RangesRefViewUsage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/scheduler/RangesRefViewUsage.cpp b/sycl/unittests/scheduler/RangesRefViewUsage.cpp index 152b486709ec3..f28b914e16c11 100644 --- a/sycl/unittests/scheduler/RangesRefViewUsage.cpp +++ b/sycl/unittests/scheduler/RangesRefViewUsage.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include #include +#include #include From d309440bb1cf67576aa7195548c271824b6df207 Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 27 Aug 2025 12:00:36 +0200 Subject: [PATCH 12/35] Rename sycl::detail::ranges_ref_view to nd_range_view. --- ...{ranges_ref_view.hpp => nd_range_view.hpp} | 24 +++++++++---------- sycl/include/sycl/nd_range.hpp | 4 ++-- sycl/source/detail/queue_impl.cpp | 8 +++---- sycl/unittests/scheduler/CMakeLists.txt | 2 +- ...sRefViewUsage.cpp => NdRangeViewUsage.cpp} | 24 +++++++++---------- 5 files changed, 31 insertions(+), 31 deletions(-) rename sycl/include/sycl/detail/{ranges_ref_view.hpp => nd_range_view.hpp} (64%) rename sycl/unittests/scheduler/{RangesRefViewUsage.cpp => NdRangeViewUsage.cpp} (78%) diff --git a/sycl/include/sycl/detail/ranges_ref_view.hpp b/sycl/include/sycl/detail/nd_range_view.hpp similarity index 64% rename from sycl/include/sycl/detail/ranges_ref_view.hpp rename to sycl/include/sycl/detail/nd_range_view.hpp index 5d6c5eb7a24d5..de3b52c075c38 100644 --- a/sycl/include/sycl/detail/ranges_ref_view.hpp +++ b/sycl/include/sycl/detail/nd_range_view.hpp @@ -1,4 +1,4 @@ -//==---- ranges_ref_view.hpp --- SYCL iteration with reference to ranges ---==// +//==---- nd_range_view.hpp --- SYCL iteration with reference to ranges ---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -18,37 +18,37 @@ class NDRDescT; // The structure to keep dimension and references to ranges unified for // all dimensions. -class ranges_ref_view { +class nd_range_view { public: - ranges_ref_view() = default; - ranges_ref_view(const ranges_ref_view &Desc) = default; - ranges_ref_view(ranges_ref_view &&Desc) = default; - ranges_ref_view &operator=(const ranges_ref_view &Desc) = default; - ranges_ref_view &operator=(ranges_ref_view &&Desc) = default; + nd_range_view() = default; + nd_range_view(const nd_range_view &Desc) = default; + nd_range_view(nd_range_view &&Desc) = default; + nd_range_view &operator=(const nd_range_view &Desc) = default; + nd_range_view &operator=(nd_range_view &&Desc) = default; template - ranges_ref_view(sycl::range &GlobalSizes, + nd_range_view(sycl::range &GlobalSizes, sycl::range &LocalSizes) : GlobalSize(&(GlobalSizes[0])), LocalSize(&(LocalSizes[0])), Dims{size_t(Dims_)} {} // to support usage in sycl::ext::oneapi::experimental::submit_with_event() template - ranges_ref_view(sycl::nd_range &ExecutionRange) + nd_range_view(sycl::nd_range &ExecutionRange) : GlobalSize(&ExecutionRange.globalSize[0]), LocalSize(&ExecutionRange.localSize[0]), - GlobalOffset(&ExecutionRange.offset[0]), Dims{size_t(Dims_)} {} + Offset(&ExecutionRange.offset[0]), Dims{size_t(Dims_)} {} template - ranges_ref_view(sycl::range &Range) + nd_range_view(sycl::range &Range) : GlobalSize(&(Range[0])), Dims{size_t(Dims_)} {} sycl::detail::NDRDescT toNDRDescT() const; const size_t *GlobalSize = nullptr; const size_t *LocalSize = nullptr; - const size_t *GlobalOffset = nullptr; + const size_t *Offset = nullptr; size_t Dims = 0; }; diff --git a/sycl/include/sycl/nd_range.hpp b/sycl/include/sycl/nd_range.hpp index 4ba352910b522..30816b8a4b354 100644 --- a/sycl/include/sycl/nd_range.hpp +++ b/sycl/include/sycl/nd_range.hpp @@ -16,7 +16,7 @@ namespace sycl { inline namespace _V1 { namespace detail { -class ranges_ref_view; +class nd_range_view; } /// Defines the iteration domain of both the work-groups and the overall @@ -70,7 +70,7 @@ template class nd_range { return !(*this == rhs); } - friend class sycl::_V1::detail::ranges_ref_view; + friend class sycl::_V1::detail::nd_range_view; }; } // namespace _V1 diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 2ae4b74d0f369..50f4446c925a9 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include @@ -127,7 +127,7 @@ prepareSYCLEventAssociatedWithQueue(detail::queue_impl &QueueImpl) { return detail::createSyclObjFromImpl(EventImpl); } -sycl::detail::NDRDescT ranges_ref_view::toNDRDescT() const { +sycl::detail::NDRDescT nd_range_view::toNDRDescT() const { NDRDescT NDRDesc; NDRDesc.Dims = Dims; @@ -138,9 +138,9 @@ sycl::detail::NDRDescT ranges_ref_view::toNDRDescT() const { for (size_t i = 0; i < Dims; ++i) { NDRDesc.LocalSize[i] = LocalSize[i]; } - if (GlobalOffset) + if (Offset) for (size_t i = 0; i < Dims; ++i) { - NDRDesc.GlobalOffset[i] = GlobalOffset[i]; + NDRDesc.GlobalOffset[i] = Offset[i]; } return NDRDesc; } diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index 3d44f1d81a57e..9041793ecdaf2 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -22,5 +22,5 @@ add_sycl_unittest(SchedulerTests OBJECT AccessorDefaultCtor.cpp HostTaskAndBarrier.cpp BarrierDependencies.cpp - RangesRefViewUsage.cpp + NdRangeViewUsage.cpp ) diff --git a/sycl/unittests/scheduler/RangesRefViewUsage.cpp b/sycl/unittests/scheduler/NdRangeViewUsage.cpp similarity index 78% rename from sycl/unittests/scheduler/RangesRefViewUsage.cpp rename to sycl/unittests/scheduler/NdRangeViewUsage.cpp index f28b914e16c11..229fad21d68d5 100644 --- a/sycl/unittests/scheduler/RangesRefViewUsage.cpp +++ b/sycl/unittests/scheduler/NdRangeViewUsage.cpp @@ -1,4 +1,4 @@ -//==---- RangesRefViewUsage.cpp --- Check ranges_ref_view ------------------==// +//==---- NdRangeViewUsage.cpp --- Check nd_range_view ------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,21 +6,21 @@ // //===----------------------------------------------------------------------===// #include -#include +#include #include template -void TestNDRangesRefView(sycl::range global, sycl::range local, +void TestNdRangeView(sycl::range global, sycl::range local, sycl::id offset) { { sycl::nd_range nd_range{global, local, offset}; - sycl::detail::ranges_ref_view r{nd_range}; + sycl::detail::nd_range_view r{nd_range}; ASSERT_EQ(r.Dims, size_t{dims}); for (int d = 0; d < dims; d++) { ASSERT_EQ(r.GlobalSize[d], global[d]); ASSERT_EQ(r.LocalSize[d], local[d]); - ASSERT_EQ(r.GlobalOffset[d], offset[d]); + ASSERT_EQ(r.Offset[d], offset[d]); } sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); @@ -32,13 +32,13 @@ void TestNDRangesRefView(sycl::range global, sycl::range local, } } { - sycl::detail::ranges_ref_view r{global, local}; + sycl::detail::nd_range_view r{global, local}; ASSERT_EQ(r.Dims, size_t{dims}); for (int d = 0; d < dims; d++) { ASSERT_EQ(r.GlobalSize[d], global[d]); ASSERT_EQ(r.LocalSize[d], local[d]); } - ASSERT_EQ(r.GlobalOffset, nullptr); + ASSERT_EQ(r.Offset, nullptr); sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); ASSERT_EQ(NDRDesc.Dims, size_t{dims}); @@ -55,13 +55,13 @@ void TestNDRangesRefView(sycl::range global, sycl::range local, } } { - sycl::detail::ranges_ref_view r{global}; + sycl::detail::nd_range_view r{global}; ASSERT_EQ(r.Dims, size_t{dims}); for (int d = 0; d < dims; d++) { ASSERT_EQ(r.GlobalSize[d], global[d]); } ASSERT_EQ(r.LocalSize, nullptr); - ASSERT_EQ(r.GlobalOffset, nullptr); + ASSERT_EQ(r.Offset, nullptr); sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); ASSERT_EQ(NDRDesc.Dims, size_t{dims}); @@ -79,10 +79,10 @@ void TestNDRangesRefView(sycl::range global, sycl::range local, } TEST(RangesRefUsage, RangesRefUsage) { - TestNDRangesRefView(sycl::range<1>{1024}, sycl::range<1>{64}, + TestNdRangeView(sycl::range<1>{1024}, sycl::range<1>{64}, sycl::id<1>{10}); - TestNDRangesRefView(sycl::range<2>{1024, 512}, sycl::range<2>{64, 32}, + TestNdRangeView(sycl::range<2>{1024, 512}, sycl::range<2>{64, 32}, sycl::id<2>{10, 5}); - TestNDRangesRefView(sycl::range<3>{1024, 512, 256}, + TestNdRangeView(sycl::range<3>{1024, 512, 256}, sycl::range<3>{64, 32, 16}, sycl::id<3>{10, 5, 2}); } From c7bc868a91ca1cb172857ff4b74910465a1311bb Mon Sep 17 00:00:00 2001 From: Alexandr Konovalov Date: Wed, 27 Aug 2025 12:10:54 +0200 Subject: [PATCH 13/35] Fix formatting. --- sycl/include/sycl/detail/nd_range_view.hpp | 3 +-- sycl/unittests/scheduler/NdRangeViewUsage.cpp | 11 +++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/detail/nd_range_view.hpp b/sycl/include/sycl/detail/nd_range_view.hpp index de3b52c075c38..35b5a606566a9 100644 --- a/sycl/include/sycl/detail/nd_range_view.hpp +++ b/sycl/include/sycl/detail/nd_range_view.hpp @@ -28,8 +28,7 @@ class nd_range_view { nd_range_view &operator=(nd_range_view &&Desc) = default; template - nd_range_view(sycl::range &GlobalSizes, - sycl::range &LocalSizes) + nd_range_view(sycl::range &GlobalSizes, sycl::range &LocalSizes) : GlobalSize(&(GlobalSizes[0])), LocalSize(&(LocalSizes[0])), Dims{size_t(Dims_)} {} diff --git a/sycl/unittests/scheduler/NdRangeViewUsage.cpp b/sycl/unittests/scheduler/NdRangeViewUsage.cpp index 229fad21d68d5..65bc466ab32eb 100644 --- a/sycl/unittests/scheduler/NdRangeViewUsage.cpp +++ b/sycl/unittests/scheduler/NdRangeViewUsage.cpp @@ -12,7 +12,7 @@ template void TestNdRangeView(sycl::range global, sycl::range local, - sycl::id offset) { + sycl::id offset) { { sycl::nd_range nd_range{global, local, offset}; sycl::detail::nd_range_view r{nd_range}; @@ -79,10 +79,9 @@ void TestNdRangeView(sycl::range global, sycl::range local, } TEST(RangesRefUsage, RangesRefUsage) { - TestNdRangeView(sycl::range<1>{1024}, sycl::range<1>{64}, - sycl::id<1>{10}); + TestNdRangeView(sycl::range<1>{1024}, sycl::range<1>{64}, sycl::id<1>{10}); TestNdRangeView(sycl::range<2>{1024, 512}, sycl::range<2>{64, 32}, - sycl::id<2>{10, 5}); - TestNdRangeView(sycl::range<3>{1024, 512, 256}, - sycl::range<3>{64, 32, 16}, sycl::id<3>{10, 5, 2}); + sycl::id<2>{10, 5}); + TestNdRangeView(sycl::range<3>{1024, 512, 256}, sycl::range<3>{64, 32, 16}, + sycl::id<3>{10, 5, 2}); } From a3ae6c030c2d0355fc30653ce1769b42692498ee Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Fri, 31 Oct 2025 09:24:37 +0000 Subject: [PATCH 14/35] Update the nd_range_view and NDRDescT conversion --- sycl/include/sycl/detail/nd_range_view.hpp | 29 ++++----- sycl/include/sycl/queue.hpp | 21 +++---- sycl/source/detail/ndrange_desc.hpp | 39 +++++++----- sycl/source/detail/queue_impl.cpp | 22 +++---- sycl/source/detail/queue_impl.hpp | 16 +++-- sycl/source/queue.cpp | 46 ++------------ sycl/unittests/scheduler/NdRangeViewUsage.cpp | 63 ++++++++++++++----- 7 files changed, 116 insertions(+), 120 deletions(-) diff --git a/sycl/include/sycl/detail/nd_range_view.hpp b/sycl/include/sycl/detail/nd_range_view.hpp index 35b5a606566a9..ea7f403f171b3 100644 --- a/sycl/include/sycl/detail/nd_range_view.hpp +++ b/sycl/include/sycl/detail/nd_range_view.hpp @@ -28,27 +28,28 @@ class nd_range_view { nd_range_view &operator=(nd_range_view &&Desc) = default; template - nd_range_view(sycl::range &GlobalSizes, sycl::range &LocalSizes) - : GlobalSize(&(GlobalSizes[0])), LocalSize(&(LocalSizes[0])), - Dims{size_t(Dims_)} {} + nd_range_view(sycl::range &N, bool SetNumWorkGroups = false) + : MGlobalSize(&(N[0])), MSetNumWorkGroups(SetNumWorkGroups), + MDims{size_t(Dims_)} {} - // to support usage in sycl::ext::oneapi::experimental::submit_with_event() template - nd_range_view(sycl::nd_range &ExecutionRange) - : GlobalSize(&ExecutionRange.globalSize[0]), - LocalSize(&ExecutionRange.localSize[0]), - Offset(&ExecutionRange.offset[0]), Dims{size_t(Dims_)} {} + nd_range_view(sycl::range &GlobalSize, sycl::id &Offset) + : MGlobalSize(&(GlobalSize[0])), MOffset(&(Offset[0])), + MDims{size_t(Dims_)} {} template - nd_range_view(sycl::range &Range) - : GlobalSize(&(Range[0])), Dims{size_t(Dims_)} {} + nd_range_view(sycl::nd_range &ExecutionRange) + : MGlobalSize(&(ExecutionRange.globalSize[0])), + MLocalSize(&(ExecutionRange.localSize[0])), + MOffset(&(ExecutionRange.offset[0])), MDims{size_t(Dims_)} {} sycl::detail::NDRDescT toNDRDescT() const; - const size_t *GlobalSize = nullptr; - const size_t *LocalSize = nullptr; - const size_t *Offset = nullptr; - size_t Dims = 0; + const size_t *MGlobalSize = nullptr; + const size_t *MLocalSize = nullptr; + const size_t *MOffset = nullptr; + bool MSetNumWorkGroups = false; + size_t MDims = 0; }; } // namespace detail diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 4a7f1fac789a3..f9291d5283ccb 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -21,6 +21,7 @@ #include // for checkValueRange #include // for is_queue_info_... #include // for KernelInfo +#include // for nd_range_view #include #include // for OwnerLessBase #include // for device @@ -63,16 +64,14 @@ template auto get_native(const SyclObjectT &Obj) -> backend_return_t; -template event __SYCL_EXPORT submit_kernel_direct_with_event_impl( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); -template void __SYCL_EXPORT submit_kernel_direct_without_event_impl( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -162,7 +161,7 @@ template auto submit_kernel_direct( const queue &Queue, [[maybe_unused]] PropertiesT Props, - const nd_range &Range, KernelTypeUniversalRef &&KernelFunc, + detail::nd_range_view RangeView, KernelTypeUniversalRef &&KernelFunc, const detail::code_location &CodeLoc = detail::code_location::current()) { // TODO Properties not supported yet static_assert( @@ -212,11 +211,11 @@ auto submit_kernel_direct( if constexpr (EventNeeded) { return submit_kernel_direct_with_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, + Queue, RangeView, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } else { submit_kernel_direct_without_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, + Queue, RangeView, HostKernel, DeviceKernelInfoPtr, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } } @@ -224,7 +223,7 @@ auto submit_kernel_direct( template auto submit_kernel_direct_parallel_for( - const queue &Queue, PropertiesT Props, const nd_range &Range, + const queue &Queue, PropertiesT Props, nd_range Range, KernelTypeUniversalRef &&KernelFunc, const detail::code_location &CodeLoc = detail::code_location::current()) { @@ -246,8 +245,8 @@ auto submit_kernel_direct_parallel_for( return submit_kernel_direct( - Queue, Props, Range, std::forward(KernelFunc), - CodeLoc); + Queue, Props, detail::nd_range_view(Range), + std::forward(KernelFunc), CodeLoc); } template ( - Queue, Props, nd_range<1>{1, 1}, + Queue, Props, detail::nd_range_view(), std::forward(KernelFunc), CodeLoc); } diff --git a/sycl/source/detail/ndrange_desc.hpp b/sycl/source/detail/ndrange_desc.hpp index 30108d729db31..5426f2e09d43f 100644 --- a/sycl/source/detail/ndrange_desc.hpp +++ b/sycl/source/detail/ndrange_desc.hpp @@ -32,51 +32,62 @@ class NDRDescT { NDRDescT(const NDRDescT &Desc) = default; NDRDescT(NDRDescT &&Desc) = default; - template - NDRDescT(sycl::range N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} { + NDRDescT(const size_t *N, bool SetNumWorkGroups, int DimsVal) + : Dims{size_t(DimsVal)} { if (SetNumWorkGroups) { - for (size_t I = 0; I < Dims_; ++I) { + for (size_t I = 0; I < Dims; ++I) { NumWorkGroups[I] = N[I]; } } else { - for (size_t I = 0; I < Dims_; ++I) { + for (size_t I = 0; I < Dims; ++I) { GlobalSize[I] = N[I]; } - for (int I = Dims_; I < 3; ++I) { + for (int I = Dims; I < 3; ++I) { GlobalSize[I] = 1; } } } template - NDRDescT(sycl::range NumWorkItems, sycl::range LocalSizes, - sycl::id Offset) - : Dims{size_t(Dims_)} { - for (size_t I = 0; I < Dims_; ++I) { + NDRDescT(sycl::range N, bool SetNumWorkGroups) + : NDRDescT(&(N[0]), SetNumWorkGroups, Dims_) {} + + NDRDescT(const size_t *NumWorkItems, const size_t *LocalSizes, + const size_t *Offset, int DimsVal) + : Dims{size_t(DimsVal)} { + for (size_t I = 0; I < Dims; ++I) { GlobalSize[I] = NumWorkItems[I]; LocalSize[I] = LocalSizes[I]; GlobalOffset[I] = Offset[I]; } - for (int I = Dims_; I < 3; ++I) { + for (int I = Dims; I < 3; ++I) { LocalSize[I] = LocalSizes[0] ? 1 : 0; } - for (int I = Dims_; I < 3; ++I) { + for (int I = Dims; I < 3; ++I) { GlobalSize[I] = 1; } } template - NDRDescT(sycl::range NumWorkItems, sycl::id Offset) - : Dims{size_t(Dims_)} { - for (size_t I = 0; I < Dims_; ++I) { + NDRDescT(sycl::range NumWorkItems, sycl::range LocalSizes, + sycl::id Offset) + : NDRDescT(&(NumWorkItems[0]), &(LocalSizes[0]), &(Offset[0]), Dims_) {} + + NDRDescT(const size_t *NumWorkItems, const size_t *Offset, int DimsVal) + : Dims{size_t(DimsVal)} { + for (size_t I = 0; I < Dims; ++I) { GlobalSize[I] = NumWorkItems[I]; GlobalOffset[I] = Offset[I]; } } + template + NDRDescT(sycl::range NumWorkItems, sycl::id Offset) + : NDRDescT(&(NumWorkItems[0]), &(Offset[0]), Dims_) {} + template NDRDescT(sycl::nd_range ExecutionRange) : NDRDescT(ExecutionRange.get_global_range(), diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 4091946739d9c..49e98c3c14018 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -127,21 +127,15 @@ prepareSYCLEventAssociatedWithQueue(detail::queue_impl &QueueImpl) { } sycl::detail::NDRDescT nd_range_view::toNDRDescT() const { - NDRDescT NDRDesc; - - NDRDesc.Dims = Dims; - for (size_t i = 0; i < Dims; ++i) { - NDRDesc.GlobalSize[i] = GlobalSize[i]; + if (!MGlobalSize) { + return NDRDescT(nd_range<1>{1, 1}); + } else if (MLocalSize) { + return NDRDescT(MGlobalSize, MLocalSize, MOffset, MDims); + } else if (MOffset) { + return NDRDescT(MGlobalSize, MOffset, MDims); + } else { + return NDRDescT(MGlobalSize, MSetNumWorkGroups, MDims); } - if (LocalSize) - for (size_t i = 0; i < Dims; ++i) { - NDRDesc.LocalSize[i] = LocalSize[i]; - } - if (Offset) - for (size_t i = 0; i < Dims; ++i) { - NDRDesc.GlobalOffset[i] = Offset[i]; - } - return NDRDesc; } const std::vector & diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 49da7aee8c448..d6e3d30f404e9 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -359,24 +359,22 @@ class queue_impl : public std::enable_shared_from_this { return createSyclObjFromImpl(ResEvent); } - template event submit_kernel_direct_with_event( - const nd_range &Range, detail::HostKernelRefBase &HostKernel, + detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - detail::EventImplPtr EventImpl = - submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - true, CodeLoc, IsTopCodeLoc); + detail::EventImplPtr EventImpl = submit_kernel_direct_impl( + RangeView.toNDRDescT(), HostKernel, DeviceKernelInfo, true, CodeLoc, + IsTopCodeLoc); return createSyclObjFromImpl(EventImpl); } - template void submit_kernel_direct_without_event( - const nd_range &Range, detail::HostKernelRefBase &HostKernel, + detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - false, CodeLoc, IsTopCodeLoc); + submit_kernel_direct_impl(RangeView.toNDRDescT(), HostKernel, + DeviceKernelInfo, false, CodeLoc, IsTopCodeLoc); } void submit_without_event(const detail::type_erased_cgfo_ty &CGF, diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index f34da47852266..fe1778e0b84d3 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -471,62 +471,24 @@ void queue::ext_oneapi_set_external_event(const event &external_event) { const property_list &queue::getPropList() const { return impl->getPropList(); } -template event submit_kernel_direct_with_event_impl( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( - Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); + RangeView, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); } -template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( - const queue &Queue, const nd_range<1> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( - const queue &Queue, const nd_range<2> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( - const queue &Queue, const nd_range<3> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template void submit_kernel_direct_without_event_impl( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( - Range, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); + RangeView, HostKernel, DeviceKernelInfo, CodeLoc, IsTopCodeLoc); } -template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( - const queue &Queue, const nd_range<1> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( - const queue &Queue, const nd_range<2> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( - const queue &Queue, const nd_range<3> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - } // namespace _V1 } // namespace sycl diff --git a/sycl/unittests/scheduler/NdRangeViewUsage.cpp b/sycl/unittests/scheduler/NdRangeViewUsage.cpp index 65bc466ab32eb..dd1e0659cb607 100644 --- a/sycl/unittests/scheduler/NdRangeViewUsage.cpp +++ b/sycl/unittests/scheduler/NdRangeViewUsage.cpp @@ -16,11 +16,11 @@ void TestNdRangeView(sycl::range global, sycl::range local, { sycl::nd_range nd_range{global, local, offset}; sycl::detail::nd_range_view r{nd_range}; - ASSERT_EQ(r.Dims, size_t{dims}); + ASSERT_EQ(r.MDims, size_t{dims}); for (int d = 0; d < dims; d++) { - ASSERT_EQ(r.GlobalSize[d], global[d]); - ASSERT_EQ(r.LocalSize[d], local[d]); - ASSERT_EQ(r.Offset[d], offset[d]); + ASSERT_EQ(r.MGlobalSize[d], global[d]); + ASSERT_EQ(r.MLocalSize[d], local[d]); + ASSERT_EQ(r.MOffset[d], offset[d]); } sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); @@ -30,38 +30,68 @@ void TestNdRangeView(sycl::range global, sycl::range local, ASSERT_EQ(NDRDesc.LocalSize[d], local[d]); ASSERT_EQ(NDRDesc.GlobalOffset[d], offset[d]); } + + for (int d = dims; d < 3; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], 1UL); + ASSERT_EQ(NDRDesc.LocalSize[d], 1UL); + } } { - sycl::detail::nd_range_view r{global, local}; - ASSERT_EQ(r.Dims, size_t{dims}); + sycl::detail::nd_range_view r{global, offset}; + ASSERT_EQ(r.MDims, size_t{dims}); for (int d = 0; d < dims; d++) { - ASSERT_EQ(r.GlobalSize[d], global[d]); - ASSERT_EQ(r.LocalSize[d], local[d]); + ASSERT_EQ(r.MGlobalSize[d], global[d]); + ASSERT_EQ(r.MOffset[d], offset[d]); } - ASSERT_EQ(r.Offset, nullptr); + ASSERT_EQ(r.MLocalSize, nullptr); sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); ASSERT_EQ(NDRDesc.Dims, size_t{dims}); for (int d = 0; d < dims; d++) { ASSERT_EQ(NDRDesc.GlobalSize[d], global[d]); - ASSERT_EQ(NDRDesc.LocalSize[d], local[d]); + ASSERT_EQ(NDRDesc.GlobalOffset[d], offset[d]); } for (int d = dims; d < 3; d++) { ASSERT_EQ(NDRDesc.GlobalSize[d], 0UL); + ASSERT_EQ(NDRDesc.GlobalOffset[d], 0UL); + } + for (int d = 0; d < 3; d++) { ASSERT_EQ(NDRDesc.LocalSize[d], 0UL); } + } + { + sycl::detail::nd_range_view r{global, true}; + ASSERT_EQ(r.MDims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(r.MGlobalSize[d], global[d]); + ASSERT_EQ(r.MSetNumWorkGroups, true); + } + ASSERT_EQ(r.MLocalSize, nullptr); + ASSERT_EQ(r.MOffset, nullptr); + + sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); + ASSERT_EQ(NDRDesc.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(NDRDesc.NumWorkGroups[d], global[d]); + } + for (int d = dims; d < 3; d++) { + ASSERT_EQ(NDRDesc.NumWorkGroups[d], 0UL); + } for (int d = 0; d < 3; d++) { + ASSERT_EQ(NDRDesc.LocalSize[d], 0UL); + ASSERT_EQ(NDRDesc.GlobalSize[d], 0UL); ASSERT_EQ(NDRDesc.GlobalOffset[d], 0UL); } } { - sycl::detail::nd_range_view r{global}; - ASSERT_EQ(r.Dims, size_t{dims}); + sycl::detail::nd_range_view r{global, false}; + ASSERT_EQ(r.MDims, size_t{dims}); for (int d = 0; d < dims; d++) { - ASSERT_EQ(r.GlobalSize[d], global[d]); + ASSERT_EQ(r.MGlobalSize[d], global[d]); + ASSERT_EQ(r.MSetNumWorkGroups, false); } - ASSERT_EQ(r.LocalSize, nullptr); - ASSERT_EQ(r.Offset, nullptr); + ASSERT_EQ(r.MLocalSize, nullptr); + ASSERT_EQ(r.MOffset, nullptr); sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); ASSERT_EQ(NDRDesc.Dims, size_t{dims}); @@ -69,10 +99,11 @@ void TestNdRangeView(sycl::range global, sycl::range local, ASSERT_EQ(NDRDesc.GlobalSize[d], global[d]); } for (int d = dims; d < 3; d++) { - ASSERT_EQ(NDRDesc.GlobalSize[d], 0UL); + ASSERT_EQ(NDRDesc.GlobalSize[d], 1UL); } for (int d = 0; d < 3; d++) { ASSERT_EQ(NDRDesc.LocalSize[d], 0UL); + ASSERT_EQ(NDRDesc.NumWorkGroups[d], 0UL); ASSERT_EQ(NDRDesc.GlobalOffset[d], 0UL); } } From 8c970ab1ea1868154c683967ad1a5a558ea4a09b Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 3 Nov 2025 14:15:22 +0000 Subject: [PATCH 15/35] Update Linux symbols --- sycl/test/abi/sycl_symbols_linux.dump | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index e9fdf8dd5215e..2c9b6e68221db 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,8 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoERKNS4_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoERKNS4_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv From 8a434a1a0302b9089afc91f295ddfa7788a2875f Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 3 Nov 2025 14:26:05 +0000 Subject: [PATCH 16/35] Update the include deps tests --- sycl/test/include_deps/sycl_detail_core.hpp.cpp | 1 + sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp | 1 + sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp | 1 + sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp | 1 + 4 files changed, 4 insertions(+) diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index ed1175ba9f57b..45bf678ebffc3 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -98,6 +98,7 @@ // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp +// CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: device.hpp // CHECK-NEXT: detail/string_view.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp index b6c5ac3144887..e9790e74c75df 100644 --- a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp @@ -102,6 +102,7 @@ // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp +// CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: device.hpp // CHECK-NEXT: detail/string_view.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp index 50abdf954cca0..65e5d95389e72 100644 --- a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp @@ -180,6 +180,7 @@ // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: queue.hpp +// CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: ext/oneapi/experimental/event_mode_property.hpp // CHECK-NEXT: sycl_span.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp index 457978c1753c7..46968bb10d418 100644 --- a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp @@ -122,6 +122,7 @@ // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp +// CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: event.hpp // CHECK-NEXT: exception_list.hpp From fa35c99b59f850bf05620c3e302e79c440ea4551 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 3 Nov 2025 14:46:30 +0000 Subject: [PATCH 17/35] Add nd_range_view layout tests --- sycl/test/abi/layout_array.cpp | 13 +++++++++++++ sycl/test/abi/symbol_size_alignment.cpp | 2 ++ 2 files changed, 15 insertions(+) diff --git a/sycl/test/abi/layout_array.cpp b/sycl/test/abi/layout_array.cpp index db0236091a46c..9ab0970a14570 100644 --- a/sycl/test/abi/layout_array.cpp +++ b/sycl/test/abi/layout_array.cpp @@ -8,6 +8,7 @@ #include #include #include +#include SYCL_EXTERNAL void id(sycl::id<2>) {} @@ -68,3 +69,15 @@ SYCL_EXTERNAL void nd_range(sycl::nd_range<2>) {} // CHECK-NEXT: 32 | size_t[2] common_array // CHECK-NEXT: | [sizeof=48, dsize=48, align=8, // CHECK-NEXT: | nvsize=48, nvalign=8] + +//---------------------------- + +SYCL_EXTERNAL void nd_range_view(sycl::detail::nd_range_view) {} +// CHECK: 0 | class sycl::detail::nd_range_view +// CHECK-NEXT: 0 | const size_t * MGlobalSize +// CHECK-NEXT: 8 | const size_t * MLocalSize +// CHECK-NEXT: 16 | const size_t * MOffset +// CHECK-NEXT: 24 | _Bool MSetNumWorkGroups +// CHECK-NEXT: 32 | size_t MDims +// CHECK-NEXT: | [sizeof=40, dsize=40, align=8, +// CHECK-NEXT: | nvsize=40, nvalign=8] diff --git a/sycl/test/abi/symbol_size_alignment.cpp b/sycl/test/abi/symbol_size_alignment.cpp index 63286da1b9786..9e83584d0d99d 100644 --- a/sycl/test/abi/symbol_size_alignment.cpp +++ b/sycl/test/abi/symbol_size_alignment.cpp @@ -19,6 +19,7 @@ #include #include #include +#include using namespace sycl; @@ -58,6 +59,7 @@ int main() { #endif check, 16, 8>(); check(); + check(); check(); #ifdef __SYCL_DEVICE_ONLY__ check, 4, 4>(); From b4184b42d72b7fa5b2ed31b4572b788d06c2020e Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 3 Nov 2025 15:44:21 +0000 Subject: [PATCH 18/35] Move the nd_range_view layout test to a separate file --- sycl/test/abi/layout_array.cpp | 13 ------------- sycl/test/abi/layout_nd_range_view.cpp | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 13 deletions(-) create mode 100644 sycl/test/abi/layout_nd_range_view.cpp diff --git a/sycl/test/abi/layout_array.cpp b/sycl/test/abi/layout_array.cpp index 9ab0970a14570..db0236091a46c 100644 --- a/sycl/test/abi/layout_array.cpp +++ b/sycl/test/abi/layout_array.cpp @@ -8,7 +8,6 @@ #include #include #include -#include SYCL_EXTERNAL void id(sycl::id<2>) {} @@ -69,15 +68,3 @@ SYCL_EXTERNAL void nd_range(sycl::nd_range<2>) {} // CHECK-NEXT: 32 | size_t[2] common_array // CHECK-NEXT: | [sizeof=48, dsize=48, align=8, // CHECK-NEXT: | nvsize=48, nvalign=8] - -//---------------------------- - -SYCL_EXTERNAL void nd_range_view(sycl::detail::nd_range_view) {} -// CHECK: 0 | class sycl::detail::nd_range_view -// CHECK-NEXT: 0 | const size_t * MGlobalSize -// CHECK-NEXT: 8 | const size_t * MLocalSize -// CHECK-NEXT: 16 | const size_t * MOffset -// CHECK-NEXT: 24 | _Bool MSetNumWorkGroups -// CHECK-NEXT: 32 | size_t MDims -// CHECK-NEXT: | [sizeof=40, dsize=40, align=8, -// CHECK-NEXT: | nvsize=40, nvalign=8] diff --git a/sycl/test/abi/layout_nd_range_view.cpp b/sycl/test/abi/layout_nd_range_view.cpp new file mode 100644 index 0000000000000..2a1d0693dab26 --- /dev/null +++ b/sycl/test/abi/layout_nd_range_view.cpp @@ -0,0 +1,19 @@ +// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s +// RUN: %clangxx -fsycl -fsycl-device-only -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s +// REQUIRES: linux +// UNSUPPORTED: libcxx + +// clang-format off + +#include + + +SYCL_EXTERNAL void nd_range_view(sycl::detail::nd_range_view) {} +// CHECK: 0 | class sycl::detail::nd_range_view +// CHECK-NEXT: 0 | const size_t * MGlobalSize +// CHECK-NEXT: 8 | const size_t * MLocalSize +// CHECK-NEXT: 16 | const size_t * MOffset +// CHECK-NEXT: 24 | _Bool MSetNumWorkGroups +// CHECK-NEXT: 32 | size_t MDims +// CHECK-NEXT: | [sizeof=40, dsize=40, align=8, +// CHECK-NEXT: | nvsize=40, nvalign=8] From fa083a4de449a94064545a5681903808e84c3aac Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 4 Nov 2025 12:09:16 +0000 Subject: [PATCH 19/35] Update Linux symbols --- sycl/test/abi/sycl_symbols_linux.dump | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 5e58ac99474d3..88bcd89824fe4 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,8 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSC_5intel12experimental12cache_configENSE_17use_root_sync_keyENSE_23work_group_progress_keyENSE_22sub_group_progress_keyENSE_22work_item_progress_keyENSE_4cuda12cluster_sizeILi1EEENSO_ILi2EEENSO_ILi3EEEEEERKNS4_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSC_5intel12experimental12cache_configENSE_17use_root_sync_keyENSE_23work_group_progress_keyENSE_22sub_group_progress_keyENSE_22work_item_progress_keyENSE_4cuda12cluster_sizeILi1EEENSO_ILi2EEENSO_ILi3EEEEEERKNS4_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv From 8227e9ada4689ce7ae8c0c73bf58ea0d1d7c89c7 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 4 Nov 2025 12:23:53 +0000 Subject: [PATCH 20/35] Update Windows symbols --- sycl/test/abi/sycl_symbols_windows.dump | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index f7e0f0f062223..ce41ca10da997 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -286,12 +286,6 @@ ??$is_image_handle_supported@Usampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z -??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z ??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z ??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z ??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z @@ -4484,6 +4478,8 @@ ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z +?submit_kernel_direct_with_event_impl@_V1@sycl@@YA?AVevent@12@AEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@612@PEAVDeviceKernelInfo@612@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@612@AEBUcode_location@612@_N@Z +?submit_kernel_direct_without_event_impl@_V1@sycl@@YAXAEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@512@PEAVDeviceKernelInfo@512@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@512@AEBUcode_location@512@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@623@AEBUcode_location@623@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@v1@623@AEBUcode_location@623@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBVSubmissionInfo@detail@23@AEBUcode_location@823@_N@Z From df435a039aea306e13546b3420f6de693ed6622e Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 4 Nov 2025 12:28:23 +0000 Subject: [PATCH 21/35] Fix formatting --- sycl/test/abi/symbol_size_alignment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/abi/symbol_size_alignment.cpp b/sycl/test/abi/symbol_size_alignment.cpp index 9e83584d0d99d..36bf933ce6b28 100644 --- a/sycl/test/abi/symbol_size_alignment.cpp +++ b/sycl/test/abi/symbol_size_alignment.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -19,7 +20,6 @@ #include #include #include -#include using namespace sycl; From 7677e9802ca2002b783c910a64d6d914d7d2c154 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 6 Nov 2025 08:54:22 +0000 Subject: [PATCH 22/35] Update Linux symbols --- sycl/test/abi/sycl_symbols_linux.dump | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index e42759431d374..f7a32763da179 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,8 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS4_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS4_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv From 4404ab1fd477bfe9f73fb15c42765ca4275a6bf8 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 6 Nov 2025 09:15:24 +0000 Subject: [PATCH 23/35] Update Windows symbols --- sycl/test/abi/sycl_symbols_windows.dump | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 033a2e1a83be1..49387922d3fbc 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -286,12 +286,6 @@ ??$is_image_handle_supported@Usampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z -??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z ??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z ??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z ??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z @@ -4484,8 +4478,8 @@ ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z -?submit_kernel_direct_with_event_impl@_V1@sycl@@YA?AVevent@12@AEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@612@PEAVDeviceKernelInfo@612@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@612@AEBUcode_location@612@_N@Z -?submit_kernel_direct_without_event_impl@_V1@sycl@@YAXAEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@512@PEAVDeviceKernelInfo@512@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@512@AEBUcode_location@512@_N@Z +?submit_kernel_direct_with_event_impl@_V1@sycl@@YA?AVevent@12@AEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@612@PEAVDeviceKernelInfo@612@V?$span@$$CBVevent@_V1@sycl@@$0?0@12@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@612@AEBUcode_location@612@_N@Z +?submit_kernel_direct_without_event_impl@_V1@sycl@@YAXAEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@512@PEAVDeviceKernelInfo@512@V?$span@$$CBVevent@_V1@sycl@@$0?0@12@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@512@AEBUcode_location@512@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@623@AEBUcode_location@623@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@v1@623@AEBUcode_location@623@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBVSubmissionInfo@detail@23@AEBUcode_location@823@_N@Z From c04aa1f3ea2cded7ad6e376f182c4e3078e7e68b Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 6 Nov 2025 09:16:16 +0000 Subject: [PATCH 24/35] Remove include comment --- sycl/include/sycl/queue.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 48152884b8504..b220f33aafa22 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -21,7 +21,7 @@ #include // for checkValueRange #include // for is_queue_info_... #include // for KernelInfo -#include // for nd_range_view +#include #include #include // for OwnerLessBase #include // for device From eea6565fdb9bdf5bb7797f9d4b70136a31248dfb Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Wed, 12 Nov 2025 15:27:52 +0000 Subject: [PATCH 25/35] Pass nd_range_view as const & --- sycl/include/sycl/queue.hpp | 4 ++-- sycl/source/detail/queue_impl.hpp | 6 ++++-- sycl/source/queue.cpp | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index b220f33aafa22..118ec968a963c 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -66,7 +66,7 @@ auto get_native(const SyclObjectT &Obj) -> backend_return_t; event __SYCL_EXPORT submit_kernel_direct_with_event_impl( - const queue &Queue, detail::nd_range_view RangeView, + const queue &Queue, const detail::nd_range_view &RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, @@ -74,7 +74,7 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl( const detail::code_location &CodeLoc, bool IsTopCodeLoc); void __SYCL_EXPORT submit_kernel_direct_without_event_impl( - const queue &Queue, detail::nd_range_view RangeView, + const queue &Queue, const detail::nd_range_view &RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index b44a5bbfc3ea7..87667c4ff1ef1 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -361,7 +361,8 @@ class queue_impl : public std::enable_shared_from_this { } event submit_kernel_direct_with_event( - detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, + const detail::nd_range_view &RangeView, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, @@ -373,7 +374,8 @@ class queue_impl : public std::enable_shared_from_this { } void submit_kernel_direct_without_event( - detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, + const detail::nd_range_view &RangeView, + detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index e855cf37ef1eb..f5858217d23e7 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -464,7 +464,7 @@ void queue::ext_oneapi_set_external_event(const event &external_event) { const property_list &queue::getPropList() const { return impl->getPropList(); } event submit_kernel_direct_with_event_impl( - const queue &Queue, detail::nd_range_view RangeView, + const queue &Queue, const detail::nd_range_view &RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, @@ -476,7 +476,7 @@ event submit_kernel_direct_with_event_impl( } void submit_kernel_direct_without_event_impl( - const queue &Queue, detail::nd_range_view RangeView, + const queue &Queue, const detail::nd_range_view &RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, From e91203967c95a441400df7a36328879262bacb2a Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Wed, 12 Nov 2025 16:10:25 +0000 Subject: [PATCH 26/35] Update Linux symbols --- sycl/test/abi/sycl_symbols_linux.dump | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index f7a32763da179..12ca5b7b5a93f 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,8 +2985,8 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS4_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS4_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implERKNS0_5queueERKNS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSI_5intel12experimental12cache_configENSK_17use_root_sync_keyENSK_23work_group_progress_keyENSK_22sub_group_progress_keyENSK_22work_item_progress_keyENSK_4cuda12cluster_sizeILi1EEENSU_ILi2EEENSU_ILi3EEEEEERKNS4_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implERKNS0_5queueERKNS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSI_5intel12experimental12cache_configENSK_17use_root_sync_keyENSK_23work_group_progress_keyENSK_22sub_group_progress_keyENSK_22work_item_progress_keyENSK_4cuda12cluster_sizeILi1EEENSU_ILi2EEENSU_ILi3EEEEEERKNS4_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv From 34e875afb42990134122c574ee02226301b87fe8 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 13 Nov 2025 08:37:47 +0000 Subject: [PATCH 27/35] Update Windows symbols --- sycl/test/abi/sycl_symbols_windows.dump | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 49387922d3fbc..0c071dd4d33c6 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4478,8 +4478,8 @@ ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z -?submit_kernel_direct_with_event_impl@_V1@sycl@@YA?AVevent@12@AEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@612@PEAVDeviceKernelInfo@612@V?$span@$$CBVevent@_V1@sycl@@$0?0@12@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@612@AEBUcode_location@612@_N@Z -?submit_kernel_direct_without_event_impl@_V1@sycl@@YAXAEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@512@PEAVDeviceKernelInfo@512@V?$span@$$CBVevent@_V1@sycl@@$0?0@12@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@512@AEBUcode_location@512@_N@Z +?submit_kernel_direct_with_event_impl@_V1@sycl@@YA?AVevent@12@AEBVqueue@12@AEBVnd_range_view@detail@12@AEAVHostKernelRefBase@612@PEAVDeviceKernelInfo@612@V?$span@$$CBVevent@_V1@sycl@@$0?0@12@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@612@AEBUcode_location@612@_N@Z +?submit_kernel_direct_without_event_impl@_V1@sycl@@YAXAEBVqueue@12@AEBVnd_range_view@detail@12@AEAVHostKernelRefBase@512@PEAVDeviceKernelInfo@512@V?$span@$$CBVevent@_V1@sycl@@$0?0@12@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@512@AEBUcode_location@512@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@623@AEBUcode_location@623@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@v1@623@AEBUcode_location@623@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBVSubmissionInfo@detail@23@AEBUcode_location@823@_N@Z From bdddb77bcc61e81a79ab804b7ce413f5663b013c Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Thu, 13 Nov 2025 11:46:46 +0000 Subject: [PATCH 28/35] Support for range-based handler-less kernel submission --- sycl/include/sycl/queue.hpp | 209 +++++++++++++++++++++++---- sycl/include/sycl/range_rounding.hpp | 159 ++++++++++++++++++++ sycl/source/queue.cpp | 31 ++++ 3 files changed, 372 insertions(+), 27 deletions(-) create mode 100644 sycl/include/sycl/range_rounding.hpp diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 118ec968a963c..3e10835e116c0 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -41,6 +41,7 @@ #include // for nd_range #include // for property_list #include // for range +#include #include // for sycl::span #include // for size_t @@ -161,8 +162,8 @@ class __SYCL_EXPORT SubmissionInfo { } // namespace v1 -template auto submit_kernel_direct( @@ -176,10 +177,7 @@ auto submit_kernel_direct( using KernelType = std::remove_const_t>; - using NameT = - typename detail::get_kernel_name_t::name; - - detail::KernelWrapper::wrap(KernelFunc); HostKernelRef @@ -194,8 +192,8 @@ auto submit_kernel_direct( #endif detail::DeviceKernelInfo *DeviceKernelInfoPtr = - &detail::getDeviceKernelInfo(); - constexpr auto Info = detail::CompileTimeKernelInfo; + &detail::getDeviceKernelInfo(); + constexpr auto Info = detail::CompileTimeKernelInfo; assert(Info.Name != std::string_view{} && "Kernel must have a name!"); @@ -254,6 +252,8 @@ auto submit_kernel_direct_parallel_for( using KernelType = std::remove_const_t>; + using NameT = + typename detail::get_kernel_name_t::name; using LambdaArgType = sycl::detail::lambda_arg_type>; @@ -268,13 +268,116 @@ auto submit_kernel_direct_parallel_for( #endif return submit_kernel_direct( Queue, detail::nd_range_view(Range), std::forward(KernelFunc), DepEvents, Props, CodeLoc); } +template struct TransformUserItemType { + using type = std::conditional_t< + std::is_convertible_v, LambdaArgType>, nd_item, + std::conditional_t, LambdaArgType>, + item, LambdaArgType>>; +}; + +template +auto submit_kernel_direct_parallel_for( + const queue &Queue, range Range, KernelTypeUniversalRef &&KernelFunc, + sycl::span DepEvents = {}, + const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, + const detail::code_location &CodeLoc = detail::code_location::current()) { + +#ifndef __SYCL_DEVICE_ONLY__ + if (!range_size_fits_in_size_t(Range)) + throw sycl::exception(make_error_code(errc::runtime), + "The total number of work-items in " + "a range must fit within size_t"); +#endif + + using KernelType = + std::remove_const_t>; + using NameT = + typename detail::get_kernel_name_t::name; + using LambdaArgType = sycl::detail::lambda_arg_type>; + + // If 1D kernel argument is an integral type, convert it to sycl::item<1> + // If user type is convertible from sycl::item/sycl::nd_item, use + // sycl::item/sycl::nd_item to transport item information + using TransformedArgType = std::conditional_t< + std::is_integral::value && Dims == 1, item, + typename TransformUserItemType::type>; + + static_assert(!std::is_same_v>, + "Kernel argument cannot have a sycl::nd_item type in " + "sycl::parallel_for with sycl::range"); + + static_assert(std::is_convertible_v, LambdaArgType> || + std::is_convertible_v, LambdaArgType>, + "sycl::parallel_for(sycl::range) kernel must have the " + "first argument of sycl::item type, or of a type which is " + "implicitly convertible from sycl::item"); + + using RefLambdaArgType = std::add_lvalue_reference_t; + static_assert( + (std::is_invocable_v), + "SYCL kernel lambda/functor has an unexpected signature, it should be " + "invocable with sycl::item and optionally sycl::kernel_handler"); + + // Range rounding can be disabled by the user. + // Range rounding is supported only for newer SYCL standards. +#if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \ + SYCL_LANGUAGE_VERSION >= 202012L + auto [RoundedRange, HasRoundedRange] = + detail::getRoundedRange(Range, get_device()); + if (HasRoundedRange) { + using NameWT = typename detail::get_kernel_wrapper_name_t::name; + auto Wrapper = + getRangeRoundedKernelLambda( + KernelFunc, Range); + + using KTypeWrapper = decltype(Wrapper); + using KName = std::conditional_t::value, + KTypeWrapper, NameWT>; +#ifndef __SYCL_DEVICE_ONLY__ + // We are executing over the rounded range, but there are still + // items/ids that are are constructed in ther range rounded + // kernel use items/ids in the user range, which means that + // __SYCL_ASSUME_INT can still be violated. So check the bounds + // of the user range, instead of the rounded range. + detail::checkValueRange(Range); +#endif + return submit_kernel_direct( + Queue, detail::nd_range_view(Range), std::move(Wrapper), DepEvents, + Props, CodeLoc); + } else +#endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ && + // SYCL_LANGUAGE_VERSION >= 202012L + { +#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ +#ifndef __SYCL_DEVICE_ONLY__ + detail::checkValueRange(Range); +#endif + return submit_kernel_direct( + Queue, detail::nd_range_view(Range), + std::forward(KernelFunc), DepEvents, Props, + CodeLoc); + +#else + (void)Range; + (void)Props; + (void)KernelFunc; +#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + } +} + template @@ -284,7 +387,12 @@ auto submit_kernel_direct_single_task( const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { - return submit_kernel_direct>; + using NameT = + typename detail::get_kernel_name_t::name; + + return submit_kernel_direct( Queue, detail::nd_range_view(), @@ -3984,11 +4092,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.template parallel_for(Range, Properties, Rest...); - }, - TlsCodeLocCapture.query()); + using KernelType = std::tuple_element_t<0, std::tuple>; + using LambdaArgType = sycl::detail::lambda_arg_type>; + using TransformedArgType = std::conditional_t< + std::is_integral::value && Dims == 1, item, + typename detail::TransformUserItemType::type>; + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, TransformedArgType>::value)) { + return detail::submit_kernel_direct_parallel_for( + *this, Range, Rest..., {}, Properties, TlsCodeLocCapture.query()); + } else { + return submit( + [&](handler &CGH) { + CGH.template parallel_for(Range, Properties, Rest...); + }, + TlsCodeLocCapture.query()); + } } /// parallel_for_impl with a kernel represented as a lambda + range that @@ -4018,12 +4141,28 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.depends_on(DepEvent); - CGH.template parallel_for(Range, Properties, Rest...); - }, - TlsCodeLocCapture.query()); + using KernelType = std::tuple_element_t<0, std::tuple>; + using LambdaArgType = sycl::detail::lambda_arg_type>; + using TransformedArgType = std::conditional_t< + std::is_integral::value && Dims == 1, item, + typename detail::TransformUserItemType::type>; + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, TransformedArgType>::value)) { + return detail::submit_kernel_direct_parallel_for( + *this, Range, Rest..., sycl::span(&DepEvent, 1), + Properties, TlsCodeLocCapture.query()); + } else { + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.template parallel_for(Range, Properties, Rest...); + }, + TlsCodeLocCapture.query()); + } } /// parallel_for_impl with a kernel represented as a lambda + range that @@ -4055,12 +4194,28 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { PropertiesT Properties, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.depends_on(DepEvents); - CGH.template parallel_for(Range, Properties, Rest...); - }, - TlsCodeLocCapture.query()); + using KernelType = std::tuple_element_t<0, std::tuple>; + using LambdaArgType = sycl::detail::lambda_arg_type>; + using TransformedArgType = std::conditional_t< + std::is_integral::value && Dims == 1, item, + typename detail::TransformUserItemType::type>; + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, TransformedArgType>::value)) { + return detail::submit_kernel_direct_parallel_for( + *this, Range, Rest..., DepEvents, Properties, + TlsCodeLocCapture.query()); + } else { + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.template parallel_for(Range, Properties, Rest...); + }, + TlsCodeLocCapture.query()); + } } /// parallel_for_impl version with a kernel represented as a lambda + range diff --git a/sycl/include/sycl/range_rounding.hpp b/sycl/include/sycl/range_rounding.hpp new file mode 100644 index 0000000000000..6865dcfc6c646 --- /dev/null +++ b/sycl/include/sycl/range_rounding.hpp @@ -0,0 +1,159 @@ +//==----------- range_rounding.hpp --- SYCL range rounding utils -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include + +#include // for size_t + +namespace sycl { +inline namespace _V1 { + +namespace detail { + +void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor, + size_t &MinRange); + +std::tuple, bool> +getMaxWorkGroups_v2(const device &Device); + +bool DisableRangeRounding(); + +bool RangeRoundingTrace(); + +template +std::tuple, bool> getRoundedRange(range UserRange, + const device &Device) { + range RoundedRange = UserRange; + // Disable the rounding-up optimizations under these conditions: + // 1. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set. + // 2. The kernel is provided via an interoperability method (this uses a + // different code path). + // 3. The range is already a multiple of the rounding factor. + // + // Cases 2 and 3 could be supported with extra effort. + // As an optimization for the common case it is an + // implementation choice to not support those scenarios. + // Note that "this_item" is a free function, i.e. not tied to any + // specific id or item. When concurrent parallel_fors are executing + // on a device it is difficult to tell which parallel_for the call is + // being made from. One could replicate portions of the + // call-graph to make this_item calls kernel-specific but this is + // not considered worthwhile. + + // Perform range rounding if rounding-up is enabled. + if (DisableRangeRounding()) + return {range{}, false}; + + // Range should be a multiple of this for reasonable performance. + size_t MinFactorX = 16; + // Range should be a multiple of this for improved performance. + size_t GoodFactor = 32; + // Range should be at least this to make rounding worthwhile. + size_t MinRangeX = 1024; + + // Check if rounding parameters have been set through environment: + // SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=MinRound:PreferredRound:MinRange + GetRangeRoundingSettings(MinFactorX, GoodFactor, MinRangeX); + + // In SYCL, each dimension of a global range size is specified by + // a size_t, which can be up to 64 bits. All backends should be + // able to accept a kernel launch with a 32-bit global range size + // (i.e. do not throw an error). The OpenCL CPU backend will + // accept every 64-bit global range, but the GPU backends will not + // generally accept every 64-bit global range. So, when we get a + // non-32-bit global range, we wrap the old kernel in a new kernel + // that has each work item peform multiple invocations the old + // kernel in a 32-bit global range. + id MaxNWGs = [&] { + auto [MaxWGs, HasMaxWGs] = getMaxWorkGroups_v2(Device); + if (!HasMaxWGs) { + id Default; + for (int i = 0; i < Dims; ++i) + Default[i] = (std::numeric_limits::max)(); + return Default; + } + + id IdResult; + size_t Limit = (std::numeric_limits::max)(); + for (int i = 0; i < Dims; ++i) + IdResult[i] = (std::min)(Limit, MaxWGs[Dims - i - 1]); + return IdResult; + }(); + auto M = (std::numeric_limits::max)(); + range MaxRange; + for (int i = 0; i < Dims; ++i) { + auto DesiredSize = MaxNWGs[i] * GoodFactor; + MaxRange[i] = + DesiredSize <= M ? DesiredSize : (M / GoodFactor) * GoodFactor; + } + + bool DidAdjust = false; + auto Adjust = [&](int Dim, size_t Value) { + if (RangeRoundingTrace()) + std::cout << "parallel_for range adjusted at dim " << Dim << " from " + << RoundedRange[Dim] << " to " << Value << std::endl; + RoundedRange[Dim] = Value; + DidAdjust = true; + }; + +#ifdef __SYCL_EXP_PARALLEL_FOR_RANGE_ROUNDING__ + size_t GoodExpFactor = 1; + switch (Dims) { + case 1: + GoodExpFactor = 32; // Make global range multiple of {32} + break; + case 2: + GoodExpFactor = 16; // Make global range multiple of {16, 16} + break; + case 3: + GoodExpFactor = 8; // Make global range multiple of {8, 8, 8} + break; + } + + // Check if rounding parameters have been set through environment: + // SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=MinRound:PreferredRound:MinRange + GetRangeRoundingSettings(MinFactorX, GoodExpFactor, MinRangeX); + + for (auto i = 0; i < Dims; ++i) + if (UserRange[i] % GoodExpFactor) { + Adjust(i, ((UserRange[i] / GoodExpFactor) + 1) * GoodExpFactor); + } +#else + // Perform range rounding if there are sufficient work-items to + // need rounding and the user-specified range is not a multiple of + // a "good" value. + if (RoundedRange[0] % MinFactorX != 0 && RoundedRange[0] >= MinRangeX) { + // It is sufficient to round up just the first dimension. + // Multiplying the rounded-up value of the first dimension + // by the values of the remaining dimensions (if any) + // will yield a rounded-up value for the total range. + Adjust(0, ((RoundedRange[0] + GoodFactor - 1) / GoodFactor) * GoodFactor); + } +#endif // __SYCL_EXP_PARALLEL_FOR_RANGE_ROUNDING__ +#ifdef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + // If we are forcing range rounding kernels to be used, we always want the + // rounded range kernel to be generated, even if rounding isn't needed + DidAdjust = true; +#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + + for (int i = 0; i < Dims; ++i) + if (RoundedRange[i] > MaxRange[i]) + Adjust(i, MaxRange[i]); + + if (!DidAdjust) + return {range{}, false}; + return {RoundedRange, true}; +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl \ No newline at end of file diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index f5858217d23e7..34e5179cdfd2e 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -58,6 +58,37 @@ const ext::oneapi::experimental::event_mode_enum & SubmissionInfo::EventMode() const { return impl->MEventMode; } + +void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor, + size_t &MinRange) { + SYCLConfig::GetSettings( + MinFactor, GoodFactor, MinRange); +} + +std::tuple, bool> +getMaxWorkGroups_v2(const device &Device) { + std::array UrResult = {}; + auto &DeviceImpl = getSyclObjImpl(Device); + + auto Ret = DeviceImpl->getAdapter().call_nocheck( + DeviceImpl->getHandleRef(), + UrInfoCode< + ext::oneapi::experimental::info::device::max_work_groups<3>>::value, + sizeof(UrResult), &UrResult, nullptr); + if (Ret == UR_RESULT_SUCCESS) { + return {UrResult, true}; + } + return {std::array{0, 0, 0}, false}; +} + +bool DisableRangeRounding() { + return SYCLConfig::get(); +} + +bool RangeRoundingTrace() { + return SYCLConfig::get(); +} + } // namespace detail #endif // __INTEL_PREVIEW_BREAKING_CHANGES From 3df079800c4cacb28ae3fbde1d0e35a5215b80bc Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Fri, 14 Nov 2025 14:09:20 +0000 Subject: [PATCH 29/35] Support for range-based handler-less kernel submission - continue --- sycl/include/sycl/handler.hpp | 40 +- sycl/include/sycl/queue.hpp | 453 ++++++++++-------- sycl/include/sycl/range_rounding.hpp | 1 + .../include_deps/sycl_detail_core.hpp.cpp | 1 + .../sycl_khr_includes_queue.hpp.cpp | 1 + .../sycl_khr_includes_reduction.hpp.cpp | 1 + .../sycl_khr_includes_usm.hpp.cpp | 1 + 7 files changed, 270 insertions(+), 228 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index d8d46d2a27814..6b2c4f65c7a85 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -362,6 +362,24 @@ class RoundedRangeKernelWithKH { } }; +template ::value> * = nullptr> +auto getRangeRoundedKernelLambda(KernelType KernelFunc, range UserRange) { + return detail::RoundedRangeKernelWithKH{ + UserRange, KernelFunc}; +} + +template ::value> * = nullptr> +auto getRangeRoundedKernelLambda(KernelType KernelFunc, range UserRange) { + return detail::RoundedRangeKernel{ + UserRange, KernelFunc}; +} + using std::enable_if_t; using sycl::detail::queue_impl; @@ -1218,7 +1236,7 @@ class __SYCL_EXPORT handler { if (HasRoundedRange) { using NameWT = typename detail::get_kernel_wrapper_name_t::name; auto Wrapper = - getRangeRoundedKernelLambda( + detail::getRangeRoundedKernelLambda( KernelFunc, UserRange); using KName = std::conditional_t::value, @@ -3265,26 +3283,6 @@ class __SYCL_EXPORT handler { void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor, size_t &MinRange); - template ::value> * = nullptr> - auto getRangeRoundedKernelLambda(KernelType KernelFunc, - range UserRange) { - return detail::RoundedRangeKernelWithKH{UserRange, KernelFunc}; - } - - template ::value> * = nullptr> - auto getRangeRoundedKernelLambda(KernelType KernelFunc, - range UserRange) { - return detail::RoundedRangeKernel{ - UserRange, KernelFunc}; - } - #ifndef __INTEL_PREVIEW_BREAKING_CHANGES const std::shared_ptr &getContextImplPtr() const; #endif diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 3e10835e116c0..070880f48682a 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -162,6 +162,13 @@ class __SYCL_EXPORT SubmissionInfo { } // namespace v1 +template struct TransformUserItemType { + using type = std::conditional_t< + std::is_convertible_v, LambdaArgType>, nd_item, + std::conditional_t, LambdaArgType>, + item, LambdaArgType>>; +}; + template DepEvents, const PropertiesT &ExtraProps = ext::oneapi::experimental::empty_properties_t{}, - const detail::code_location &CodeLoc = detail::code_location::current()) { - detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - - using KernelType = - std::remove_const_t>; - - detail::KernelWrapper::wrap(KernelFunc); - - HostKernelRef - HostKernel(std::forward(KernelFunc)); - - // Instantiating the kernel on the host improves debugging. - // Passing this pointer to another translation unit prevents optimization. -#ifndef NDEBUG - // TODO: call library to prevent dropping call due to optimization. - (void) - detail::GetInstantiateKernelOnHostPtr(); -#endif - - detail::DeviceKernelInfo *DeviceKernelInfoPtr = - &detail::getDeviceKernelInfo(); - constexpr auto Info = detail::CompileTimeKernelInfo; - - assert(Info.Name != std::string_view{} && "Kernel must have a name!"); - - static_assert( - Info.Name == std::string_view{} || sizeof(KernelType) == Info.KernelSize, - "Unexpected kernel lambda size. This can be caused by an " - "external host compiler producing a lambda with an " - "unexpected layout. This is a limitation of the compiler." - "In many cases the difference is related to capturing constexpr " - "variables. In such cases removing constexpr specifier aligns the " - "captures between the host compiler and the device compiler." - "\n" - "In case of MSVC, passing " - "-fsycl-host-compiler-options='/std:c++latest' " - "might also help."); - - detail::KernelPropertyHolderStructTy ParsedProperties; - if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< - const KernelType &>::value) { - // Merge properties via get() and manually specified properties. - // get() method is used for specifying kernel properties but properties - // passed via launch_config (ExtraProps) should be kernel launch properties. - // They are mutually exclusive, so there should not be any conflict when - // merging properties. merge_properties() throws if there's a conflict. - auto MergedProps = - sycl::ext::oneapi::experimental::detail::merge_properties( - ExtraProps, - KernelFunc.get(ext::oneapi::experimental::properties_tag{})); - - ParsedProperties = extractKernelProperties(MergedProps); - } else { - ParsedProperties = extractKernelProperties(ExtraProps); - } - - if constexpr (EventNeeded) { - return submit_kernel_direct_with_event_impl( - Queue, RangeView, HostKernel, DeviceKernelInfoPtr, DepEvents, - ParsedProperties, TlsCodeLocCapture.query(), - TlsCodeLocCapture.isToplevel()); - } else { - submit_kernel_direct_without_event_impl( - Queue, RangeView, HostKernel, DeviceKernelInfoPtr, DepEvents, - ParsedProperties, TlsCodeLocCapture.query(), - TlsCodeLocCapture.isToplevel()); - } -} + const detail::code_location &CodeLoc = detail::code_location::current()); template Range, KernelTypeUniversalRef &&KernelFunc, sycl::span DepEvents = {}, const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, - const detail::code_location &CodeLoc = detail::code_location::current()) { - - using KernelType = - std::remove_const_t>; - using NameT = - typename detail::get_kernel_name_t::name; - - using LambdaArgType = - sycl::detail::lambda_arg_type>; - static_assert( - std::is_convertible_v, LambdaArgType>, - "Kernel argument of a sycl::parallel_for with sycl::nd_range " - "must be either sycl::nd_item or be convertible from sycl::nd_item"); - using TransformedArgType = sycl::nd_item; - -#ifndef __SYCL_DEVICE_ONLY__ - detail::checkValueRange(Range); -#endif - - return submit_kernel_direct( - Queue, detail::nd_range_view(Range), - std::forward(KernelFunc), DepEvents, Props, - CodeLoc); -} - -template struct TransformUserItemType { - using type = std::conditional_t< - std::is_convertible_v, LambdaArgType>, nd_item, - std::conditional_t, LambdaArgType>, - item, LambdaArgType>>; -}; + const detail::code_location &CodeLoc = detail::code_location::current()); template Range, KernelTypeUniversalRef &&KernelFunc, sycl::span DepEvents = {}, const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, - const detail::code_location &CodeLoc = detail::code_location::current()) { - -#ifndef __SYCL_DEVICE_ONLY__ - if (!range_size_fits_in_size_t(Range)) - throw sycl::exception(make_error_code(errc::runtime), - "The total number of work-items in " - "a range must fit within size_t"); -#endif - - using KernelType = - std::remove_const_t>; - using NameT = - typename detail::get_kernel_name_t::name; - using LambdaArgType = sycl::detail::lambda_arg_type>; - - // If 1D kernel argument is an integral type, convert it to sycl::item<1> - // If user type is convertible from sycl::item/sycl::nd_item, use - // sycl::item/sycl::nd_item to transport item information - using TransformedArgType = std::conditional_t< - std::is_integral::value && Dims == 1, item, - typename TransformUserItemType::type>; - - static_assert(!std::is_same_v>, - "Kernel argument cannot have a sycl::nd_item type in " - "sycl::parallel_for with sycl::range"); - - static_assert(std::is_convertible_v, LambdaArgType> || - std::is_convertible_v, LambdaArgType>, - "sycl::parallel_for(sycl::range) kernel must have the " - "first argument of sycl::item type, or of a type which is " - "implicitly convertible from sycl::item"); - - using RefLambdaArgType = std::add_lvalue_reference_t; - static_assert( - (std::is_invocable_v), - "SYCL kernel lambda/functor has an unexpected signature, it should be " - "invocable with sycl::item and optionally sycl::kernel_handler"); - - // Range rounding can be disabled by the user. - // Range rounding is supported only for newer SYCL standards. -#if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \ - SYCL_LANGUAGE_VERSION >= 202012L - auto [RoundedRange, HasRoundedRange] = - detail::getRoundedRange(Range, get_device()); - if (HasRoundedRange) { - using NameWT = typename detail::get_kernel_wrapper_name_t::name; - auto Wrapper = - getRangeRoundedKernelLambda( - KernelFunc, Range); - - using KTypeWrapper = decltype(Wrapper); - using KName = std::conditional_t::value, - KTypeWrapper, NameWT>; -#ifndef __SYCL_DEVICE_ONLY__ - // We are executing over the rounded range, but there are still - // items/ids that are are constructed in ther range rounded - // kernel use items/ids in the user range, which means that - // __SYCL_ASSUME_INT can still be violated. So check the bounds - // of the user range, instead of the rounded range. - detail::checkValueRange(Range); -#endif - return submit_kernel_direct( - Queue, detail::nd_range_view(Range), std::move(Wrapper), DepEvents, - Props, CodeLoc); - } else -#endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ && - // SYCL_LANGUAGE_VERSION >= 202012L - { -#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ -#ifndef __SYCL_DEVICE_ONLY__ - detail::checkValueRange(Range); -#endif - return submit_kernel_direct( - Queue, detail::nd_range_view(Range), - std::forward(KernelFunc), DepEvents, Props, - CodeLoc); - -#else - (void)Range; - (void)Props; - (void)KernelFunc; -#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ - } -} + const detail::code_location &CodeLoc = detail::code_location::current()); template DepEvents = {}, const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, - const detail::code_location &CodeLoc = detail::code_location::current()) { - - using KernelType = - std::remove_const_t>; - using NameT = - typename detail::get_kernel_name_t::name; - - return submit_kernel_direct( - Queue, detail::nd_range_view(), - std::forward(KernelFunc), DepEvents, Props, - CodeLoc); -} + const detail::code_location &CodeLoc = detail::code_location::current()); } // namespace detail @@ -4092,7 +3899,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - using KernelType = std::tuple_element_t<0, std::tuple>; + using KernelType = std::remove_const_t< + std::remove_reference_t>>>; using LambdaArgType = sycl::detail::lambda_arg_type>; using TransformedArgType = std::conditional_t< std::is_integral::value && Dims == 1, item, @@ -4141,7 +3949,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - using KernelType = std::tuple_element_t<0, std::tuple>; + using KernelType = std::remove_const_t< + std::remove_reference_t>>>; using LambdaArgType = sycl::detail::lambda_arg_type>; using TransformedArgType = std::conditional_t< std::is_integral::value && Dims == 1, item, @@ -4194,7 +4003,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { PropertiesT Properties, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - using KernelType = std::tuple_element_t<0, std::tuple>; + using KernelType = std::remove_const_t< + std::remove_reference_t>>>; using LambdaArgType = sycl::detail::lambda_arg_type>; using TransformedArgType = std::conditional_t< std::is_integral::value && Dims == 1, item, @@ -4257,6 +4067,235 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { } }; +namespace detail { + +template +auto submit_kernel_direct(const queue &Queue, detail::nd_range_view RangeView, + KernelTypeUniversalRef &&KernelFunc, + sycl::span DepEvents, + const PropertiesT &ExtraProps, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + + using KernelType = + std::remove_const_t>; + + detail::KernelWrapper::wrap(KernelFunc); + + HostKernelRef + HostKernel(std::forward(KernelFunc)); + + // Instantiating the kernel on the host improves debugging. + // Passing this pointer to another translation unit prevents optimization. +#ifndef NDEBUG + // TODO: call library to prevent dropping call due to optimization. + (void) + detail::GetInstantiateKernelOnHostPtr(); +#endif + + detail::DeviceKernelInfo *DeviceKernelInfoPtr = + &detail::getDeviceKernelInfo(); + constexpr auto Info = detail::CompileTimeKernelInfo; + + assert(Info.Name != std::string_view{} && "Kernel must have a name!"); + + static_assert( + Info.Name == std::string_view{} || sizeof(KernelType) == Info.KernelSize, + "Unexpected kernel lambda size. This can be caused by an " + "external host compiler producing a lambda with an " + "unexpected layout. This is a limitation of the compiler." + "In many cases the difference is related to capturing constexpr " + "variables. In such cases removing constexpr specifier aligns the " + "captures between the host compiler and the device compiler." + "\n" + "In case of MSVC, passing " + "-fsycl-host-compiler-options='/std:c++latest' " + "might also help."); + + detail::KernelPropertyHolderStructTy ParsedProperties; + if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< + const KernelType &>::value) { + // Merge properties via get() and manually specified properties. + // get() method is used for specifying kernel properties but properties + // passed via launch_config (ExtraProps) should be kernel launch properties. + // They are mutually exclusive, so there should not be any conflict when + // merging properties. merge_properties() throws if there's a conflict. + auto MergedProps = + sycl::ext::oneapi::experimental::detail::merge_properties( + ExtraProps, + KernelFunc.get(ext::oneapi::experimental::properties_tag{})); + + ParsedProperties = extractKernelProperties(MergedProps); + } else { + ParsedProperties = extractKernelProperties(ExtraProps); + } + + if constexpr (EventNeeded) { + return submit_kernel_direct_with_event_impl( + Queue, RangeView, HostKernel, DeviceKernelInfoPtr, DepEvents, + ParsedProperties, TlsCodeLocCapture.query(), + TlsCodeLocCapture.isToplevel()); + } else { + submit_kernel_direct_without_event_impl( + Queue, RangeView, HostKernel, DeviceKernelInfoPtr, DepEvents, + ParsedProperties, TlsCodeLocCapture.query(), + TlsCodeLocCapture.isToplevel()); + } +} + +template +auto submit_kernel_direct_parallel_for(const queue &Queue, nd_range Range, + KernelTypeUniversalRef &&KernelFunc, + sycl::span DepEvents, + const PropertiesT &Props, + const detail::code_location &CodeLoc) { + + using KernelType = + std::remove_const_t>; + using NameT = + typename detail::get_kernel_name_t::name; + + using LambdaArgType = + sycl::detail::lambda_arg_type>; + static_assert( + std::is_convertible_v, LambdaArgType>, + "Kernel argument of a sycl::parallel_for with sycl::nd_range " + "must be either sycl::nd_item or be convertible from sycl::nd_item"); + using TransformedArgType = sycl::nd_item; + +#ifndef __SYCL_DEVICE_ONLY__ + detail::checkValueRange(Range); +#endif + + return submit_kernel_direct( + Queue, detail::nd_range_view(Range), + std::forward(KernelFunc), DepEvents, Props, + CodeLoc); +} + +template +auto submit_kernel_direct_parallel_for(const queue &Queue, range Range, + KernelTypeUniversalRef &&KernelFunc, + sycl::span DepEvents, + const PropertiesT &Props, + const detail::code_location &CodeLoc) { + +#ifndef __SYCL_DEVICE_ONLY__ + if (!range_size_fits_in_size_t(Range)) + throw sycl::exception(make_error_code(errc::runtime), + "The total number of work-items in " + "a range must fit within size_t"); +#endif + + using KernelType = + std::remove_const_t>; + using NameT = + typename detail::get_kernel_name_t::name; + using LambdaArgType = sycl::detail::lambda_arg_type>; + + // If 1D kernel argument is an integral type, convert it to sycl::item<1> + // If user type is convertible from sycl::item/sycl::nd_item, use + // sycl::item/sycl::nd_item to transport item information + using TransformedArgType = std::conditional_t< + std::is_integral::value && Dims == 1, item, + typename TransformUserItemType::type>; + + static_assert(!std::is_same_v>, + "Kernel argument cannot have a sycl::nd_item type in " + "sycl::parallel_for with sycl::range"); + + static_assert(std::is_convertible_v, LambdaArgType> || + std::is_convertible_v, LambdaArgType>, + "sycl::parallel_for(sycl::range) kernel must have the " + "first argument of sycl::item type, or of a type which is " + "implicitly convertible from sycl::item"); + + using RefLambdaArgType = std::add_lvalue_reference_t; + static_assert( + (std::is_invocable_v), + "SYCL kernel lambda/functor has an unexpected signature, it should be " + "invocable with sycl::item"); + + // Range rounding can be disabled by the user. + // Range rounding is supported only for newer SYCL standards. +#if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \ + SYCL_LANGUAGE_VERSION >= 202012L + auto [RoundedRange, HasRoundedRange] = + detail::getRoundedRange(Range, Queue.get_device()); + if (HasRoundedRange) { + using NameWT = typename detail::get_kernel_wrapper_name_t::name; + auto Wrapper = + detail::getRangeRoundedKernelLambda( + KernelFunc, Range); + + using KTypeWrapper = decltype(Wrapper); + using KName = std::conditional_t::value, + KTypeWrapper, NameWT>; +#ifndef __SYCL_DEVICE_ONLY__ + // We are executing over the rounded range, but there are still + // items/ids that are are constructed in ther range rounded + // kernel use items/ids in the user range, which means that + // __SYCL_ASSUME_INT can still be violated. So check the bounds + // of the user range, instead of the rounded range. + detail::checkValueRange(Range); +#endif + return submit_kernel_direct( + Queue, detail::nd_range_view(Range), std::move(Wrapper), DepEvents, + Props, CodeLoc); + } else +#endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ && + // SYCL_LANGUAGE_VERSION >= 202012L + { +#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ +#ifndef __SYCL_DEVICE_ONLY__ + detail::checkValueRange(Range); +#endif + return submit_kernel_direct( + Queue, detail::nd_range_view(Range), + std::forward(KernelFunc), DepEvents, Props, + CodeLoc); + +#else + (void)Range; + (void)Props; + (void)KernelFunc; +#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + } +} + +template +auto submit_kernel_direct_single_task(const queue &Queue, + KernelTypeUniversalRef &&KernelFunc, + sycl::span DepEvents, + const PropertiesT &Props, + const detail::code_location &CodeLoc) { + + using KernelType = + std::remove_const_t>; + using NameT = + typename detail::get_kernel_name_t::name; + + return submit_kernel_direct( + Queue, detail::nd_range_view(), + std::forward(KernelFunc), DepEvents, Props, + CodeLoc); +} +} // namespace detail + } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/range_rounding.hpp b/sycl/include/sycl/range_rounding.hpp index 6865dcfc6c646..79739bdc1b480 100644 --- a/sycl/include/sycl/range_rounding.hpp +++ b/sycl/include/sycl/range_rounding.hpp @@ -10,6 +10,7 @@ #include +#include #include #include // for size_t diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index a0bc25739c465..2d0cc14cd5c26 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -152,6 +152,7 @@ // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp +// CHECK-NEXT: range_rounding.hpp // CHECK-NEXT: sycl_span.hpp // CHECK-NEXT: feature_test.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp index 9cd74ac24ca78..846bd0ed4a436 100644 --- a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp @@ -156,5 +156,6 @@ // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp +// CHECK-NEXT: range_rounding.hpp // CHECK-NEXT: sycl_span.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp index 65e5d95389e72..e6b167d5102a0 100644 --- a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp @@ -183,6 +183,7 @@ // CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: ext/oneapi/experimental/event_mode_property.hpp +// CHECK-NEXT: range_rounding.hpp // CHECK-NEXT: sycl_span.hpp // CHECK-NEXT: usm.hpp // CHECK-NEXT: usm/usm_pointer_info.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp index 5534c9b9fe6ee..b24f3577906ae 100644 --- a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp @@ -171,6 +171,7 @@ // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp +// CHECK-NEXT: range_rounding.hpp // CHECK-NEXT: sycl_span.hpp // CHECK-NEXT: usm/usm_pointer_info.hpp // CHECK-NEXT: usm/usm_allocator.hpp From 906d3df2d36f784b00b80ac7640ee5f8d7dbfb27 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Fri, 14 Nov 2025 15:31:49 +0000 Subject: [PATCH 30/35] Range-based kernel submit - part 3 - add exports --- sycl/include/sycl/range_rounding.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/include/sycl/range_rounding.hpp b/sycl/include/sycl/range_rounding.hpp index 79739bdc1b480..49c2b3638b452 100644 --- a/sycl/include/sycl/range_rounding.hpp +++ b/sycl/include/sycl/range_rounding.hpp @@ -20,15 +20,16 @@ inline namespace _V1 { namespace detail { -void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor, - size_t &MinRange); +void __SYCL_EXPORT GetRangeRoundingSettings(size_t &MinFactor, + size_t &GoodFactor, + size_t &MinRange); std::tuple, bool> -getMaxWorkGroups_v2(const device &Device); + __SYCL_EXPORT getMaxWorkGroups_v2(const device &Device); -bool DisableRangeRounding(); +bool __SYCL_EXPORT DisableRangeRounding(); -bool RangeRoundingTrace(); +bool __SYCL_EXPORT RangeRoundingTrace(); template std::tuple, bool> getRoundedRange(range UserRange, From 2e2d48ce3fe755e1ffb84000ab9f536281d49fd5 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Fri, 14 Nov 2025 16:34:39 +0000 Subject: [PATCH 31/35] Fix nested calls tracker and num kernel copies test. --- sycl/source/detail/queue_impl.cpp | 2 ++ sycl/test-e2e/Basic/test_num_kernel_copies.cpp | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 980fcc65bfc40..1e01acce7f457 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -650,6 +650,8 @@ queue_impl::submit_direct(bool CallerNeedsEvent, detail::CG::StorageInitHelper CGData; std::unique_lock Lock(MMutex); + NestedCallsTracker tracker; + // Used by queue_empty() and getLastEvent() MEmpty.store(false, std::memory_order_release); diff --git a/sycl/test-e2e/Basic/test_num_kernel_copies.cpp b/sycl/test-e2e/Basic/test_num_kernel_copies.cpp index 82f8477a10962..d0770a56696be 100644 --- a/sycl/test-e2e/Basic/test_num_kernel_copies.cpp +++ b/sycl/test-e2e/Basic/test_num_kernel_copies.cpp @@ -23,7 +23,8 @@ int main(int argc, char **argv) { kernel<0> krn0; q.parallel_for(sycl::range<1>{1}, krn0); - assert(copy_count == 1); + // The kernel is copied on the scheduler-based path only + assert(copy_count == 0); assert(move_count == 0); copy_count = 0; From 0ebccee1ee47fd8b29d44bd08814d34c37f31c27 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 17 Nov 2025 11:16:41 +0000 Subject: [PATCH 32/35] Add export header --- sycl/include/sycl/range_rounding.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/range_rounding.hpp b/sycl/include/sycl/range_rounding.hpp index 49c2b3638b452..bfffa96fbb70f 100644 --- a/sycl/include/sycl/range_rounding.hpp +++ b/sycl/include/sycl/range_rounding.hpp @@ -9,7 +9,7 @@ #pragma once #include - +#include #include #include From 0f1b0ab8814e36144bb73d514e9d737eeac40f31 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 17 Nov 2025 11:20:31 +0000 Subject: [PATCH 33/35] Fix formatting --- sycl/include/sycl/range_rounding.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/range_rounding.hpp b/sycl/include/sycl/range_rounding.hpp index bfffa96fbb70f..cf54004db4d73 100644 --- a/sycl/include/sycl/range_rounding.hpp +++ b/sycl/include/sycl/range_rounding.hpp @@ -8,9 +8,9 @@ #pragma once -#include #include #include +#include #include #include // for size_t From ae2eda90d17319054fc10d13041d75b3f60f7f1b Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 17 Nov 2025 12:21:16 +0000 Subject: [PATCH 34/35] Move functions out of preview macro --- sycl/source/queue.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 34e5179cdfd2e..d096f814dd2e4 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -20,8 +20,8 @@ namespace sycl { inline namespace _V1 { -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES namespace detail { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES SubmissionInfo::SubmissionInfo() : impl{std::make_shared()} {} @@ -59,6 +59,8 @@ SubmissionInfo::EventMode() const { return impl->MEventMode; } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor, size_t &MinRange) { SYCLConfig::GetSettings( @@ -91,8 +93,6 @@ bool RangeRoundingTrace() { } // namespace detail -#endif // __INTEL_PREVIEW_BREAKING_CHANGES - queue::queue(const context &SyclContext, const device_selector &DeviceSelector, const async_handler &AsyncHandler, const property_list &PropList) { const std::vector Devs = SyclContext.get_devices(); From e8ad6a9afcb3728caeea13eb5235bb008046c5a6 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Mon, 17 Nov 2025 12:49:22 +0000 Subject: [PATCH 35/35] Update Linux symbols --- sycl/test/abi/sycl_symbols_linux.dump | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 12ca5b7b5a93f..9b7a17a78a54a 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3314,11 +3314,13 @@ _ZN4sycl3_V16detail17HostProfilingInfo3endEv _ZN4sycl3_V16detail17HostProfilingInfo5startEv _ZN4sycl3_V16detail17device_global_map3addEPKvPKc _ZN4sycl3_V16detail17reduComputeWGSizeEmmRm +_ZN4sycl3_V16detail18RangeRoundingTraceEv _ZN4sycl3_V16detail18get_kernel_id_implENS1_11string_viewE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextENS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18stringifyErrorCodeEi _ZN4sycl3_V16detail19getDeviceKernelInfoERKNS1_27compile_time_kernel_info_v123CompileTimeKernelInfoTyE +_ZN4sycl3_V16detail19getMaxWorkGroups_v2ERKNS0_6deviceE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain28ext_oneapi_has_device_globalENS1_11string_viewE @@ -3326,6 +3328,7 @@ _ZN4sycl3_V16detail19kernel_bundle_plain30ext_oneapi_get_raw_kernel_nameENS1_11s _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm _ZN4sycl3_V16detail19kernel_bundle_plain33ext_oneapi_get_device_global_sizeENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain36ext_oneapi_get_device_global_addressENS1_11string_viewERKNS0_6deviceE +_ZN4sycl3_V16detail20DisableRangeRoundingEv _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_16AccessorBaseHostENS0_6access6targetE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_28SampledImageAccessorBaseHostENS0_12image_targetE _ZN4sycl3_V16detail20associateWithHandlerERNS0_7handlerEPNS1_30UnsampledImageAccessorBaseHostENS0_12image_targetE @@ -3351,6 +3354,7 @@ _ZN4sycl3_V16detail22reduGetPreferredWGSizeERNS0_7handlerEm _ZN4sycl3_V16detail22reduGetPreferredWGSizeERSt10shared_ptrINS1_10queue_implEEm _ZN4sycl3_V16detail22removeDuplicateDevicesERKSt6vectorINS0_6deviceESaIS3_EE _ZN4sycl3_V16detail23constructorNotificationEPvS2_NS0_6access6targetENS3_4modeERKNS1_13code_locationE +_ZN4sycl3_V16detail24GetRangeRoundingSettingsERmS2_S2_ _ZN4sycl3_V16detail24find_device_intersectionERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EE _ZN4sycl3_V16detail26createKernelNameBasedCacheEv _ZN4sycl3_V16detail26isDeviceGlobalUsedInKernelEPKv