diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 24ed44b219e3a..27b62c0118ee8 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -859,6 +859,13 @@ void queue_impl::verifyProps(const property_list &Props) const { CheckPropertiesWithData); } +std::vector & +queue_impl::getKernelArgStorage(uint32_t size) { + MKernelArgStorage.clear(); + MKernelArgStorage.reserve(size); + return MKernelArgStorage; +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 7e15c772f2697..f61bb23df563d 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -683,6 +683,11 @@ class queue_impl : public std::enable_shared_from_this { } #endif + /// Clears MKernelArgStorage, calls .reserve(size) on it, and returns a + /// reference to it. Not inherently thread safe. + std::vector & + getKernelArgStorage(uint32_t size); + protected: template EventImplPtr insertHelperBarrier(const HandlerType &Handler) { @@ -999,6 +1004,10 @@ class queue_impl : public std::enable_shared_from_this { ur_queue_handle_t MQueue; + // To avoid re-allocating this every time a kernel is enqueued we keep this + // vector around and .clear()/.reserve() for each kernel instead. + std::vector MKernelArgStorage; + // Access should be guarded with MMutex struct DependencyTrackingItems { // This event is employed for enhanced dependency tracking with in-order diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 70f12d0a59ef7..94c704db22f1e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2424,8 +2424,7 @@ static ur_result_t SetKernelParamsAndLaunch( DeviceImageImpl ? 
DeviceImageImpl->get_spec_const_blob_ref() : Empty); } - std::vector UrArgs; - UrArgs.reserve(Args.size()); + auto &UrArgs = Queue.getKernelArgStorage(Args.size()); if (KernelFuncPtr && !KernelHasSpecialCaptures) { auto setFunc = [&UrArgs,