Skip to content

[SYCL][NFC] Move NDRDescT to include/sycl/detail/cg_types.hpp #19665

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions sycl/include/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,101 @@ enum class CGType : unsigned int {
AsyncFree = 29,
};

// The structure represents NDRange - global, local sizes, global offset and
// number of dimensions.

// TODO: A lot of tests rely on particular values to be set for dimensions that
// are not used. To clarify, for example, if a 2D kernel is invoked, in
// NDRDescT, the value of index 2 in GlobalSize must be set to either 1 or 0
Copy link
Contributor

@cperkinsintel cperkinsintel Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really dislike this aspect of the NDRDesc. It has THREE different applications:

  • sometimes we template it to Dims
  • sometimes Dim is 3, but 0 is used in the higher "unused" dimensions
  • othertimes Dim is 3, and 1 is used in the higher "unused" dimensions.

IIRC, when we are hoisting it to the device it is not templated and we use that third convention. That way the sizes are correct, as multiplying by 1 doesn't change anything.

We should probably remove that second convention, the one that uses 0. That just leads to bugs. And, maybe we should reevaluate the usage of this end to end and drop the template over Dims. In one form or other, the NDRDesc is used by the FE, by SYCL and by the device.

// depending on which constructor is used for no clear reason.
// Instead, only sensible defaults should be used and tests should be updated
// to reflect this.
class NDRDescT {

public:
NDRDescT() = default;
NDRDescT(const NDRDescT &Desc) = default;
NDRDescT(NDRDescT &&Desc) = default;

template <int Dims_>
NDRDescT(sycl::range<Dims_> N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} {
if (SetNumWorkGroups) {
for (size_t I = 0; I < Dims_; ++I) {
NumWorkGroups[I] = N[I];
}
} else {
for (size_t I = 0; I < Dims_; ++I) {
GlobalSize[I] = N[I];
}

for (int I = Dims_; I < 3; ++I) {
GlobalSize[I] = 1;
}
}
}

template <int Dims_>
NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::range<Dims_> LocalSizes,
sycl::id<Dims_> Offset)
: Dims{size_t(Dims_)} {
for (size_t I = 0; I < Dims_; ++I) {
GlobalSize[I] = NumWorkItems[I];
LocalSize[I] = LocalSizes[I];
GlobalOffset[I] = Offset[I];
}

for (int I = Dims_; I < 3; ++I) {
LocalSize[I] = LocalSizes[0] ? 1 : 0;
}

for (int I = Dims_; I < 3; ++I) {
GlobalSize[I] = 1;
}
}

template <int Dims_>
NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset)
: Dims{size_t(Dims_)} {
for (size_t I = 0; I < Dims_; ++I) {
GlobalSize[I] = NumWorkItems[I];
GlobalOffset[I] = Offset[I];
}
}

template <int Dims_>
NDRDescT(sycl::nd_range<Dims_> ExecutionRange)
: NDRDescT(ExecutionRange.get_global_range(),
ExecutionRange.get_local_range(),
ExecutionRange.get_offset()) {}

template <int Dims_>
NDRDescT(sycl::range<Dims_> Range)
: NDRDescT(Range, /*SetNumWorkGroups=*/false) {}

template <int Dims_> void setClusterDimensions(sycl::range<Dims_> N) {
if (this->Dims != size_t(Dims_)) {
throw std::runtime_error(
"Dimensionality of cluster, global and local ranges must be same");
}

for (int I = 0; I < Dims_; ++I)
ClusterDimensions[I] = N[I];
}

NDRDescT &operator=(const NDRDescT &Desc) = default;
NDRDescT &operator=(NDRDescT &&Desc) = default;

std::array<size_t, 3> GlobalSize{0, 0, 0};
std::array<size_t, 3> LocalSize{0, 0, 0};
std::array<size_t, 3> GlobalOffset{0, 0, 0};
/// Number of workgroups, used to record the number of workgroups from the
/// simplest form of parallel_for_work_group. If set, all other fields must be
/// zero
std::array<size_t, 3> NumWorkGroups{0, 0, 0};
std::array<size_t, 3> ClusterDimensions{1, 1, 1};
size_t Dims = 0;
};

template <typename, typename T> struct check_fn_signature {
static_assert(std::integral_constant<T, false>::value,
"Second template parameter is required to be of function type");
Expand Down
95 changes: 0 additions & 95 deletions sycl/source/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,101 +60,6 @@ class ArgDesc {
int MIndex;
};

// The structure represents NDRange - global, local sizes, global offset and
// number of dimensions.

// TODO: A lot of tests rely on particular values to be set for dimensions that
// are not used. To clarify, for example, if a 2D kernel is invoked, in
// NDRDescT, the value of index 2 in GlobalSize must be set to either 1 or 0
// depending on which constructor is used for no clear reason.
// Instead, only sensible defaults should be used and tests should be updated
// to reflect this.
class NDRDescT {

public:
NDRDescT() = default;
NDRDescT(const NDRDescT &Desc) = default;
NDRDescT(NDRDescT &&Desc) = default;

template <int Dims_>
NDRDescT(sycl::range<Dims_> N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} {
if (SetNumWorkGroups) {
for (size_t I = 0; I < Dims_; ++I) {
NumWorkGroups[I] = N[I];
}
} else {
for (size_t I = 0; I < Dims_; ++I) {
GlobalSize[I] = N[I];
}

for (int I = Dims_; I < 3; ++I) {
GlobalSize[I] = 1;
}
}
}

template <int Dims_>
NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::range<Dims_> LocalSizes,
sycl::id<Dims_> Offset)
: Dims{size_t(Dims_)} {
for (size_t I = 0; I < Dims_; ++I) {
GlobalSize[I] = NumWorkItems[I];
LocalSize[I] = LocalSizes[I];
GlobalOffset[I] = Offset[I];
}

for (int I = Dims_; I < 3; ++I) {
LocalSize[I] = LocalSizes[0] ? 1 : 0;
}

for (int I = Dims_; I < 3; ++I) {
GlobalSize[I] = 1;
}
}

template <int Dims_>
NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset)
: Dims{size_t(Dims_)} {
for (size_t I = 0; I < Dims_; ++I) {
GlobalSize[I] = NumWorkItems[I];
GlobalOffset[I] = Offset[I];
}
}

template <int Dims_>
NDRDescT(sycl::nd_range<Dims_> ExecutionRange)
: NDRDescT(ExecutionRange.get_global_range(),
ExecutionRange.get_local_range(),
ExecutionRange.get_offset()) {}

template <int Dims_>
NDRDescT(sycl::range<Dims_> Range)
: NDRDescT(Range, /*SetNumWorkGroups=*/false) {}

template <int Dims_> void setClusterDimensions(sycl::range<Dims_> N) {
if (this->Dims != size_t(Dims_)) {
throw std::runtime_error(
"Dimensionality of cluster, global and local ranges must be same");
}

for (int I = 0; I < Dims_; ++I)
ClusterDimensions[I] = N[I];
}

NDRDescT &operator=(const NDRDescT &Desc) = default;
NDRDescT &operator=(NDRDescT &&Desc) = default;

std::array<size_t, 3> GlobalSize{0, 0, 0};
std::array<size_t, 3> LocalSize{0, 0, 0};
std::array<size_t, 3> GlobalOffset{0, 0, 0};
/// Number of workgroups, used to record the number of workgroups from the
/// simplest form of parallel_for_work_group. If set, all other fields must be
/// zero
std::array<size_t, 3> NumWorkGroups{0, 0, 0};
std::array<size_t, 3> ClusterDimensions{1, 1, 1};
size_t Dims = 0;
};

/// Base class for all types of command groups.
class CG {
public:
Expand Down