Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions docs/snippets/example/02_execution.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/* Copyright 2026 René Widera
* SPDX-License-Identifier: MPL-2.0
*/

#include <alpaka/alpaka.hpp>

#include <catch2/catch_test_macros.hpp>

using namespace alpaka;

// Tutorial snippet test: queries the host CPU device spec (device count, properties, device handle),
// then visits the enabled backends and pushes an empty host task through a queue on each one.
TEST_CASE("tutorial enumerate backends and executors", "[docs]")
{
// BEGIN-TUTORIAL-enumerateDeviceSpec
auto deviceSpec = onHost::DeviceSpec{api::host, deviceKind::cpu};
auto selector = onHost::makeDeviceSelector(deviceSpec);

auto numDevices = selector.getDeviceCount();
// Index 0 is used for the property query and the device creation below, so one device must exist.
REQUIRE(numDevices >= 1u);

auto properties = selector.getDeviceProperties(0u);
auto device = selector.makeDevice(0u);
// END-TUTORIAL-enumerateDeviceSpec

CHECK(properties.warpSize >= 1u);
CHECK(!device.getName().empty());

// Incremented once per callback invocation; checked after the enumeration.
size_t numVisitedBackends = 0u;
// BEGIN-TUTORIAL-enumerateBackends
// NOTE(review): judging by its name, executeForEachIfHasDevice presumably invokes the callback only for
// backends that have at least one device, which is what makes makeDevice(0u) below valid — confirm.
onHost::executeForEachIfHasDevice(
[&](auto const& backend)
{
++numVisitedBackends;

// A backend entry is indexed by object::deviceSpec and object::exec.
auto backendDeviceSpec = backend[object::deviceSpec];
auto backendExec = backend[object::exec];
auto backendSelector = onHost::makeDeviceSelector(backendDeviceSpec);
auto backendDevice = backendSelector.makeDevice(0u);
auto backendQueue = backendDevice.makeQueue();

// Enqueue an empty host task and wait on the queue before leaving the callback.
backendQueue.enqueueHostFn([]() noexcept {});
onHost::wait(backendQueue);

// The executor is fetched but not otherwise used in this snippet.
alpaka::unused(backendExec);
return EXIT_SUCCESS;
},
onHost::allBackends(onHost::enabledApis, exec::enabledExecutors));
// END-TUTORIAL-enumerateBackends

// At least one backend must have been visited.
CHECK(numVisitedBackends >= 1u);
}
2 changes: 1 addition & 1 deletion docs/snippets/example/05_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ TEST_CASE("show host devices", "[docs]")
// END-TUTORIAL-devCount

// BEGIN-TUTORIAL-devHandleCount
// Always check the number of available compute devices! Alpaka always creates a valid DeviceSelector even for
// Always check the number of available compute devices! alpaka always creates a valid DeviceSelector even for
// unsupported combinations of an api and deviceKind.
if(numComputeDevs > 0)
{
Expand Down
19 changes: 14 additions & 5 deletions docs/snippets/example/06_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,23 @@
* SPDX-License-Identifier: MPL-2.0
*/

#include "docsTest.hpp"

#include <alpaka/alpaka.hpp>

#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>

#include <cstdint>

using namespace alpaka;

TEST_CASE("non blocking queue", "[docs]")
TEMPLATE_LIST_TEST_CASE("non blocking queue", "[docs]", docs::test::TestBackends)
{
auto device = onHost::makeHostDevice();
auto selector = onHost::makeDeviceSelector(TestType::makeDict()[object::deviceSpec]);
if(!selector.isAvailable())
return;
auto device = selector.makeDevice(0);

// BEGIN-TUTORIAL-nonBlockingQueue
// Creating a non-blocking queue
Expand All @@ -24,16 +30,19 @@ TEST_CASE("non blocking queue", "[docs]")
// END-TUTORIAL-nonBlockingQueue
}

TEST_CASE("blocking queue", "[docs]")
TEMPLATE_LIST_TEST_CASE("blocking queue", "[docs]", docs::test::TestBackends)
{
auto device = onHost::makeHostDevice();
auto selector = onHost::makeDeviceSelector(TestType::makeDict()[object::deviceSpec]);
if(!selector.isAvailable())
return;
auto device = selector.makeDevice(0);

// BEGIN-TUTORIAL-blockingQueue
// Creating a blocking queue
onHost::Queue queue = device.makeQueue(queueKind::blocking);
uint32_t value = 42u;
queue.enqueueHostFn([&value]() { value = 23u; });
// no wait required, enqueue will wait untile the task is finished
// no wait required, enqueue will wait until the task is finished
CHECK(value == 23u);
// END-TUTORIAL-blockingQueue
}
38 changes: 38 additions & 0 deletions docs/snippets/example/08_events.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/* Copyright 2026 René Widera
* SPDX-License-Identifier: MPL-2.0
*/

#include "docsTest.hpp"

#include <alpaka/alpaka.hpp>

#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>

using namespace alpaka;

// Tutorial snippet test: orders work across two queues of the same device with an event.
// queue0 writes 41 and records the event; queue1 waits for the event and then adds 1.
TEMPLATE_LIST_TEST_CASE("tutorial events and synchronization", "[docs]", docs::test::TestBackends)
{
auto selector = onHost::makeDeviceSelector(TestType::makeDict()[object::deviceSpec]);
// Nothing to test when the selector reports the device spec as unavailable.
if(!selector.isAvailable())
return;
auto device = selector.makeDevice(0);
auto queue0 = device.makeQueue();
auto queue1 = device.makeQueue();
auto event = device.makeEvent();
// Written by host tasks on both queues; captured by reference.
int value = 0;

// BEGIN-TUTORIAL-eventCreation
queue0.enqueueHostFn([&value]() { value = 41; });
// The event is enqueued after the host task on the same queue.
queue0.enqueue(event);
// END-TUTORIAL-eventCreation

// BEGIN-TUTORIAL-eventWait
// NOTE(review): waitFor presumably makes work enqueued on queue1 afterwards wait for the event — confirm.
queue1.waitFor(event);
queue1.enqueueHostFn([&value]() { value += 1; });
onHost::wait(queue1);
// END-TUTORIAL-eventWait

// 42 is only reached if the write of 41 happened before the increment.
CHECK(event.isComplete());
CHECK(value == 42);
}
35 changes: 11 additions & 24 deletions docs/snippets/example/10_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
* SPDX-License-Identifier: MPL-2.0
*/

#include "docsTest.hpp"

#include <alpaka/alpaka.hpp>

#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>

#include <algorithm>
Expand Down Expand Up @@ -80,20 +83,12 @@ TEST_CASE("memory allocations like", "[docs]")
alpaka::unused(devDoubleBuffer);
}

TEST_CASE("memory", "[docs]")
TEMPLATE_LIST_TEST_CASE("memory", "[docs]", docs::test::TestBackends)
{
// Nvidia GPU: onHost::DeviceSpec{api::cuda, deviceKind::nvidiaGpu};
// Amd GPU: onHost::DeviceSpec{api::hip, deviceKind::amdGpu};
// Intel GPU: onHost::DeviceSpec{api::oneApi, deviceKind::intelGpu};
// this call selects the host Cpu
auto computeDevSpec = onHost::DeviceSpec{api::host, deviceKind::cpu};
auto computeDevSpec = TestType::makeDict()[object::deviceSpec];
auto computeDevSelector = alpaka::onHost::makeDeviceSelector(computeDevSpec);
auto numComputeDevs = computeDevSelector.getDeviceCount();

if(numComputeDevs == 0)
{
std::cout << "No device for " << onHost::getName(computeDevSpec) << " found." << std::endl;
}
if(!computeDevSelector.isAvailable())
return;

// using the typed interface and not concept + auto
onHost::Device computeDev = computeDevSelector.makeDevice(0);
Expand Down Expand Up @@ -133,20 +128,12 @@ TEST_CASE("memory", "[docs]")
CHECK(v == 42);
}

TEST_CASE("memory using std::vector", "[docs]")
TEMPLATE_LIST_TEST_CASE("memory using std::vector", "[docs]", docs::test::TestBackends)
{
// Nvidia GPU: onHost::DeviceSpec{api::cuda, deviceKind::nvidiaGpu};
// Amd GPU: onHost::DeviceSpec{api::hip, deviceKind::amdGpu};
// Intel GPU: onHost::DeviceSpec{api::oneApi, deviceKind::intelGpu};
// this call selects the host Cpu
auto computeDevSpec = onHost::DeviceSpec{api::host, deviceKind::cpu};
auto computeDevSpec = TestType::makeDict()[object::deviceSpec];
auto computeDevSelector = alpaka::onHost::makeDeviceSelector(computeDevSpec);
auto numComputeDevs = computeDevSelector.getDeviceCount();

if(numComputeDevs == 0)
{
std::cout << "No device for " << onHost::getName(computeDevSpec) << " found." << std::endl;
}
if(!computeDevSelector.isAvailable())
return;

onHost::Device computeDev = computeDevSelector.makeDevice(0);
onHost::Queue asyncComputeQueue = computeDev.makeQueue();
Expand Down
49 changes: 49 additions & 0 deletions docs/snippets/example/11_views.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/* Copyright 2026 René Widera
* SPDX-License-Identifier: MPL-2.0
*/

#include "docsTest.hpp"

#include <alpaka/alpaka.hpp>

#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>

#include <vector>

using namespace alpaka;

// Tutorial snippet test: wraps a std::vector in a view, takes a sub-view of it, copies the data to a
// device buffer and copies a sub-view of that buffer back into a host allocation.
TEMPLATE_LIST_TEST_CASE("tutorial views and subviews", "[docs]", docs::test::TestBackends)
{
auto selector = onHost::makeDeviceSelector(TestType::makeDict()[object::deviceSpec]);
// Nothing to test when the selector reports the device spec as unavailable.
if(!selector.isAvailable())
return;
auto device = selector.makeDevice(0);
auto queue = device.makeQueue();

std::vector<int> hostData{0, 1, 2, 3, 4, 5, 6, 7};

// BEGIN-TUTORIAL-viewCreation
auto hostView = makeView(hostData);
// Arguments are (2, 4); the checks below expect 4 elements holding the values 2..5.
auto middleView = hostView.getSubView(size_t{2}, size_t{4});
// END-TUTORIAL-viewCreation

CHECK(hostView.getExtents().x() == 8u);
CHECK(middleView.getExtents().x() == 4u);
CHECK(middleView[Vec{size_t{0}}] == 2);
CHECK(middleView[Vec{size_t{3}}] == 5);

// BEGIN-TUTORIAL-viewCopy
auto deviceBuffer = onHost::allocLike(device, hostView);
onHost::memcpy(queue, deviceBuffer, hostView);

auto hostSlice = onHost::allocHost<int>(4u);
// Same (2, 4) sub-view as above, here taken from the device buffer and passed as Vec arguments.
onHost::memcpy(queue, hostSlice, deviceBuffer.getSubView(Vec{size_t{2}}, Vec{size_t{4}}));
// Wait on the queue before hostSlice is read by the checks.
onHost::wait(queue);
// END-TUTORIAL-viewCopy

CHECK(hostSlice[Vec{0u}] == 2);
CHECK(hostSlice[Vec{1u}] == 3);
CHECK(hostSlice[Vec{2u}] == 4);
CHECK(hostSlice[Vec{3u}] == 5);
}
75 changes: 75 additions & 0 deletions docs/snippets/example/12_kernelIntro.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/* Copyright 2026 René Widera
* SPDX-License-Identifier: MPL-2.0
*/

#include "docsTest.hpp"

#include <alpaka/alpaka.hpp>

#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>

#include <numeric>
#include <vector>

using namespace alpaka;

// BEGIN-TUTORIAL-kernelStructure
// Element-wise vector addition kernel: for every index handed out by the index map,
// stores lhs[idx] + rhs[idx] into out[idx].
struct VectorAddKernel
{
    // acc: accelerator handle passed through to onAcc::makeIdxMap.
    // out: destination, taken by value; its extents define the iterated index range.
    // lhs, rhs: the two inputs; asserted to have the same extents as out.
    ALPAKA_FN_ACC void operator()(
        onAcc::concepts::Acc auto const& acc,
        concepts::IMdSpan auto out,
        concepts::IDataSource auto const& lhs,
        concepts::IDataSource auto const& rhs) const
    {
        // The output extents act as the reference for both the checks and the iteration range.
        auto const extents = out.getExtents();
        ALPAKA_ASSERT_ACC(extents == lhs.getExtents());
        ALPAKA_ASSERT_ACC(extents == rhs.getExtents());

        for(auto [idx] : onAcc::makeIdxMap(acc, onAcc::worker::threadsInGrid, IdxRange{extents}))
            out[idx] = lhs[idx] + rhs[idx];
    }
};

// END-TUTORIAL-kernelStructure

// Tutorial snippet test: fills two host vectors, copies them to device buffers, launches
// VectorAddKernel over them and verifies the element-wise sum after copying the result back.
TEMPLATE_LIST_TEST_CASE("tutorial kernel intro vector add", "[docs]", docs::test::TestBackends)
{
auto selector = onHost::makeDeviceSelector(TestType::makeDict()[object::deviceSpec]);
// Nothing to test when the selector reports the device spec as unavailable.
if(!selector.isAvailable())
return;
auto device = selector.makeDevice(0);
auto queue = device.makeQueue();

// NOTE(review): 257 elements — presumably chosen so the size is not a multiple of the frame size; confirm.
std::vector<int> lhs(257u);
std::vector<int> rhs(257u);
// lhs = 0, 1, 2, ... and rhs = 1000, 1001, 1002, ...
std::iota(lhs.begin(), lhs.end(), 0);
std::iota(rhs.begin(), rhs.end(), 1000);
// Pre-filled with -1, a value no lhs[i] + rhs[i] sum takes.
std::vector<int> result(lhs.size(), -1);

auto lhsBuffer = onHost::alloc<int>(device, static_cast<uint32_t>(lhs.size()));
auto rhsBuffer = onHost::allocLike(device, lhsBuffer);
auto resultBuffer = onHost::allocLike(device, lhsBuffer);

// Upload both inputs and memset the device result buffer with 0x00, all on the same queue.
onHost::memcpy(queue, lhsBuffer, lhs);
onHost::memcpy(queue, rhsBuffer, rhs);
onHost::memset(queue, resultBuffer, 0x00);

// BEGIN-TUTORIAL-kernelLaunch
// BEGIN-TUTORIAL-kernelFrameSpec
auto frameSpec = onHost::getFrameSpec<int>(device, Vec{static_cast<uint32_t>(result.size())});
// END-TUTORIAL-kernelFrameSpec

// Kernel arguments follow the kernel's parameter order after acc: out, lhs, rhs.
queue.enqueue(frameSpec, KernelBundle{VectorAddKernel{}, resultBuffer, lhsBuffer, rhsBuffer});

// Copy the result back and wait on the queue before the host reads it.
onHost::memcpy(queue, result, resultBuffer);
onHost::wait(queue);
// END-TUTORIAL-kernelLaunch

for(size_t i = 0; i < result.size(); ++i)
{
CHECK(result[i] == lhs[i] + rhs[i]);
}
}
Loading
Loading