Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/compute-sanitizer-run.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: Compute Sanitizer Run

# Runs one compute-sanitizer tool against the single_tests target.
# Can be called from another workflow (workflow_call) or started by hand
# (workflow_dispatch); both entry points take the same `tool_name` input.
on:
  workflow_call:
    inputs:
      tool_name:
        description: 'Compute sanitizer tool to run (memcheck, racecheck, initcheck, synccheck)'
        type: string
        required: true
  workflow_dispatch:
    inputs:
      tool_name:
        description: 'Compute sanitizer tool to run'
        # Manual runs pick the tool from a fixed list.
        type: choice
        options:
          - memcheck
          - racecheck
          - initcheck
          - synccheck
        required: true

jobs:
  run-sanitizer-tests:
    name: 'Run ${{ inputs.tool_name }} on single_tests'
    # Delegates to the shared custom-job workflow, forwarding whatever secrets
    # this workflow itself received.
    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-13.1.0
    secrets: inherit
    with:
      # NOTE(review): build_type is fixed to pull-request regardless of how this
      # workflow was started (PR, schedule, manual) - confirm that is intended.
      build_type: pull-request
      node_type: 'gpu-l4-latest-1'
      # The selected tool and the fixed test target are passed as positional
      # arguments to the CI script.
      script: 'ci/test_cpp_sanitizer.sh ${{ inputs.tool_name }} single_tests'
22 changes: 22 additions & 0 deletions .github/workflows/compute-sanitizer-trigger.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: Compute Sanitizer Trigger

# This workflow runs the compute-sanitizer tools racecheck and synccheck on the
# librapidsmpf tests once a week.
# memcheck is run in the nightly test.yaml workflow.
# For targeted testing, manually trigger compute-sanitizer-run.yaml with a
# specific tool_name.

on:
  schedule:
    - cron: '0 10 * * 6'  # Weekly on Saturday at 10:00 UTC
  workflow_dispatch:

jobs:
  run-sanitizer-tests-racecheck:
    name: compute-sanitizer racecheck tests
    uses: ./.github/workflows/compute-sanitizer-run.yaml
    # compute-sanitizer-run.yaml forwards secrets to its own nested job with
    # `secrets: inherit`; in a nested chain it can only forward what this
    # caller passes in, so the secrets must be handed over here as well.
    secrets: inherit
    with:
      tool_name: "racecheck"
  run-sanitizer-tests-synccheck:
    name: compute-sanitizer synccheck tests
    uses: ./.github/workflows/compute-sanitizer-run.yaml
    # Same reason as above: pass secrets through to the nested workflow.
    secrets: inherit
    with:
      tool_name: "synccheck"
21 changes: 16 additions & 5 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ jobs:
- conda-cpp-linters
- conda-cpp-tests
- conda-cpp-memcheck
- conda-cpp-racecheck
- conda-cpp-synccheck
- conda-python-build
- conda-python-tests
- docs-build
Expand Down Expand Up @@ -134,13 +136,22 @@ jobs:
script: ci/test_cpp.sh
sccache-dist-token-secret-name: GIST_REPO_READ_ORG_GITHUB_TOKEN
conda-cpp-memcheck:
secrets: inherit
needs: conda-cpp-build
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@cuda-13.1.0
uses: ./.github/workflows/compute-sanitizer-run.yaml
with:
build_type: pull-request
script: "ci/test_cpp_memcheck.sh"
node_type: "gpu-l4-latest-1"
tool_name: "memcheck"
# Temporary for testing
conda-cpp-racecheck:
needs: conda-cpp-build
uses: ./.github/workflows/compute-sanitizer-run.yaml
with:
tool_name: "racecheck"
# Temporary for testing
conda-cpp-synccheck:
needs: conda-cpp-build
uses: ./.github/workflows/compute-sanitizer-run.yaml
with:
tool_name: "synccheck"
conda-python-build:
needs: conda-cpp-build
secrets: inherit
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000"
date: ${{ inputs.date }}
node_type: "gpu-l4-latest-1"
script: ci/test_cpp_memcheck.sh
script: ci/test_cpp_sanitizer.sh memcheck single_tests
sha: ${{ inputs.sha }}
conda-python-tests:
secrets: inherit
Expand Down
73 changes: 73 additions & 0 deletions ci/run_compute_sanitizer_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

# Run compute-sanitizer on a single librapidsmpf test executable.
#
# Usage: ./run_compute_sanitizer_test.sh TOOL_NAME TEST_NAME [additional gtest args...]
# Example: ./run_compute_sanitizer_test.sh memcheck single_tests
# Example: ./run_compute_sanitizer_test.sh racecheck single_tests --gtest_filter=ShufflerTest.*
#
# Exit status: 1 on bad usage or when the test directory/executable cannot be
# found; otherwise the exit status returned by compute-sanitizer.

if [ $# -lt 2 ]; then
    # Usage errors go to stderr, like the other error paths below.
    echo "Error: Tool and test name required" >&2
    echo "Usage: $0 TOOL_NAME TEST_NAME [additional gtest args...]" >&2
    echo "  TOOL_NAME: compute-sanitizer tool (memcheck, racecheck, initcheck, synccheck)" >&2
    echo "  TEST_NAME: librapidsmpf test name (e.g., single_tests)" >&2
    exit 1
fi

TOOL_NAME="${1}"
shift
TEST_NAME="${1}"
shift
# Whatever remains in "$@" is forwarded verbatim to the test executable.

rapids-logger "Running compute-sanitizer --tool ${TOOL_NAME} on ${TEST_NAME}"

# Support customizing the ctests' install location
# First, try the installed location (CI/conda environments)
installed_test_location="${INSTALL_PREFIX:-${CONDA_PREFIX:-/usr}}/bin/tests/librapidsmpf"
# Fall back to the build directory (devcontainer environments)
devcontainers_test_location="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/../cpp/build/latest"

if [[ -d "${installed_test_location}" ]]; then
    TEST_DIR="${installed_test_location}"
elif [[ -d "${devcontainers_test_location}" ]]; then
    TEST_DIR="${devcontainers_test_location}"
else
    echo "Error: Test location not found. Searched:" >&2
    echo "  - ${installed_test_location}" >&2
    echo "  - ${devcontainers_test_location}" >&2
    exit 1
fi

TEST_EXECUTABLE="${TEST_DIR}/gtests/${TEST_NAME}"

if [ ! -x "${TEST_EXECUTABLE}" ]; then
    rapids-logger "Error: Test executable ${TEST_EXECUTABLE} not found or not executable"
    exit 1
fi

# Build compute-sanitizer arguments based on tool
SANITIZER_ARGS=(
    --tool "${TOOL_NAME}"
    --force-blocking-launches
    --error-exitcode=1
)

# Add tool-specific arguments
if [ "${TOOL_NAME}" = "memcheck" ]; then
    SANITIZER_ARGS+=(--track-stream-ordered-races=all)
fi

# Run compute-sanitizer on the specified test, excluding CuptiMonitorTest.
#
# The status is captured with `|| EXITCODE=$?` because `set -e` would otherwise
# abort the script on a non-zero status before the final log line is written.
#
# The filter is quoted so the shell never attempts pathname expansion on `*`.
#
# NOTE(review): a --gtest_filter supplied in the additional args comes after
# the exclusion below; GoogleTest presumably honours the last occurrence, which
# would drop the CuptiMonitorTest exclusion - confirm.
EXITCODE=0
compute-sanitizer \
    "${SANITIZER_ARGS[@]}" \
    "${TEST_EXECUTABLE}" \
    "--gtest_filter=-CuptiMonitorTest.*" \
    "$@" || EXITCODE=$?

rapids-logger "compute-sanitizer --tool ${TOOL_NAME} on ${TEST_NAME} exiting with value: $EXITCODE"
exit $EXITCODE
55 changes: 0 additions & 55 deletions ci/test_cpp_memcheck.sh

This file was deleted.

55 changes: 55 additions & 0 deletions ci/test_cpp_sanitizer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

source /opt/conda/etc/profile.d/conda.sh

# Prepares a conda test environment, then hands over to
# run_compute_sanitizer_test.sh to run compute-sanitizer on a librapidsmpf test.
#
# Usage: ./test_cpp_sanitizer.sh TOOL_NAME TEST_NAME [additional gtest args...]
# Example: ./test_cpp_sanitizer.sh memcheck single_tests
# Example: ./test_cpp_sanitizer.sh racecheck single_tests --gtest_filter=ShufflerTest.*

# Both positional arguments are mandatory.
if (( $# < 2 )); then
    printf '%s\n' \
        "Error: Tool and test name required" \
        "Usage: $0 TOOL_NAME TEST_NAME [additional gtest args...]" \
        "  TOOL_NAME: compute-sanitizer tool (memcheck, racecheck, initcheck, synccheck)" \
        "  TEST_NAME: librapidsmpf test name (e.g., single_tests)"
    exit 1
fi

# Take the two required arguments; anything left in "$@" is passed through.
sanitizer_tool="$1"
test_target="$2"
shift 2

rapids-logger "Configuring conda strict channel priority"
conda config --set channel_priority strict

cpp_channel="$(rapids-download-conda-from-github cpp)"

rapids-logger "Generate C++ testing dependencies"
# The generated environment spec is echoed to the log and saved as env.yaml.
rapids-dependency-file-generator \
    --output conda \
    --file-key test_cpp \
    --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" \
    --prepend-channel "${cpp_channel}" |
    tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n test

# `conda activate` may touch unset variables, so relax nounset around it.
set +u
conda activate test
set -u

rapids-print-env

rapids-logger "Check GPU usage"
nvidia-smi

# Change to this script's own directory so the sibling runner script resolves
# no matter where the caller invoked us from.
script_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
cd "${script_dir}"

./run_compute_sanitizer_test.sh "${sanitizer_tool}" "${test_target}" "$@"
Loading