From e01c675087a0d5c489088d5e399a68b5191f38c2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 15 Jan 2025 12:57:09 +0100 Subject: [PATCH 1/4] GPU:Remove OpenCL 1.2, now that OpenCL 2 supports all its functionality --- GPU/Common/GPUCommonConstants.h | 4 +- GPU/Common/GPUCommonDef.h | 4 +- GPU/Common/GPUCommonDefSettings.h | 1 - GPU/Common/GPUCommonMath.h | 12 +- GPU/Common/GPUCommonTypeTraits.h | 2 +- GPU/GPUTracking/Base/GPUConstantMem.h | 2 +- GPU/GPUTracking/Base/GPUParam.inc | 6 - ...ReconstructionAvailableBackends.template.h | 1 - .../GPUReconstructionKernelList.template.h | 8 +- .../Base/GPUReconstructionLibrary.cxx | 4 - .../opencl-common/GPUReconstructionOCL.cl | 28 ----- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 112 ------------------ .../Base/opencl/GPUReconstructionOCL1.cxx | 103 ---------------- .../Base/opencl/GPUReconstructionOCL1.h | 52 -------- .../opencl/GPUReconstructionOCL1Internals.h | 28 ----- GPU/GPUTracking/CMakeLists.txt | 12 +- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 3 - GPU/GPUTracking/DataTypes/GPUO2DataTypes.h | 4 +- GPU/GPUTracking/DataTypes/GPUSettings.h | 2 - .../DataTypes/GPUTPCGMPolynomialField.h | 9 -- GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 12 +- .../Definitions/GPUDefConstantsAndSettings.h | 2 +- GPU/GPUTracking/Definitions/GPULogging.h | 2 +- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- .../Global/GPUChainTrackingClusterizer.cxx | 1 - .../SliceTracker/GPUTPCGlobalTracking.cxx | 3 - .../SliceTracker/GPUTPCGlobalTracking.h | 2 - .../SliceTracker/GPUTPCSliceOutput.h | 2 - .../SliceTracker/GPUTPCTracker.cxx | 2 - GPU/GPUTracking/SliceTracker/GPUTPCTracker.h | 2 - .../GPUTPCTrackletConstructor.cxx | 16 +-- .../SliceTracker/GPUTPCTrackletConstructor.h | 2 - GPU/GPUTracking/Standalone/cmake/config.cmake | 1 - GPU/GPUTracking/cmake/kernel_helpers.cmake | 4 +- GPU/GPUTracking/dEdx/GPUdEdx.h | 6 +- GPU/GPUTracking/kernels.cmake | 20 ++-- dependencies/FindO2GPU.cmake | 26 +--- 37 files changed, 39 insertions(+), 463 deletions(-) delete mode 100644 GPU/GPUTracking/Base/opencl/CMakeLists.txt delete mode 100644 GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.cxx delete mode 100644 GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.h delete mode 100644 GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1Internals.h diff --git a/GPU/Common/GPUCommonConstants.h b/GPU/Common/GPUCommonConstants.h index f45aa05ed00ca..c6dfedc14ab7e 100644 --- a/GPU/Common/GPUCommonConstants.h +++ b/GPU/Common/GPUCommonConstants.h @@ -17,11 +17,9 @@ #include "GPUCommonDef.h" -#if !defined(__OPENCL1__) namespace GPUCA_NAMESPACE::gpu::gpu_common_constants { -static CONSTEXPR const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this when OpenCL1 is removed +static CONSTEXPR const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this now that we use only OpenCL CPP } -#endif #endif diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index ac3d7279fbaf4..14949d569c1e6 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -30,7 +30,7 @@ //Some GPU configuration settings, must be included first #include "GPUCommonDefSettings.h" -#if !defined(__OPENCL1__) && (!(defined(__CINT__) || defined(__ROOTCINT__)) || defined(__CLING__)) && defined(__cplusplus) && __cplusplus >= 201103L +#if (!(defined(__CINT__) || defined(__ROOTCINT__)) || defined(__CLING__)) && defined(__cplusplus) && __cplusplus >= 201103L #define GPUCA_NOCOMPAT // C++11 + No old ROOT5 + No old OpenCL #ifndef __OPENCL__ #define GPUCA_NOCOMPAT_ALLOPENCL // + No OpenCL at all @@ -82,7 +82,7 @@ #define GPUCA_NAMESPACE o2 #endif -#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL1__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) || (defined(__OPENCLCPP__) && defined(GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY)) +#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCLCPP__) && defined(GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY)) #define GPUCA_NO_CONSTANT_MEMORY #elif defined(__CUDACC__) || defined(__HIPCC__) #define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM diff --git a/GPU/Common/GPUCommonDefSettings.h b/GPU/Common/GPUCommonDefSettings.h index 6a4ef86125a3f..91f44657c4f06 100644 --- a/GPU/Common/GPUCommonDefSettings.h +++ b/GPU/Common/GPUCommonDefSettings.h @@ -26,7 +26,6 @@ //#define GPUCA_CUDA_NO_CONSTANT_MEMORY // Do not use constant memory for CUDA //#define GPUCA_HIP_NO_CONSTANT_MEMORY // Do not use constant memory for HIP -//#define GPUCA_OPENCL_NO_CONSTANT_MEMORY // Do not use constant memory for OpenCL 1.2 #define GPUCA_OPENCLCPP_NO_CONSTANT_MEMORY // Do not use constant memory for OpenCL C++ - MANDATORY as OpenCL cannot cast between __constant and __generic yet! // clang-format on diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 0e5db743d0c57..d211b051bed39 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -31,12 +31,10 @@ #include #endif -#if !defined(__OPENCL1__) namespace GPUCA_NAMESPACE { namespace gpu { -#endif class GPUCommonMath { @@ -289,7 +287,7 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) { -#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL1__) +#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) return x == 0 ? 32 : CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available #else for (int32_t i = 31; i >= 0; i--) { @@ -303,7 +301,7 @@ GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) { -#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) /* !defined(__OPENCL1__)*/) // TODO: exclude only OPENCLC (not CPP) when reported SPIR-V bug is fixed +#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed // use builtin if available return CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x)); #else @@ -563,9 +561,7 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt #undef CHOICE -#if !defined(__OPENCL1__) -} -} -#endif +} // namespace gpu +} // namespace GPUCA_NAMESPACE #endif // GPUCOMMONMATH_H diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index 88fcc9b838a65..6d72565d1f1fb 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -21,7 +21,7 @@ #ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif -#elif !defined(__OPENCL1__) +#else // We just reimplement some type traits in std for the GPU namespace std { diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index df797f4c79419..96b212eeea078 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -20,7 +20,7 @@ #include "GPUDataTypes.h" #include "GPUErrors.h" -// Dummies for stuff not supported in legacy code (ROOT 5 / OPENCL1.2) +// Dummies for stuff not supported in legacy code (ROOT 5) #if defined(GPUCA_NOCOMPAT_ALLCINT) #include "GPUTPCGMMerger.h" #else diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 41ed3c8f203cb..1e972189d1b92 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -17,9 +17,7 @@ #include "GPUParam.h" #include "GPUTPCGMMergedTrackHit.h" -#if !defined(__OPENCL1__) #include "GPUTPCClusterOccupancyMap.h" -#endif namespace GPUCA_NAMESPACE { @@ -228,15 +226,11 @@ GPUdi() void MEM_LG(GPUParam)::UpdateClusterError2ByState(int16_t clusterState, MEM_CLASS_PRE() GPUdi() float MEM_LG(GPUParam)::GetUnscaledMult(float time) const { -#if !defined(__OPENCL1__) if (!occupancyMap) { return 0.f; } const uint32_t bin = CAMath::Max(0.f, time / rec.tpc.occupancyMapTimeBins); return occupancyMap[bin]; -#else - return 0.f; -#endif } MEM_CLASS_PRE() diff --git a/GPU/GPUTracking/Base/GPUReconstructionAvailableBackends.template.h b/GPU/GPUTracking/Base/GPUReconstructionAvailableBackends.template.h index 77c57533ba541..3aea2706723f1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionAvailableBackends.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionAvailableBackends.template.h @@ -14,5 +14,4 @@ #cmakedefine CUDA_ENABLED #cmakedefine HIP_ENABLED -#cmakedefine OPENCL1_ENABLED #cmakedefine OPENCL2_ENABLED diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h b/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h index 8194214a180e4..1def09c61e606 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h @@ -15,14 +15,14 @@ // No header protection, this may be used multiple times #include "GPUReconstructionKernelMacros.h" -#if !defined(GPUCA_OPENCL1) && (!defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE)) -#define GPUCA_KRNL_NOOCL1 +#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) +#define GPUCA_KRNL_NOALIROOT #endif // clang-format off $,> // clang-format on -#ifdef GPUCA_KRNL_NOOCL1 -#undef GPUCA_KRNL_NOOCL1 +#ifdef GPUCA_KRNL_NOALIROOT +#undef GPUCA_KRNL_NOALIROOT #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index d4d7b12dc8cc6..cb509f9978e5b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -101,10 +101,6 @@ std::shared_ptr* GPUReconstruction::GetLibrary } else if (type == DeviceType::HIP) { #ifdef HIP_ENABLED return &sLibHIP; -#endif - } else if (type == DeviceType::OCL) { -#ifdef OPENCL1_ENABLED - return &sLibOCL; #endif } else if (type == DeviceType::OCL2) { #ifdef OPENCL2_ENABLED diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl index 672c4b63eb476..57b32850900b3 100644 --- a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl @@ -16,8 +16,6 @@ #define __OPENCL__ #if defined(__cplusplus) && __cplusplus >= 201703L #define __OPENCLCPP__ -#else - #define __OPENCL1__ #endif #define GPUCA_GPUTYPE_OPENCL @@ -57,9 +55,6 @@ #define M_PI 3.1415926535f #endif #else - #ifdef GPUCA_OPENCL_NO_CONSTANT_MEMORY - #define GPUCA_NO_CONSTANT_MEMORY - #endif #define nullptr NULL #define NULL (0x0) #endif @@ -77,32 +72,9 @@ typedef signed char int8_t; #undef assert #endif #define assert(param) -#ifndef __OPENCLCPP__ -#define static_assert(...) -#define GPUCA_OPENCL1 -#endif #include "GPUConstantMem.h" -#ifdef __OPENCLCPP__ #include "GPUReconstructionIncludesDeviceAll.h" -#else // Workaround, since OpenCL1 cannot digest all files -#include "GPUTPCTrackParam.cxx" -#include "GPUTPCTrack.cxx" -#include "GPUTPCGrid.cxx" -#include "GPUTPCRow.cxx" -#include "GPUTPCTracker.cxx" - -#include "GPUGeneralKernels.cxx" -#include "GPUErrors.cxx" - -#include "GPUTPCTrackletSelector.cxx" -#include "GPUTPCNeighboursFinder.cxx" -#include "GPUTPCNeighboursCleaner.cxx" -#include "GPUTPCStartHitsFinder.cxx" -#include "GPUTPCStartHitsSorter.cxx" -#include "GPUTPCTrackletConstructor.cxx" -#include "GPUTPCGlobalTracking.cxx" -#endif // if (gpu_mem != pTracker.GPUParametersConst()->gpumem) return; //TODO! diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt deleted file mode 100644 index 1ad9041f70997..0000000000000 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. - -set(MODULE GPUTrackingOCL) -enable_language(ASM) - -# AMD APP SDK required for OpenCL tracker as it's using specific extensions -# (currently) not provided by other vendors - -if(NOT AMDAPPSDKROOT) - message( - FATAL_ERROR - "AMDAPPSDKROOT not set. Please install AMD APP SDK and set $AMDAPPSDKROOT or disable ENABLE_OPENCL1." - ) -endif() - -message(STATUS "Building GPUTracking with OpenCL 1.2 support") - -# convenience variables -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - set(GPUDIR ${CMAKE_SOURCE_DIR}/../) -else() - set(GPUDIR ${CMAKE_SOURCE_DIR}/GPU/GPUTracking) -endif() -set(CL_SRC ${GPUDIR}/Base/opencl-common/GPUReconstructionOCL.cl) -set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCL1Code.bin) - -# build the OpenCL compile wrapper : -# -# * checks the correct vendor implementation (AMD) -# * builds binary code (blob) for the found platform(s) -add_executable(opencl_compiler - ${GPUDIR}/utils/makefile_opencl_compiler.cxx) -target_link_libraries(opencl_compiler PUBLIC OpenCL::OpenCL) -set_property(TARGET opencl_compiler - PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) - -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - set(OPENCL_HEADER_FILTER "${CMAKE_SOURCE_DIR}") -else() - set(OPENCL_HEADER_FILTER "${CMAKE_SOURCE_DIR}/GPU") -endif() -set(OPENCL_HEADER_FILTER "^${OPENCL_HEADER_FILTER}|^${CMAKE_BINARY_DIR}.*include_gpu_onthefly") - -# executes OpenCL compiler wrapper to build binary object -add_custom_command( - OUTPUT ${CL_BIN} - COMMAND LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$ - $ - -output-file - ${CL_BIN} - ${CL_SRC} - -- - "-D$,$-D>" - "-I$,EXCLUDE,^/usr>,INCLUDE,${OPENCL_HEADER_FILTER}>,$-I>" - -x clc++ - MAIN_DEPENDENCY ${CL_SRC} - IMPLICIT_DEPENDS CXX ${CL_SRC} - COMMAND_EXPAND_LISTS - COMMENT "Compiling OpenCL1 CL source file ${CL_SRC}") - -create_binary_resource(${CL_BIN} ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.o) - -set(SRCS GPUReconstructionOCL1.cxx - ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode.o) -set(HDRS GPUReconstructionOCL1.h GPUReconstructionOCL1Internals.h) - -if(ALIGPU_BUILD_TYPE STREQUAL "O2") - o2_add_library(${MODULE} - SOURCES ${SRCS} - PUBLIC_LINK_LIBRARIES O2::GPUTrackingOpenCLCommon - TARGETVARNAME targetName) - - target_compile_definitions(${targetName} PRIVATE $) - # the compile_defitions are not propagated automatically on purpose (they are - # declared PRIVATE) so we are not leaking them outside of the GPU** - # directories - - install(FILES ${HDRS} DESTINATION include/GPU) -endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - # Generate the dictionary - get_directory_property(incdirs INCLUDE_DIRECTORIES) - generate_dictionary("Ali${MODULE}" "" "GPUReconstructionOCL1.h" "${incdirs} .") - - # Generate the ROOT map - generate_rootmap("Ali${MODULE}" "" "") - - # Add a library to the project using the specified source files - add_library_tested(Ali${MODULE} SHARED ${SRCS} G__Ali${MODULE}.cxx) - target_link_libraries(Ali${MODULE} PUBLIC AliGPUTrackingOpenCLCommon) - - # Installation - install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS} DESTINATION include) -endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - add_library(${MODULE} SHARED ${SRCS}) - target_link_libraries(${MODULE} GPUTrackingOpenCLCommon) - install(TARGETS ${MODULE}) -endif() diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.cxx deleted file mode 100644 index 3f84ab0f6ac15..0000000000000 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.cxx +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCL1.cxx -/// \author David Rohr - -#define GPUCA_GPUTYPE_OPENCL -#define __OPENCL_HOST__ - -#include "GPUReconstructionOCL1.h" -#include "GPUReconstructionOCL1Internals.h" -#include "GPUReconstructionIncludes.h" - -using namespace GPUCA_NAMESPACE::gpu; - -#include -#include -#include -#include - -#include "utils/opencl_obtain_program.h" -#include "utils/qGetLdBinarySymbols.h" -QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCL1Code_bin); - -GPUReconstruction* GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL1(cfg); } - -GPUReconstructionOCL1Backend::GPUReconstructionOCL1Backend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionOCL(cfg) -{ -} - -template -int32_t GPUReconstructionOCL1Backend::runKernelBackend(const krnlSetupArgs& args) -{ - cl_kernel k = args.s.y.num > 1 ? getKernelObject() : getKernelObject(); - return std::apply([this, &args, &k](auto&... vals) { return runKernelBackendInternal(args.s, k, vals...); }, args.v); -} - -template -S& GPUReconstructionOCL1Backend::getKernelObject() -{ - static uint32_t krnl = FindKernel(MULTI ? 2 : 1); - return mInternals->kernels[krnl].first; -} - -int32_t GPUReconstructionOCL1Backend::GetOCLPrograms() -{ - cl_uint count; - if (GPUFailedMsgI(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &count))) { - GPUError("Error getting OPENCL Device Count"); - return (1); - } - - if (_makefiles_opencl_obtain_program_helper(mInternals->context, count, mInternals->devices.get(), &mInternals->program, _binary_GPUReconstructionOCL1Code_bin_start)) { - clReleaseContext(mInternals->context); - GPUError("Could not obtain OpenCL progarm"); - return 1; - } - -#define GPUCA_OPENCL1 -#define GPUCA_KRNL(...) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(x_class, ...) \ - if (AddKernel(false)) { \ - return 1; \ - } -#define GPUCA_KRNL_LOAD_multi(x_class, ...) \ - if (AddKernel(true)) { \ - return 1; \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL -#undef GPUCA_OPENCL1 -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi - - return 0; -} - -bool GPUReconstructionOCL1Backend::CheckPlatform(uint32_t i) -{ - char platform_version[64] = {}, platform_vendor[64] = {}; - clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); - clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); - if (strcmp(platform_vendor, "Advanced Micro Devices, Inc.") == 0 && strstr(platform_version, "OpenCL 2.0 AMD-APP (") != nullptr) { - float ver = 0; - sscanf(platform_version, "OpenCL 2.0 AMD-APP (%f)", &ver); - if (ver < 2000.f) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("AMD APP OpenCL Platform found"); - } - return true; - } - } - return false; -} diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.h deleted file mode 100644 index c9a3b89a79cd1..0000000000000 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCL1.h -/// \author David Rohr - -#ifndef GPURECONSTRUCTIONOCL1_H -#define GPURECONSTRUCTIONOCL1_H - -#include "GPUReconstructionOCL.h" - -#ifdef _WIN32 -extern "C" __declspec(dllexport) GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); -#else -extern "C" GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); -#endif - -namespace GPUCA_NAMESPACE::gpu -{ -struct GPUReconstructionOCL1Internals; - -class GPUReconstructionOCL1Backend : public GPUReconstructionOCL -{ - public: - ~GPUReconstructionOCL1Backend() override = default; - - protected: - GPUReconstructionOCL1Backend(const GPUSettingsDeviceBackend& cfg); - - template - int32_t runKernelBackend(const krnlSetupArgs& args); - template - S& getKernelObject(); - - RecoStepField AvailableGPURecoSteps() override { return (RecoStep::TPCSliceTracking); } - bool ContextForAllPlatforms() override { return true; } - bool CheckPlatform(uint32_t i) override; - int32_t GetOCLPrograms() override; -}; - -using GPUReconstructionOCL1 = GPUReconstructionKernels; -} // namespace GPUCA_NAMESPACE::gpu - -#endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1Internals.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1Internals.h deleted file mode 100644 index 997a108ac26d0..0000000000000 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL1Internals.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCL1Internals.h -/// \author David Rohr, Sergey Gorbunov - -#ifndef GPUTPCGPUTRACKEROPENCLINTERNALS1_H -#define GPUTPCGPUTRACKEROPENCLINTERNALS1_H - -#include "GPUReconstructionOCLInternals.h" - -namespace GPUCA_NAMESPACE::gpu -{ - -struct GPUReconstructionOCL1Internals : public GPUReconstructionOCLInternals { -}; - -} // namespace GPUCA_NAMESPACE::gpu - -#endif diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index dd3480cae86bd..2cf03860a6d86 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -22,7 +22,7 @@ endif() include(cmake/helpers.cmake) if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - if(ENABLE_CUDA OR ENABLE_OPENCL1 OR ENABLE_OPENCL2 OR ENABLE_HIP) + if(ENABLE_CUDA OR ENABLE_OPENCL2 OR ENABLE_HIP) include(FeatureSummary) find_package(O2GPU) else() @@ -415,7 +415,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/cuda ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/hip ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/opencl-common - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/opencl ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/opencl2 ${CMAKE_SOURCE_DIR}/GPU/TPCFastTransformation) alice_usevc() @@ -523,20 +522,15 @@ endif() target_compile_options(${targetName} PRIVATE -Wno-instantiation-after-specialization) # Add CMake recipes for GPU Tracking librararies -if(CUDA_ENABLED OR OPENCL1_ENABLED OR OPENCL2_ENABLED OR HIP_ENABLED) +if(CUDA_ENABLED OR OPENCL2_ENABLED OR HIP_ENABLED) if(CMAKE_SYSTEM_NAME MATCHES Darwin) message(WARNING "GPU Tracking disabled on MacOS") else() if(CUDA_ENABLED) add_subdirectory(Base/cuda) endif() - if(OPENCL1_ENABLED OR OPENCL2_ENABLED) - add_subdirectory(Base/opencl-common) - endif() - if(OPENCL1_ENABLED) - add_subdirectory(Base/opencl) - endif() if(OPENCL2_ENABLED) + add_subdirectory(Base/opencl-common) add_subdirectory(Base/opencl2) endif() if(HIP_ENABLED) diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index d3b88f0239c7b..8bcd06576d776 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -125,9 +125,6 @@ namespace gpu #define GPUCA_RECO_STEP GPUDataTypes #endif -#if defined(__OPENCL1__) -MEM_CLASS_PRE() // Macro with some template magic for OpenCL 1.2 -#endif class GPUTPCTrack; class GPUTPCHitId; class GPUTPCGMMergedTrack; diff --git a/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h b/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h index 1015b31fe6556..810e4dd58ca0e 100644 --- a/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h @@ -17,7 +17,7 @@ // Pull in several O2 headers with basic data types, or load a header with empty fake classes if O2 headers not available -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(__OPENCL1__) +#if defined(GPUCA_HAVE_O2HEADERS) #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/Digit.h" #include "DetectorsBase/MatLayerCylSet.h" @@ -27,8 +27,6 @@ #include "GPUO2FakeClasses.h" #endif -#if !defined(__OPENCL1__) #include "GPUdEdxInfo.h" -#endif #endif diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index b967a7ce42620..b853d80754080 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -45,9 +45,7 @@ class GPUSettings RejectionStrategyA = 1, RejectionStrategyB = 2 }; -#if !defined(__OPENCL1__) static CONSTEXPR const uint32_t TPC_MAX_TF_TIME_BIN = ((256 * 3564 + 2 * 8 - 2) / 8); -#endif }; #ifdef GPUCA_NOCOMPAT diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h index 09193e76b9382..88294b2b06c25 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h @@ -29,7 +29,6 @@ namespace gpu class GPUTPCGMPolynomialField { public: -#if !defined(__OPENCL1__) GPUTPCGMPolynomialField() : mNominalBz(0.f) { Reset(); @@ -75,11 +74,6 @@ class GPUTPCGMPolynomialField const float* GetCoefmItsBx() const { return mItsBx; } const float* GetCoefmItsBy() const { return mItsBy; } const float* GetCoefmItsBz() const { return mItsBz; } -#else -#define NTPCM 10 -#define NTRDM 20 -#define NITSM 10 -#endif private: float mNominalBz; // nominal constant field value in [kG * 2.99792458E-4 GeV/c/cm] @@ -94,8 +88,6 @@ class GPUTPCGMPolynomialField float mItsBz[NITSM]; }; -#if !defined(__OPENCL1__) - inline void GPUTPCGMPolynomialField::Reset() { mNominalBz = 0.f; @@ -297,7 +289,6 @@ GPUdi() float GPUTPCGMPolynomialField::GetFieldItsBz(float x, float y, float z) return bz; } -#endif // __OPENCL__ } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index 515905abe48b5..75b08047834bb 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -34,9 +34,6 @@ namespace gpu // Should be unified, but cannot take the contants from the official headers for now, since we want it to be constexpr class GPUTPCGeometry // TODO: Make values constexpr { -#if defined(__OPENCL1__) - GPUTPCGeometry(); // Fake constructor declaration for OpenCL due to static members, does not exist! -#endif #ifdef GPUCA_TPC_GEOMETRY_O2 const float mX[GPUCA_ROW_COUNT] GPUCA_CPP11_INIT(= {85.225f, 85.975f, 86.725f, 87.475f, 88.225f, 88.975f, 89.725f, 90.475f, 91.225f, 91.975f, 92.725f, 93.475f, 94.225f, 94.975f, 95.725f, 96.475f, 97.225f, 97.975f, 98.725f, 99.475f, 100.225f, 100.975f, 101.725f, 102.475f, 103.225f, 103.975f, 104.725f, 105.475f, 106.225f, 106.975f, 107.725f, 108.475f, 109.225f, 109.975f, 110.725f, 111.475f, 112.225f, 112.975f, 113.725f, 114.475f, 115.225f, 115.975f, 116.725f, 117.475f, @@ -63,9 +60,7 @@ class GPUTPCGeometry // TODO: Make values constexpr const float mPadHeight[10] GPUCA_CPP11_INIT(= {.75f, .75f, .75f, .75f, 1.f, 1.f, 1.2f, 1.2f, 1.5f, 1.5f}); const float mPadWidth[10] GPUCA_CPP11_INIT(= {.416f, .420f, .420f, .436f, .6f, .6f, .608f, .588f, .604f, .607f}); -#if !defined(__OPENCL1__) static CONSTEXPR float FACTOR_T2Z GPUCA_CPP11_INIT(= 250.f / 512.f); // Used in compression, must remain constant at 250cm, 512 time bins! -#endif public: GPUd() int32_t GetRegion(int32_t row) const { return mRegion[row]; } @@ -95,9 +90,7 @@ class GPUTPCGeometry // TODO: Make values constexpr const float mPadHeight[3] GPUCA_CPP11_INIT(= {.75f, 1.f, 1.5f}); const float mPadWidth[3] GPUCA_CPP11_INIT(= {.4f, .6f, .6f}); -#if !defined(__OPENCL1__) static CONSTEXPR float FACTOR_T2Z GPUCA_CPP11_INIT(= 250.f / 1024.f); // Used in compression, must remain constant at 250cm, 1024 time bins! -#endif public: GPUd() int32_t GetRegion(int32_t row) const { return (row < 63 ? 0 : row < 63 + 64 ? 1 : 2); } @@ -109,9 +102,8 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() int32_t EndOROC2() const { return GPUCA_ROW_COUNT; } #endif private: -#if !defined(__OPENCL1__) static CONSTEXPR float FACTOR_Z2T GPUCA_CPP11_INIT(= 1.f / FACTOR_T2Z); -#endif + public: GPUd() static CONSTEXPR float TPCLength() { return 250.f - 0.275f; } GPUd() float Row2X(int32_t row) const { return (mX[row]); } @@ -120,7 +112,6 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() float PadWidth(int32_t row) const { return (mPadWidth[GetRegion(row)]); } GPUd() uint8_t NPads(int32_t row) const { return mNPads[row]; } -#if !defined(__OPENCL1__) GPUd() float LinearPad2Y(int32_t slice, int32_t row, float pad) const { const float u = (pad - 0.5f * mNPads[row]) * PadWidth(row); @@ -144,7 +135,6 @@ class GPUTPCGeometry // TODO: Make values constexpr const float v = (slice >= GPUCA_NSLICES / 2) ? -z : z; return (250.f - v) * FACTOR_Z2T; // Used in compression, must remain constant at 250cm } -#endif }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 7693ee8553b77..1c8134f11efda 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -51,7 +51,7 @@ #if defined(GPUCA_NSLICES) || defined(GPUCA_ROW_COUNT) #error GPUCA_NSLICES or GPUCA_ROW_COUNT already defined, do not include GPUTPCGeometry.h before! #endif -#if defined(GPUCA_HAVE_O2HEADERS) && defined(GPUCA_TPC_GEOMETRY_O2) && !defined(__OPENCL1__) && !(defined(ROOT_VERSION_CODE) && ROOT_VERSION_CODE < 393216) +#if defined(GPUCA_HAVE_O2HEADERS) && defined(GPUCA_TPC_GEOMETRY_O2) && !(defined(ROOT_VERSION_CODE) && ROOT_VERSION_CODE < 393216) //Use definitions from the O2 headers if available for nicer code and type safety #include "DataFormatsTPC/Constants.h" #define GPUCA_NSLICES o2::tpc::constants::MAXSECTOR diff --git a/GPU/GPUTracking/Definitions/GPULogging.h b/GPU/GPUTracking/Definitions/GPULogging.h index f3c6c019f593b..32557edb01d1b 100644 --- a/GPU/GPUTracking/Definitions/GPULogging.h +++ b/GPU/GPUTracking/Definitions/GPULogging.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" // clang-format off #if !defined(GPUCA_NOCOMPAT) - // Cannot do anything for ROOT5CINT / OpenCL1, so just disable + // Cannot do anything for ROOT5CINT, so just disable #define GPUInfo(...) #define GPUImportant(...) #define GPUWarning(...) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 07cd320140909..d09f9c89a8077 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -537,7 +537,7 @@ AddOption(constBz, bool, false, "", 0, "force constant Bz for tests") AddOption(setMaxTimeBin, int32_t, -2, "", 0, "maximum time bin of continuous data, 0 for triggered events, -1 for automatic continuous mode, -2 for automatic continuous / triggered") AddOption(overrideNHbfPerTF, int32_t, 0, "", 0, "Overrides the number of HBF per TF if != 0") AddOption(overrideTPCTimeBinCur, int32_t, 0, "", 0, "Overrides TPC time bin cut if > 0") -AddOption(deviceType, std::string, "CPU", "", 0, "Device type, CPU | CUDA | HIP | OCL1 | OCL2") +AddOption(deviceType, std::string, "CPU", "", 0, "Device type, CPU | CUDA | HIP | OCL2") AddOption(forceDeviceType, bool, true, "", 0, "force device type, otherwise allows fall-back to CPU") AddOption(synchronousProcessing, bool, false, "", 0, "Apply performance shortcuts for synchronous processing, disable unneeded steps") AddOption(dump, int32_t, 0, "", 0, "Dump events for standalone benchmark: 1 = dump events, 2 = dump events and skip processing in workflow") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index ff4133d9b2ce3..8eddab63df35c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -567,7 +567,6 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) } #endif -// TODO: Clusterizer not working with OCL1 (Clusterizer on CPU, Tracking on GPU) int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) { if (param().rec.fwdTPCDigitsAsClusters) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx index c1a3c685947d6..5cf14ca6ab5a4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx @@ -22,8 +22,6 @@ using namespace GPUCA_NAMESPACE::gpu; -#if !defined(__OPENCL1__) - GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) { /*for (int32_t j = 0;j < Tracks()[j].NHits();j++) @@ -200,7 +198,6 @@ GPUd() void GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(uint32_t iSlice, right += GPUDataTypes::NSLICES / 2; } } -#endif // !__OPENCL1__ template <> GPUdii() void GPUTPCGlobalTrackingCopyNumbers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() tracker, int32_t n) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h index 9d732a582b1c4..c45391cd46a4c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h @@ -25,7 +25,6 @@ namespace gpu MEM_CLASS_PRE() class GPUTPCTracker; -#if !defined(__OPENCL1__) class GPUTPCGlobalTracking : public GPUKernelTemplate { public: @@ -49,7 +48,6 @@ class GPUTPCGlobalTracking : public GPUKernelTemplate GPUd() static int32_t PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); GPUd() static void PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, GPUTPCTracker& sliceTarget, bool right); }; -#endif class GPUTPCGlobalTrackingCopyNumbers : public GPUKernelTemplate { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h index 3ab5b0a331f31..5108c3f2ec8bb 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h @@ -44,7 +44,6 @@ class GPUTPCSliceOutput } GPUhd() uint32_t NLocalTracks() const { return mNLocalTracks; } GPUhd() uint32_t NTrackClusters() const { return mNTrackClusters; } -#if !defined(__OPENCL1__) GPUhd() const GPUTPCTrack* GetFirstTrack() const { return (const GPUTPCTrack*)((const char*)this + sizeof(*this)); @@ -53,7 +52,6 @@ class GPUTPCSliceOutput { return (GPUTPCTrack*)((char*)this + sizeof(*this)); } -#endif GPUhd() size_t Size() const { return (mMemorySize); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx index 84bdc52ab6f46..4970ff90a934c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx @@ -22,9 +22,7 @@ #include "GPUO2DataTypes.h" #include "GPUTPCTrackParam.h" #include "GPUParam.inc" -#if !defined(__OPENCL1__) #include "GPUTPCConvertImpl.h" -#endif #if !defined(GPUCA_GPUCODE) #include diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h index da8d3d1fb28d4..488807e981b5b 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h @@ -94,14 +94,12 @@ class GPUTPCTracker : public GPUProcessor StructGPUParameters gpuParameters; // GPU parameters }; -#if !defined(__OPENCL1__) GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const { return mData.ClusterData(); } GPUhdi() MakeType(const MEM_LG(GPUTPCRow) &) Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } GPUhdi() GPUglobalref() GPUTPCSliceOutput* Output() const { return mOutput; } -#endif GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const { return (mCommonMem); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx index 05e75232297a3..c073ad3d26b8b 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx @@ -21,13 +21,11 @@ #include "GPUTPCTracker.h" #include "GPUTPCTracklet.h" #include "GPUTPCTrackletConstructor.h" -#if !defined(__OPENCL1__) #include "GPUTPCGlobalTracking.h" #include "CorrectionMapsHelper.h" #ifdef GPUCA_HAVE_O2HEADERS #include "CalibdEdxContainer.h" #endif // GPUCA_HAVE_O2HEADERS -#endif // OPENCL1 #include "GPUParam.inc" #include "GPUCommonMath.h" @@ -140,18 +138,14 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, float z = z0 + hh.y * stepZ; if (iRow != r.mStartRow || !tracker.Param().par.continuousTracking) { tParam.ConstrainZ(z, tracker.ISlice(), z0, r.mLastZ); -#if !defined(__OPENCL1__) tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, z); -#endif } if (iRow == r.mStartRow) { if (tracker.Param().par.continuousTracking) { float refZ = ((z > 0) ? tracker.Param().rec.tpc.defaultZOffsetOverR : -tracker.Param().rec.tpc.defaultZOffsetOverR) * x; -#if !defined(__OPENCL1__) float zTmp = refZ; tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, zTmp); z += zTmp - refZ; // Add zCorrection (=zTmp - refZ) to z, such that zOffset is set such, that transformed (z - zOffset) becomes refZ -#endif tParam.SetZOffset(z - refZ); tParam.SetZ(refZ); r.mLastZ = refZ; @@ -266,7 +260,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, r.mNMissed++; float x = row.X(); -#if !defined(__OPENCL1__) { float tmpY, tmpZ; if (!tParam.GetPropagatedYZ(tracker.Param().bzCLight, x, tmpY, tmpZ)) { @@ -277,7 +270,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, tParam.ConstrainZ(tmpZ, tracker.ISlice(), z0, r.mLastZ); tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISlice(), iRow, tmpY, tmpZ, x); } -#endif CADEBUG(printf("%14s: SEA TRACK ROW %3d X %8.3f -", "", iRow, tParam.X()); for (int32_t i = 0; i < 5; i++) { printf(" %8.3f", tParam.Par()[i]); } printf(" -"); for (int32_t i = 0; i < 15; i++) { printf(" %8.3f", tParam.Cov()[i]); } printf("\n")); if (!tParam.TransportToX(x, tParam.SinPhi(), tParam.GetCosPhi(), tracker.Param().bzCLight, GPUCA_MAX_SIN_PHI_LOW)) { @@ -299,9 +291,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); #endif //! GPUCA_TEXTURE_FETCH_CONSTRUCTOR -#if !defined(__OPENCL1__) tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISlice(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); -#endif if (tracker.Param().rec.tpc.rejectEdgeClustersInSeeding && tracker.Param().rejectEdgeClusterByY(yUncorrected, iRow, CAMath::Sqrt(tParam.Err2Y()))) { rowHit = CALINK_INVAL; @@ -391,7 +381,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } } while (false); (void)found; -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(__OPENCL1__) +#if defined(GPUCA_HAVE_O2HEADERS) if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISlice(), iRow, yUncorrected)); if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISlice(), iRow, pad)) { @@ -461,7 +451,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLO iRow = r.mEndRow; iRowEnd = -1; float x = tracker.Row(r.mEndRow).X(); -#if !defined(__OPENCL1__) { float tmpY, tmpZ; if (tParam.GetPropagatedYZ(tracker.Param().bzCLight, x, tmpY, tmpZ)) { @@ -476,7 +465,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLO continue; } } -#endif if ((r.mGo = (tParam.TransportToX(x, tracker.Param().bzCLight, GPUCA_MAX_SIN_PHI) && tParam.Filter(r.mLastY, r.mLastZ, tParam.Err2Y() * 0.5f, tParam.Err2Z() * 0.5f, GPUCA_MAX_SIN_PHI_LOW, true)))) { CADEBUG(printf("%14s: SEA BACK ROW %3d X %8.3f -", "", iRow, tParam.X()); for (int32_t i = 0; i < 5; i++) { printf(" %8.3f", tParam.Par()[i]); } printf(" -"); for (int32_t i = 0; i < 15; i++) { printf(" %8.3f", tParam.Cov()[i]); } printf("\n")); float err2Y, err2Z; @@ -584,7 +572,6 @@ GPUd() int32_t GPUTPCTrackletConstructor::FetchTracklet(GPUconstantref() MEM_GLO #endif // GPUCA_GPUCODE -#if !defined(__OPENCL1__) template <> // FIXME: GPUgeneric() needed to make the clang spirv output link correctly GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, MEM_LG(GPUTPCTrackParam) & GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) { @@ -602,4 +589,3 @@ GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTrackin } return (rMem.mNHits); } -#endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h index effee4fa757b8..f82aba47788f9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h @@ -100,10 +100,8 @@ class GPUTPCTrackletConstructor GPUd() static int32_t FetchTracklet(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem); #endif // GPUCA_GPUCODE -#if !defined(__OPENCL1__) template GPUd() static int32_t GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); -#endif typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 29f908c538af7..c2167d9591ba2 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -14,7 +14,6 @@ set(ENABLE_CUDA AUTO) set(ENABLE_HIP AUTO) -set(ENABLE_OPENCL1 AUTO) set(ENABLE_OPENCL2 AUTO) set(CONFIG_OPENMP 1) set(GPUCA_CONFIG_VC 1) diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 42fd6b3d2402a..1667ad867a9e7 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -52,8 +52,8 @@ function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) endif() set(TMP_PRE "") set(TMP_POST "") - if(NOT kernel_bounds MATCHES "_OCL1") - set(TMP_PRE "#ifdef GPUCA_KRNL_NOOCL1\n") + if(NOT kernel_bounds MATCHES "_ALIR") + set(TMP_PRE "#ifdef GPUCA_KRNL_NOALIROOT\n") set(TMP_POST "#endif\n") endif() set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_type}), (${OPT1}), (${OPT2}), (${OPT3}))\n") diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 516d1fced0a20..8c042d51514c4 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -20,7 +20,7 @@ #include "GPUCommonMath.h" #include "GPUParam.h" #include "GPUdEdxInfo.h" -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(GPUCA_OPENCL1) +#if defined(GPUCA_HAVE_O2HEADERS) #include "DataFormatsTPC/Defs.h" #include "CalibdEdxContainer.h" #include "GPUDebugStreamer.h" @@ -30,7 +30,7 @@ namespace GPUCA_NAMESPACE { namespace gpu { -#if !defined(GPUCA_HAVE_O2HEADERS) || defined(GPUCA_OPENCL1) +#if !defined(GPUCA_HAVE_O2HEADERS) class GPUdEdx { @@ -212,7 +212,7 @@ GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestri mNSubThresh++; } -#endif // !GPUCA_HAVE_O2HEADERS || GPUCA_OPENCL1 +#endif // !GPUCA_HAVE_O2HEADERS } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index f028c6990f267..bfa738201b637 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -33,16 +33,16 @@ o2_gpu_kernel_file_list(MATLUT) o2_gpu_kernel_file_list(TPCMERGER) endif() -o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRACKER" LB_OCL1 single) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB_OCL1 both) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO_OCL1 "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO_OCL1 "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" "GPUTPCGlobalTracking TPCTRACKER" NO_OCL1 single int32_t n) +o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRACKER" LB_ALIR single) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB_ALIR both) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO_ALIR "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO_ALIR "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" "GPUTPCGlobalTracking TPCTRACKER" NO_ALIR single int32_t n) o2_gpu_add_kernel("GPUTPCGlobalTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) o2_gpu_add_kernel("GPUTPCCreateSliceData" "= TPCTRACKER TPCSLICEDATA" LB single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER" NO single) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index c9420de2b704b..aacaf7fcedd8d 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -12,9 +12,6 @@ if(NOT DEFINED ENABLE_CUDA) set(ENABLE_CUDA "AUTO") endif() -if(NOT DEFINED ENABLE_OPENCL1) - set(ENABLE_OPENCL1 "AUTO") -endif() if(NOT DEFINED ENABLE_OPENCL2) set(ENABLE_OPENCL2 "AUTO") endif() @@ -22,7 +19,6 @@ if(NOT DEFINED ENABLE_HIP) set(ENABLE_HIP "AUTO") endif() string(TOUPPER "${ENABLE_CUDA}" ENABLE_CUDA) -string(TOUPPER "${ENABLE_OPENCL1}" ENABLE_OPENCL1) string(TOUPPER "${ENABLE_OPENCL2}" ENABLE_OPENCL2) string(TOUPPER "${ENABLE_HIP}" ENABLE_HIP) if(NOT DEFINED CMAKE_BUILD_TYPE_UPPER) @@ -151,32 +147,14 @@ if(ENABLE_CUDA) endif() # Detect and enable OpenCL 1.2 from AMD -if(ENABLE_OPENCL1 OR ENABLE_OPENCL2) +if(ENABLE_OPENCL2) find_package(OpenCL) - if((ENABLE_OPENCL1 AND NOT ENABLE_OPENCL1 STREQUAL "AUTO") - OR (ENABLE_OPENCL2 AND NOT ENABLE_OPENCL2 STREQUAL "AUTO")) + if(ENABLE_OPENCL2 AND NOT ENABLE_OPENCL2 STREQUAL "AUTO") set_package_properties(OpenCL PROPERTIES TYPE REQUIRED) else() set_package_properties(OpenCL PROPERTIES TYPE OPTIONAL) endif() endif() -if(ENABLE_OPENCL1) - if(NOT AMDAPPSDKROOT) - set(AMDAPPSDKROOT "$ENV{AMDAPPSDKROOT}") - endif() - - if(OpenCL_FOUND - AND OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 1.2 - AND AMDAPPSDKROOT - AND EXISTS "${AMDAPPSDKROOT}") - set(OPENCL1_ENABLED ON) - message(STATUS "Found AMD OpenCL 1.2") - elseif(NOT ENABLE_OPENCL1 STREQUAL "AUTO") - message(FATAL_ERROR "AMD OpenCL 1.2 not available") - else() - set(OPENCL1_ENABLED OFF) - endif() -endif() # Detect and enable OpenCL 2.x if(ENABLE_OPENCL2) From 514e95337f999edfe4a3629e4843624c9232cddb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 15 Jan 2025 13:25:39 +0100 Subject: [PATCH 2/4] GPU: Remove some template magic that was only needed for OpenCL 1.2 --- GPU/GPUTracking/Base/GPUConstantMem.h | 12 +- GPU/GPUTracking/Base/GPUGeneralKernels.cxx | 4 +- GPU/GPUTracking/Base/GPUGeneralKernels.h | 15 +-- GPU/GPUTracking/Base/GPUParam.h | 1 - GPU/GPUTracking/Base/GPUParam.inc | 56 +++----- GPU/GPUTracking/Base/GPUProcessor.h | 8 +- .../Base/GPUReconstructionDeviceBase.cxx | 1 - .../Base/GPUReconstructionKernelMacros.h | 4 +- .../opencl-common/GPUReconstructionOCL.cl | 2 +- GPU/GPUTracking/CMakeLists.txt | 1 - GPU/GPUTracking/Definitions/GPUDef.h | 5 +- .../Definitions/GPUDefOpenCL12Templates.h | 86 ------------ GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- GPU/GPUTracking/Refit/GPUTrackingRefit.h | 8 +- .../SliceTracker/GPUTPCBaseTrackParam.h | 6 +- .../SliceTracker/GPUTPCCreateSliceData.cxx | 2 +- .../SliceTracker/GPUTPCCreateSliceData.h | 7 +- .../SliceTracker/GPUTPCGlobalTracking.cxx | 12 +- .../SliceTracker/GPUTPCGlobalTracking.h | 20 ++- GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx | 18 +-- GPU/GPUTracking/SliceTracker/GPUTPCGrid.h | 1 - .../SliceTracker/GPUTPCNeighboursCleaner.cxx | 8 +- .../SliceTracker/GPUTPCNeighboursCleaner.h | 9 +- .../SliceTracker/GPUTPCNeighboursFinder.cxx | 16 +-- .../SliceTracker/GPUTPCNeighboursFinder.h | 12 +- GPU/GPUTracking/SliceTracker/GPUTPCRow.h | 7 +- .../GPUTPCSectorDebugSortKernels.cxx | 4 +- .../SliceTracker/GPUTPCSliceData.cxx | 8 +- .../SliceTracker/GPUTPCSliceData.h | 123 ++++++------------ .../SliceTracker/GPUTPCStartHitsFinder.cxx | 6 +- .../SliceTracker/GPUTPCStartHitsFinder.h | 9 +- .../SliceTracker/GPUTPCStartHitsSorter.cxx | 2 +- .../SliceTracker/GPUTPCStartHitsSorter.h | 9 +- GPU/GPUTracking/SliceTracker/GPUTPCTrack.h | 9 +- .../SliceTracker/GPUTPCTrackLinearisation.h | 5 +- .../SliceTracker/GPUTPCTrackParam.cxx | 72 ++++------ .../SliceTracker/GPUTPCTrackParam.h | 13 +- GPU/GPUTracking/SliceTracker/GPUTPCTracker.h | 106 ++++++--------- GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h | 9 +- .../GPUTPCTrackletConstructor.cxx | 44 +++---- .../SliceTracker/GPUTPCTrackletConstructor.h | 30 ++--- .../SliceTracker/GPUTPCTrackletSelector.cxx | 6 +- .../SliceTracker/GPUTPCTrackletSelector.h | 9 +- 43 files changed, 261 insertions(+), 526 deletions(-) delete mode 100644 GPU/GPUTracking/Definitions/GPUDefOpenCL12Templates.h diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 96b212eeea078..c36cec7100b59 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -71,12 +71,10 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() struct GPUConstantMem { - MEM_CONSTANT(GPUParam) - param; - MEM_GLOBAL(GPUTPCTracker) - tpcTrackers[GPUCA_NSLICES]; + GPUParam param; + GPUTPCTracker + tpcTrackers[GPUCA_NSLICES]; GPUTPCConvert tpcConverter; GPUTPCCompression tpcCompressor; GPUTPCDecompression tpcDecompressor; @@ -150,7 +148,7 @@ namespace gpu { // Must be placed here, to avoid circular header dependency -GPUdi() GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * GPUProcessor::GetConstantMem() const +GPUdi() GPUconstantref() const GPUConstantMem* GPUProcessor::GetConstantMem() const { #if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) return &GPUCA_CONSMEM; @@ -159,7 +157,7 @@ GPUdi() GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * GPUProcessor::GetC #endif } -GPUdi() GPUconstantref() const MEM_CONSTANT(GPUParam) & GPUProcessor::Param() const +GPUdi() GPUconstantref() const GPUParam& GPUProcessor::Param() const { return GetConstantMem()->param; } diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.cxx b/GPU/GPUTracking/Base/GPUGeneralKernels.cxx index 8fc60bae6dbe9..44faf09112e5e 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.cxx +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.cxx @@ -17,7 +17,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUMemClean16::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() processors, GPUglobalref() void* ptr, uint64_t size) +GPUdii() void GPUMemClean16::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, GPUglobalref() void* ptr, uint64_t size) { const uint64_t stride = get_global_size(0); int4 i0; @@ -30,7 +30,7 @@ GPUdii() void GPUMemClean16::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_ } template <> -GPUdii() void GPUitoa::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() processors, GPUglobalref() int32_t* ptr, uint64_t size) +GPUdii() void GPUitoa::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, GPUglobalref() int32_t* ptr, uint64_t size) { const uint64_t stride = get_global_size(0); for (uint64_t i = get_global_id(0); i < size; i += stride) { diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index 44314e3393589..47f26e2443229 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -36,7 +36,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() struct GPUConstantMem; class GPUKernelTemplate @@ -50,7 +49,6 @@ class GPUKernelTemplate step4 = 4, step5 = 5 }; - MEM_CLASS_PRE() struct GPUSharedMemory { }; @@ -82,21 +80,20 @@ class GPUKernelTemplate #endif }; - typedef GPUconstantref() MEM_CONSTANT(GPUConstantMem) processorType; + typedef GPUconstantref() GPUConstantMem processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors; } #ifdef GPUCA_NOCOMPAT template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors, Args... args) + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... args) { } #else template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors) + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors) { } #endif @@ -108,7 +105,7 @@ class GPUMemClean16 : public GPUKernelTemplate public: GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); }; // Fill with incrementing sequnce of integers @@ -117,7 +114,7 @@ class GPUitoa : public GPUKernelTemplate public: GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; } // namespace gpu diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index ce9ac30b7c35b..48771578c63a4 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -79,7 +79,6 @@ struct GPUParam_t { } // namespace internal #if !(defined(__CINT__) || defined(__ROOTCINT__)) || defined(__CLING__) // Hide from ROOT 5 CINT -MEM_CLASS_PRE() struct GPUParam : public internal::GPUParam_t { #ifndef GPUCA_GPUCODE diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 1e972189d1b92..0b32067f8980c 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -24,8 +24,7 @@ namespace GPUCA_NAMESPACE namespace gpu { -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::Slice2Global(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const +GPUdi() void GPUParam::Slice2Global(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const { // conversion of coordinates sector->global *X = x * SliceParam[iSlice].CosAlpha - y * SliceParam[iSlice].SinAlpha; @@ -33,8 +32,7 @@ GPUdi() void MEM_LG(GPUParam)::Slice2Global(int32_t iSlice, float x, float y, fl *Z = z; } -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::Global2Slice(int32_t iSlice, float X, float Y, float Z, float* x, float* y, float* z) const +GPUdi() void GPUParam::Global2Slice(int32_t iSlice, float X, float Y, float Z, float* x, float* y, float* z) const { // conversion of coordinates global->sector *x = X * SliceParam[iSlice].CosAlpha + Y * SliceParam[iSlice].SinAlpha; @@ -44,8 +42,7 @@ GPUdi() void MEM_LG(GPUParam)::Global2Slice(int32_t iSlice, float X, float Y, fl #ifdef GPUCA_TPC_GEOMETRY_O2 -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const +GPUdi() void GPUParam::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { const int32_t rowType = tpcGeometry.GetROC(iRow); z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); @@ -60,10 +57,9 @@ GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t ErrZ2 = GetClusterErrorSeeding(1, rowType, z, angleZ2, unscaledMult); // Returns Err2 } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult) const // Note, returns Err2 despite the name not containing 2 +GPUdi() float GPUParam::GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult) const // Note, returns Err2 despite the name not containing 2 { - MakeType(const float*) c = ParamErrors[yz][type]; // Note: c[0] = p[0]^2, c[1] = p[1]^2 * padHeight, c[2] = p[2]^2 / tpcLength / padHeight, c[3] = p[3]^2 * clusterErrorOccupancyScaler^2 + const float* c = ParamErrors[yz][type]; // Note: c[0] = p[0]^2, c[1] = p[1]^2 * padHeight, c[2] = p[2]^2 / tpcLength / padHeight, c[3] = p[3]^2 * clusterErrorOccupancyScaler^2 float v = c[0] + c[1] * angle2 + c[2] * zDiff + c[3] * (unscaledMult * unscaledMult); v = CAMath::Abs(v); v *= yz ? rec.tpc.clusterError2CorrectionZ : rec.tpc.clusterError2CorrectionY; @@ -71,10 +67,9 @@ GPUdi() float MEM_LG(GPUParam)::GetClusterErrorSeeding(int32_t yz, int32_t type, return v; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float scaledInvAvgCharge, float scaledInvCharge) const +GPUdi() float GPUParam::GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float scaledInvAvgCharge, float scaledInvCharge) const { - MakeType(const float*) c = ParamErrors[yz][type]; // Note: c[0] = p[0]^2, c[1] = p[1]^2 * padHeight, c[2] = p[2]^2 / tpcLength / padHeight, c[3] = p[3]^2 * clusterErrorOccupancyScaler^2 + const float* c = ParamErrors[yz][type]; // Note: c[0] = p[0]^2, c[1] = p[1]^2 * padHeight, c[2] = p[2]^2 / tpcLength / padHeight, c[3] = p[3]^2 * clusterErrorOccupancyScaler^2 float v = c[0] + c[1] * angle2 * scaledInvAvgCharge + c[2] * zDiff * scaledInvCharge + c[3] * (unscaledMult * unscaledMult) * (scaledInvAvgCharge * scaledInvAvgCharge); v = CAMath::Abs(v); v *= yz ? rec.tpc.clusterError2CorrectionZ : rec.tpc.clusterError2CorrectionY; @@ -82,8 +77,7 @@ GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int32_t yz, int32_t type, float return v; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float x, float y, float z, bool sideC) const +GPUdi() float GPUParam::GetSystematicClusterErrorIFC2(float x, float y, float z, bool sideC) const { float sysErr = 0.f; const float kMaxExpArg = 9.f; // limit r-dumped error to this exp. argument @@ -116,8 +110,7 @@ GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float x, float y, return sysErr; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float x, float y, uint8_t sector) const +GPUdi() float GPUParam::GetSystematicClusterErrorC122(float x, float y, uint8_t sector) const { const float dx = x - 83.f; if (dx > occupancyTotal * rec.tpc.sysClusErrorC12Box) { @@ -131,17 +124,15 @@ GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float x, float y, #else // GPUCA_TPC_GEOMETRY_O2 -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float scaledMult) const +GPUdi() float GPUParam::GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float scaledMult) const { - MakeType(const float*) c = ParamErrorsSeeding0[yz][type]; + const float* c = ParamErrorsSeeding0[yz][type]; float v = c[0] + c[1] * zDiff + c[2] * angle2; v = CAMath::Abs(v); return v; } -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const +GPUdi() void GPUParam::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { int32_t rowType = tpcGeometry.GetROC(iRow); z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); @@ -156,10 +147,9 @@ GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t ErrZ2 = ErrZ2 * ErrZ2 * rec.tpc.clusterError2CorrectionZ + rec.tpc.clusterError2AdditionalZ; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float avgInvCharge, float invCharge) const +GPUdi() float GPUParam::GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float avgInvCharge, float invCharge) const { - MakeType(const float*) c = ParamS0Par[yz][type]; + const float* c = ParamS0Par[yz][type]; float v = c[0] + c[1] * zDiff + c[2] * angle2 + c[3] * zDiff * zDiff + c[4] * angle2 * angle2 + c[5] * zDiff * angle2; v = CAMath::Abs(v); if (v < 0.0001f) { @@ -170,22 +160,19 @@ GPUdi() float MEM_LG(GPUParam)::GetClusterError2(int32_t yz, int32_t type, float return v; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float trackX, float trackY, float z, bool sideC) const +GPUdi() float GPUParam::GetSystematicClusterErrorIFC2(float trackX, float trackY, float z, bool sideC) const { return 0; } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float trackX, float trackY, uint8_t sector) const +GPUdi() float GPUParam::GetSystematicClusterErrorC122(float trackX, float trackY, uint8_t sector) const { return 0; } #endif // !GPUCA_TPC_GEOMETRY_O2 -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::GetClusterErrors2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const +GPUdi() void GPUParam::GetClusterErrors2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const { const int32_t rowType = tpcGeometry.GetROC(iRow); z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); @@ -202,8 +189,7 @@ GPUdi() void MEM_LG(GPUParam)::GetClusterErrors2(uint8_t sector, int32_t iRow, f ErrZ2 = GetClusterError2(1, rowType, z, angleZ2, unscaledMult, scaledInvAvgCharge, scaledInvCharge); } -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUParam)::UpdateClusterError2ByState(int16_t clusterState, float& ErrY2, float& ErrZ2) const +GPUdi() void GPUParam::UpdateClusterError2ByState(int16_t clusterState, float& ErrY2, float& ErrZ2) const { if (clusterState & GPUTPCGMMergedTrackHit::flagEdge) { ErrY2 += rec.tpc.extraClusterErrorEdgeY2; @@ -223,8 +209,7 @@ GPUdi() void MEM_LG(GPUParam)::UpdateClusterError2ByState(int16_t clusterState, } } -MEM_CLASS_PRE() -GPUdi() float MEM_LG(GPUParam)::GetUnscaledMult(float time) const +GPUdi() float GPUParam::GetUnscaledMult(float time) const { if (!occupancyMap) { return 0.f; @@ -233,8 +218,7 @@ GPUdi() float MEM_LG(GPUParam)::GetUnscaledMult(float time) const return occupancyMap[bin]; } -MEM_CLASS_PRE() -GPUdi() bool MEM_LG(GPUParam)::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const +GPUdi() bool GPUParam::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const { return CAMath::Abs(uncorrectedY) > (tpcGeometry.NPads(iRow) - 1) * 0.5f * tpcGeometry.PadWidth(iRow) + rec.tpc.rejectEdgeClustersMargin + trackSigmaY * rec.tpc.rejectEdgeClustersSigmaMargin; } diff --git a/GPU/GPUTracking/Base/GPUProcessor.h b/GPU/GPUTracking/Base/GPUProcessor.h index 95b56a5c4cd28..af8dd895f4ecf 100644 --- a/GPU/GPUTracking/Base/GPUProcessor.h +++ b/GPU/GPUTracking/Base/GPUProcessor.h @@ -29,9 +29,7 @@ namespace gpu { struct GPUTrackingInOutPointers; class GPUReconstruction; -MEM_CLASS_PRE() struct GPUParam; -MEM_CLASS_PRE() struct GPUConstantMem; class GPUProcessor @@ -52,8 +50,8 @@ class GPUProcessor GPUProcessor& operator=(const GPUProcessor&) CON_DELETE; #endif - GPUd() GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * GetConstantMem() const; // Body in GPUConstantMem.h to avoid circular headers - GPUd() GPUconstantref() const MEM_CONSTANT(GPUParam) & Param() const; // ... + GPUd() GPUconstantref() const GPUConstantMem* GetConstantMem() const; // Body in GPUConstantMem.h to avoid circular headers + GPUd() GPUconstantref() const GPUParam& Param() const; // ... GPUd() void raiseError(uint32_t code, uint32_t param1 = 0, uint32_t param2 = 0, uint32_t param3 = 0) const; const GPUReconstruction& GetRec() const { return *mRec; } @@ -152,7 +150,7 @@ class GPUProcessor GPUReconstruction* mRec; ProcessorType mGPUProcessorType; GPUProcessor* mLinkedProcessor; - GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mConstantMem; + GPUconstantref() const GPUConstantMem* mConstantMem; private: bool mAllocateAndInitializeLate; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 70eedd0ca86d1..c9155c1cb8f60 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -28,7 +28,6 @@ using namespace GPUCA_NAMESPACE::gpu; #endif #include -MEM_CLASS_PRE() class GPUTPCRow; #define SemLockName "AliceHLTTPCGPUTrackerInitLockSem" diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index de6d5d079cd00..295e6e1a5d9b7 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -59,7 +59,7 @@ #else #define GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ { \ - GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::MEM_LOCAL(GPUSharedMemory) smem; \ + GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSlice_internal] GPUCA_M_STRIP(x_forward)); \ } #endif @@ -76,7 +76,7 @@ const int32_t nSliceBlockOffset = get_num_groups(0) * iSlice_internal / nSliceCount; \ const int32_t sliceBlockId = get_group_id(0) - nSliceBlockOffset; \ const int32_t sliceGridDim = get_num_groups(0) * (iSlice_internal + 1) / nSliceCount - get_num_groups(0) * (iSlice_internal) / nSliceCount; \ - GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::MEM_LOCAL(GPUSharedMemory) smem; \ + GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ GPUCA_M_STRIP_FIRST(x_class)::template Thread(sliceGridDim, get_local_size(0), sliceBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSlice + iSlice_internal] GPUCA_M_STRIP(x_forward)); \ } #endif diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl index 57b32850900b3..e94efce6503fe 100644 --- a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl @@ -81,7 +81,7 @@ typedef signed char int8_t; #define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) #define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) #define GPUCA_KRNL_LOAD_multi(...) GPUCA_KRNLGPU_MULTI(__VA_ARGS__) -#define GPUCA_CONSMEM_PTR GPUglobal() char *gpu_mem, GPUconstant() MEM_CONSTANT(GPUConstantMem) * pConstant, +#define GPUCA_CONSMEM_PTR GPUglobal() char *gpu_mem, GPUconstant() GPUConstantMem* pConstant, #define GPUCA_CONSMEM (*pConstant) #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 2cf03860a6d86..7e4ddf0dbd20e 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -137,7 +137,6 @@ set(HDRS_INSTALL Definitions/GPUDefGPUParameters.h Definitions/GPUDef.h Definitions/GPUDefMacros.h - Definitions/GPUDefOpenCL12Templates.h Definitions/GPULogging.h Definitions/GPUSettingsList.h Global/GPUChainTrackingDefs.h diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index 38784b1ded80e..7152bf2e1813b 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ b/GPU/GPUTracking/Definitions/GPUDef.h @@ -19,7 +19,6 @@ #include "GPUCommonDef.h" #include "GPUDefConstantsAndSettings.h" #include "GPUDefGPUParameters.h" -#include "GPUDefOpenCL12Templates.h" #include "GPUCommonRtypes.h" // Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects) @@ -42,7 +41,7 @@ #endif #ifdef GPUCA_GPUCODE - #define CA_MAKE_SHARED_REF(vartype, varname, varglobal, varshared) const GPUsharedref() MEM_LOCAL(vartype) & __restrict__ varname = varshared; + #define CA_MAKE_SHARED_REF(vartype, varname, varglobal, varshared) const GPUsharedref() vartype& __restrict__ varname = varshared; #define CA_SHARED_STORAGE(storage) storage #define CA_SHARED_CACHE(target, src, size) \ static_assert((size) % sizeof(int32_t) == 0, "Invalid shared cache size"); \ @@ -53,7 +52,7 @@ CA_SHARED_CACHE(target, src, size) \ GPUsharedref() const reftype* __restrict__ ref = (target) #else - #define CA_MAKE_SHARED_REF(vartype, varname, varglobal, varshared) const GPUglobalref() MEM_GLOBAL(vartype) & __restrict__ varname = varglobal; + #define CA_MAKE_SHARED_REF(vartype, varname, varglobal, varshared) const GPUglobalref() vartype & __restrict__ varname = varglobal; #define CA_SHARED_STORAGE(storage) #define CA_SHARED_CACHE(target, src, size) #define CA_SHARED_CACHE_REF(target, src, size, reftype, ref) GPUglobalref() const reftype* __restrict__ ref = src diff --git a/GPU/GPUTracking/Definitions/GPUDefOpenCL12Templates.h b/GPU/GPUTracking/Definitions/GPUDefOpenCL12Templates.h deleted file mode 100644 index f65e670399f34..0000000000000 --- a/GPU/GPUTracking/Definitions/GPUDefOpenCL12Templates.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUDefOpenCL12Templates.h -/// \author David Rohr, Sergey Gorbunov - -// clang-format off -#ifndef GPUDEFOPENCL12TEMPLATES_H -#define GPUDEFOPENCL12TEMPLATES_H - -// Special macros for OpenCL rev. 1.2 (encode address space in template parameter) -enum LocalOrGlobal { Mem_Local, Mem_Global, Mem_Constant, Mem_Plain }; -#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_USE_TEMPLATE_ADDRESS_SPACES) - template struct MakeTypeHelper; - template struct MakeTypeHelper { typedef L type; }; - template struct MakeTypeHelper { typedef G type; }; - template struct MakeTypeHelper { typedef C type; }; - template struct MakeTypeHelper { typedef P type; }; - #define MakeType(base_type) typename MakeTypeHelper::type - #define MEM_CLASS_PRE() template - #define MEM_CLASS_PRE_TEMPLATE(t) template - #define MEM_LG(type) type - #define MEM_CLASS_PRE2() template - #define MEM_CLASS_PRE2_TEMPLATE(t) template - #define MEM_LG2(type) type - #define MEM_CLASS_PRE12() template template - #define MEM_CLASS_PRE23() template - #define MEM_LG3(type) type - #define MEM_CLASS_PRE234() template - #define MEM_LG4(type) type - #define MEM_GLOBAL(type) type - #define MEM_LOCAL(type) type - #define MEM_LOCAL_TEMPLATE(type, t) type - #define MEM_CONSTANT(type) type - #define MEM_PLAIN(type) type - #define MEM_TEMPLATE() template - #define MEM_TYPE(type) T - #define MEM_TEMPLATE2() template - #define MEM_TYPE2(type) T2 - #define MEM_TEMPLATE3() template - #define MEM_TYPE3(type) T3 - #define MEM_TEMPLATE4() template - #define MEM_TYPE4(type) T4 -#else - #define MakeType(base_type) base_type - #define MEM_CLASS_PRE() - #define MEM_CLASS_PRE_TEMPLATE(t) template - #define MEM_LG(type) type - #define MEM_CLASS_PRE2() - #define MEM_CLASS_PRE2_TEMPLATE(t) template - #define MEM_LG2(type) type - #define MEM_CLASS_PRE12() - #define MEM_CLASS_PRE23() - #define MEM_LG3(type) type - #define MEM_CLASS_PRE234() - #define MEM_LG4(type) type - #define MEM_GLOBAL(type) type - #define MEM_LOCAL(type) type - #define MEM_LOCAL_TEMPLATE(type, t) type - #define MEM_CONSTANT(type) type - #define MEM_PLAIN(type) type - #define MEM_TEMPLATE() - #define MEM_TYPE(type) type - #define MEM_TEMPLATE2() - #define MEM_TYPE2(type) type - #define MEM_TEMPLATE3() - #define MEM_TYPE3(type) type - #define MEM_TEMPLATE4() - #define MEM_TYPE4(type) type -#endif - -#if defined(GPUCA_NO_CONSTANT_MEMORY) - #undef MEM_CONSTANT - #define MEM_CONSTANT(type) MEM_GLOBAL(type) -#endif - -#endif // GPUDEFOPENCL12TEMPLATES_H -// clang-format on diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 8220b743dde0e..8cca91c0a0033 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -431,7 +431,7 @@ template GPUdni() int32_t GPUTrackingRefit::RefitTrackioPtrs.mergedTrackHitStates; mPclusterNative = v->ioPtrs.clustersNative; diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.h b/GPU/GPUTracking/Refit/GPUTrackingRefit.h index 2cc414bbc2d81..bb45709d08165 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.h +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.h @@ -48,9 +48,7 @@ namespace o2::gpu class CorrectionMapsHelper; class GPUTPCGMTrackParam; class GPUTPCGMMergedTrack; -MEM_CLASS_PRE() struct GPUConstantMem; -MEM_CLASS_PRE() struct GPUParam; struct GPUTPCGMMergedTrackHit; @@ -58,13 +56,13 @@ class GPUTrackingRefit { public: void SetClusterStateArray(const uint8_t* v) { mPclusterState = v; } - void SetPtrsFromGPUConstantMem(const GPUConstantMem* v, MEM_CONSTANT(GPUParam) * p = nullptr); + void SetPtrsFromGPUConstantMem(const GPUConstantMem* v, GPUParam* p = nullptr); void SetPropagator(const o2::base::Propagator* v) { mPpropagator = v; } void SetClusterNative(const o2::tpc::ClusterNativeAccess* v) { mPclusterNative = v; } void SetTrackHits(const GPUTPCGMMergedTrackHit* v) { mPtrackHits = v; } void SetTrackHitReferences(const uint32_t* v) { mPtrackHitReferences = v; } void SetFastTransformHelper(const CorrectionMapsHelper* v) { mPfastTransformHelper = v; } - void SetGPUParam(const MEM_CONSTANT(GPUParam) * v) { mPparam = v; } + void SetGPUParam(const GPUParam* v) { mPparam = v; } GPUd() int32_t RefitTrackAsGPU(GPUTPCGMMergedTrack& trk, bool outward = false, bool resetCov = false) { return RefitTrack(trk, outward, resetCov); } GPUd() int32_t RefitTrackAsTrackParCov(GPUTPCGMMergedTrack& trk, bool outward = false, bool resetCov = false) { return RefitTrack(trk, outward, resetCov); } GPUd() int32_t RefitTrackAsGPU(o2::tpc::TrackTPC& trk, bool outward = false, bool resetCov = false) { return RefitTrack(trk, outward, resetCov); } @@ -97,7 +95,7 @@ class GPUTrackingRefit const GPUTPCGMMergedTrackHit* mPtrackHits = nullptr; // Ptr to hits for GPUTPCGMMergedTrack tracks const uint32_t* mPtrackHitReferences = nullptr; // Ptr to hits for TrackTPC tracks const CorrectionMapsHelper* mPfastTransformHelper = nullptr; // Ptr to TPC fast transform object helper - const MEM_CONSTANT(GPUParam) * mPparam = nullptr; // Ptr to GPUParam + const GPUParam* mPparam = nullptr; // Ptr to GPUParam template GPUd() int32_t RefitTrack(T& trk, bool outward, bool resetCov); template diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h b/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h index 28fa54544e292..c2fc7e58061da 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h @@ -21,7 +21,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTrackParam; /** @@ -31,7 +30,6 @@ class GPUTPCTrackParam; * used in output of the GPUTPCTracker slice tracker. * This class is used for transfer between tracker and merger and does not contain the covariance matrice */ -MEM_CLASS_PRE() struct GPUTPCBaseTrackParam { GPUd() float X() const { return mX; } GPUd() float Y() const { return mP[0]; } @@ -60,8 +58,8 @@ struct GPUTPCBaseTrackParam { GPUd() float GetKappa(float Bz) const { return -mP[4] * Bz; } - GPUhd() MakeType(const float*) Par() const { return mP; } - GPUd() const MakeType(float*) GetPar() const { return mP; } + GPUhd() const float* Par() const { return mP; } + GPUd() const float* GetPar() const { return mP; } GPUd() float GetPar(int32_t i) const { return (mP[i]); } GPUhd() void SetPar(int32_t i, float v) { mP[i] = v; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx index 3ddedd702f784..5c3e473aab0c9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx @@ -19,7 +19,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCCreateSliceData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCCreateSliceData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { tracker.Data().InitFromClusterData(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem(), tracker.ISlice(), s.tmp); } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h index 9a64d04e7ca6d..916891c2035ef 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h @@ -33,15 +33,14 @@ class GPUTPCCreateSliceData : public GPUKernelTemplate float tmp[4]; }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx index 5cf14ca6ab5a4..c86249fbb6f77 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx @@ -22,7 +22,7 @@ using namespace GPUCA_NAMESPACE::gpu; -GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) +GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) { /*for (int32_t j = 0;j < Tracks()[j].NHits();j++) { @@ -118,7 +118,7 @@ GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tra return (nHits >= tracker.Param().rec.tpc.globalTrackingMinHits); } -GPUd() void GPUTPCGlobalTracking::PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, GPUTPCTracker& GPUrestrict() sliceTarget, bool right) +GPUd() void GPUTPCGlobalTracking::PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& GPUrestrict() sliceTarget, bool right) { for (int32_t i = iBlock * nThreads + iThread; i < tracker.CommonMemory()->nLocalTracks; i += nThreads * nBlocks) { { @@ -158,9 +158,9 @@ GPUd() void GPUTPCGlobalTracking::PerformGlobalTracking(int32_t nBlocks, int32_t } template <> -GPUdii() void GPUTPCGlobalTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCGlobalTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) { - CA_SHARED_CACHE(&smem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(MEM_PLAIN(GPUTPCRow))); + CA_SHARED_CACHE(&smem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); if (tracker.NHitsTotal() == 0) { @@ -200,10 +200,10 @@ GPUd() void GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(uint32_t iSlice, } template <> -GPUdii() void GPUTPCGlobalTrackingCopyNumbers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& GPUrestrict() tracker, int32_t n) +GPUdii() void GPUTPCGlobalTrackingCopyNumbers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker, int32_t n) { for (int32_t i = get_global_id(0); i < n; i += get_global_size(0)) { - GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() trk = (&tracker)[i]; + GPUconstantref() GPUTPCTracker& GPUrestrict() trk = (&tracker)[i]; trk.CommonMemory()->nLocalTracks = trk.CommonMemory()->nTracks; trk.CommonMemory()->nLocalTrackHits = trk.CommonMemory()->nTrackHits; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h index c45391cd46a4c..367b4314814fe 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h @@ -22,45 +22,43 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; class GPUTPCGlobalTracking : public GPUKernelTemplate { public: struct GPUSharedMemory { - CA_SHARED_STORAGE(MEM_LG(GPUTPCRow) mRows[GPUCA_ROW_COUNT]); + CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); GPUd() static int32_t GlobalTrackingSliceOrder(int32_t iSlice); GPUd() static void GlobalTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right); private: - GPUd() static int32_t PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); - GPUd() static void PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, GPUTPCTracker& sliceTarget, bool right); + GPUd() static int32_t PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); + GPUd() static void PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& sliceTarget, bool right); }; class GPUTPCGlobalTrackingCopyNumbers : public GPUKernelTemplate { public: - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker, int32_t n); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker, int32_t n); }; } // namespace gpu diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx index 00fceaf8a5874..56d2e88db1c28 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx @@ -20,8 +20,7 @@ using namespace GPUCA_NAMESPACE::gpu; #include #endif -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCGrid)::CreateEmpty() +GPUd() void GPUTPCGrid::CreateEmpty() { // Create an empty grid mYMin = 0.f; @@ -37,8 +36,7 @@ GPUd() void MEM_LG(GPUTPCGrid)::CreateEmpty() mStepZInv = 1.f; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCGrid)::Create(float yMin, float yMax, float zMin, float zMax, int32_t ny, int32_t nz) +GPUd() void GPUTPCGrid::Create(float yMin, float yMax, float zMin, float zMax, int32_t ny, int32_t nz) { //* Create the grid mYMin = yMin; @@ -59,8 +57,7 @@ GPUd() void MEM_LG(GPUTPCGrid)::Create(float yMin, float yMax, float zMin, float mZMax = mZMin + mNz * sz; } -MEM_CLASS_PRE() -GPUd() int32_t MEM_LG(GPUTPCGrid)::GetBin(float Y, float Z) const +GPUd() int32_t GPUTPCGrid::GetBin(float Y, float Z) const { //* get the bin pointer const int32_t yBin = static_cast((Y - mYMin) * mStepYInv); @@ -73,8 +70,7 @@ GPUd() int32_t MEM_LG(GPUTPCGrid)::GetBin(float Y, float Z) const return bin; } -MEM_CLASS_PRE() -GPUd() int32_t MEM_LG(GPUTPCGrid)::GetBinBounded(float Y, float Z) const +GPUd() int32_t GPUTPCGrid::GetBinBounded(float Y, float Z) const { //* get the bin pointer const int32_t yBin = static_cast((Y - mYMin) * mStepYInv); @@ -89,8 +85,7 @@ GPUd() int32_t MEM_LG(GPUTPCGrid)::GetBinBounded(float Y, float Z) const return bin; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCGrid)::GetBin(float Y, float Z, int32_t* const bY, int32_t* const bZ) const +GPUd() void GPUTPCGrid::GetBin(float Y, float Z, int32_t* const bY, int32_t* const bZ) const { //* get the bin pointer @@ -114,8 +109,7 @@ GPUd() void MEM_LG(GPUTPCGrid)::GetBin(float Y, float Z, int32_t* const bY, int3 *bZ = (uint32_t)bbZ; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCGrid)::GetBinArea(float Y, float Z, float dy, float dz, int32_t& bin, int32_t& ny, int32_t& nz) const +GPUd() void GPUTPCGrid::GetBinArea(float Y, float Z, float dy, float dz, int32_t& bin, int32_t& ny, int32_t& nz) const { Y -= mYMin; int32_t by = (int32_t)((Y - dy) * mStepYInv); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h index a069282e2a0a9..a3cd7916f0e6d 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h @@ -29,7 +29,6 @@ namespace gpu * used by GPUTPCTracker to speed-up the hit operations * grid axis are named Z,Y to be similar to TPC row coordinates. */ -MEM_CLASS_PRE() class GPUTPCGrid { public: diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx index 7842a57f47794..9293801f5f5f9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx @@ -18,7 +18,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCNeighboursCleaner::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCNeighboursCleaner::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { // * // * kill link to the neighbour if the neighbour is not pointed to the cluster @@ -38,9 +38,9 @@ GPUdii() void GPUTPCNeighboursCleaner::Thread<0>(int32_t /*nBlocks*/, int32_t nT #ifdef GPUCA_GPUCODE int32_t Up = s.mIRowUp; int32_t Dn = s.mIRowDn; - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row = tracker.Row(s.mIRow); - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowUp = tracker.Row(Up); - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowDn = tracker.Row(Dn); + GPUglobalref() const GPUTPCRow& GPUrestrict() row = tracker.Row(s.mIRow); + GPUglobalref() const GPUTPCRow& GPUrestrict() rowUp = tracker.Row(Up); + GPUglobalref() const GPUTPCRow& GPUrestrict() rowDn = tracker.Row(Dn); #else const GPUTPCRow& GPUrestrict() row = tracker.Row(s.mIRow); const GPUTPCRow& GPUrestrict() rowUp = tracker.Row(s.mIRowUp); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h index 26e85907bc6ab..23c1e21e87ab0 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h @@ -23,7 +23,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -33,7 +32,6 @@ class GPUTPCTracker; class GPUTPCNeighboursCleaner : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mIRow; // current row index int32_t mIRowUp; // current row index @@ -41,15 +39,14 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate int32_t mNHits; // number of hits }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx index b7cfccfa15408..69d05fc3176b4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx @@ -20,12 +20,12 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { //* find neighbours #ifdef GPUCA_GPUCODE - for (uint32_t i = iThread; i < sizeof(MEM_PLAIN(GPUTPCRow)) / sizeof(int32_t); i += nThreads) { + for (uint32_t i = iThread; i < sizeof(GPUTPCRow) / sizeof(int32_t); i += nThreads) { reinterpret_cast(&s.mRow)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock])[i]; if (iBlock >= 2 && iBlock < GPUCA_ROW_COUNT - 2) { reinterpret_cast(&s.mRowUp)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock + 2])[i]; @@ -33,13 +33,13 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } } GPUbarrier(); - const GPUsharedref() MEM_LOCAL(GPUTPCRow) & GPUrestrict() row = s.mRow; - const GPUsharedref() MEM_LOCAL(GPUTPCRow) & GPUrestrict() rowUp = s.mRowUp; - const GPUsharedref() MEM_LOCAL(GPUTPCRow) & GPUrestrict() rowDn = s.mRowDown; + const GPUsharedref() GPUTPCRow& GPUrestrict() row = s.mRow; + const GPUsharedref() GPUTPCRow& GPUrestrict() rowUp = s.mRowUp; + const GPUsharedref() GPUTPCRow& GPUrestrict() rowDn = s.mRowDown; #else - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row = tracker.mData.mRows[iBlock]; - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowUp = tracker.mData.mRows[iBlock + 2]; - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowDn = tracker.mData.mRows[iBlock - 2]; + const GPUglobalref() GPUTPCRow& GPUrestrict() row = tracker.mData.mRows[iBlock]; + const GPUglobalref() GPUTPCRow& GPUrestrict() rowUp = tracker.mData.mRows[iBlock + 2]; + const GPUglobalref() GPUTPCRow& GPUrestrict() rowDn = tracker.mData.mRows[iBlock - 2]; #endif if (iThread == 0) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h index 7174286fde948..a121a0f14eb67 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h @@ -24,7 +24,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -34,7 +33,6 @@ class GPUTPCTracker; class GPUTPCNeighboursFinder : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mNHits; // n hits float mUpDx; // x distance to the next row @@ -49,19 +47,17 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate float mA2[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; calink mB[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; #endif - MEM_LG(GPUTPCRow) - mRow, mRowUp, mRowDown; + GPUTPCRow mRow, mRowUp, mRowDown; }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h b/GPU/GPUTracking/SliceTracker/GPUTPCRow.h index ed25e18e90c46..7c8e96c8352a8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCRow.h @@ -29,10 +29,8 @@ namespace gpu * It is the internal class of the GPUTPCTracker algorithm. * */ -MEM_CLASS_PRE() class GPUTPCRow { - MEM_CLASS_PRE2() friend class GPUTPCSliceData; public: @@ -46,7 +44,7 @@ class GPUTPCRow } GPUhd() float X() const { return mX; } GPUhd() float MaxY() const { return mMaxY; } - GPUhd() MakeType(const MEM_LG(GPUTPCGrid) &) Grid() const { return mGrid; } + GPUhd() const GPUTPCGrid& Grid() const { return mGrid; } GPUhd() float Hy0() const { return mHy0; } GPUhd() float Hz0() const { return mHz0; } @@ -66,8 +64,7 @@ class GPUTPCRow int32_t mNHits; // number of hits float mX; // X coordinate of the row float mMaxY; // maximal Y coordinate of the row - MEM_LG(GPUTPCGrid) - mGrid; // grid of hits + GPUTPCGrid mGrid; // grid of hits // hit packing: float mHy0; // offset diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx index 99088a1e99c53..ba5da49ff6ff9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx @@ -28,8 +28,8 @@ template <> GPUdii() void GPUTPCSectorDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) { const uint32_t iRow = iBlock; - const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row = tracker.Data().Row(iRow); - const MEM_GLOBAL(GPUTPCGrid) & GPUrestrict() grid = row.Grid(); + const GPUTPCRow& GPUrestrict() row = tracker.Data().Row(iRow); + const GPUTPCGrid& GPUrestrict() grid = row.Grid(); for (uint32_t i = iThread; i < grid.N(); i += nThreads) { uint32_t jMin = tracker.Data().FirstHitInBin(row, i); uint32_t jMax = tracker.Data().FirstHitInBin(row, i + 1); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx index 6c456a28918ab..5177c48b6a834 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx @@ -32,7 +32,7 @@ using namespace GPUCA_NAMESPACE::gpu; #ifndef GPUCA_GPUCODE -void GPUTPCSliceData::InitializeRows(const MEM_CONSTANT(GPUParam) & p) +void GPUTPCSliceData::InitializeRows(const GPUParam& p) { // initialisation of rows for (int32_t i = 0; i < GPUCA_ROW_COUNT + 1; ++i) { @@ -109,7 +109,7 @@ void* GPUTPCSliceData::SetPointersRows(void* mem) #endif -GPUd() void GPUTPCSliceData::GetMaxNBins(GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) +GPUd() void GPUTPCSliceData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) { maxY = row->mMaxY * 2.f / GPUCA_MIN_BIN_SIZE + 1; maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : mem->param.tpcGeometry.TPCLength()) + 50; @@ -121,7 +121,7 @@ GPUd() uint32_t GPUTPCSliceData::GetGridSize(uint32_t nHits, uint32_t nRows) return 128 * nRows + 4 * nHits; } -GPUdi() void GPUTPCSliceData::CreateGrid(GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax) +GPUdi() void GPUTPCSliceData::CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax) { float dz = zMax - zMin; float tfFactor = 1.f; @@ -172,7 +172,7 @@ GPUdii() void GPUTPCSliceData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) } } -GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * GPUrestrict() mem, int32_t iSlice, float* tmpMinMax) +GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSlice, float* tmpMinMax) { #ifdef GPUCA_GPUCODE constexpr bool EarlyTransformWithoutClusterNative = false; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h index a75cba8dd861b..9ab74d969d965 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h @@ -28,7 +28,6 @@ namespace gpu struct GPUTPCClusterData; class GPUTPCHit; -MEM_CLASS_PRE() class GPUTPCSliceData { public: @@ -36,7 +35,7 @@ class GPUTPCSliceData #ifndef GPUCA_GPUCODE_DEVICE ~GPUTPCSliceData() CON_DEFAULT; - void InitializeRows(const MEM_CONSTANT(GPUParam) & p); + void InitializeRows(const GPUParam& p); void SetMaxData(); void SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset); void* SetPointersInput(void* mem, bool idsOnGPU, bool sliceDataOnGPU); @@ -47,7 +46,7 @@ class GPUTPCSliceData void* SetPointersRows(void* mem); #endif - GPUd() int32_t InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, int32_t iSlice, float* tmpMinMax); + GPUd() int32_t InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* mem, int32_t iSlice, float* tmpMinMax); /** * Return the number of hits in this slice. @@ -61,39 +60,26 @@ class GPUTPCSliceData * * The links values give the hit index in the row above/below. Or -1 if there is no link. */ - MEM_TEMPLATE() - GPUd() calink HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex) const; - MEM_TEMPLATE() - GPUd() calink HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex) const; - - MEM_TEMPLATE() - GPUhdi() GPUglobalref() const cahit2* HitData(const MEM_TYPE(GPUTPCRow) & row) const { return &mHitData[row.mHitNumberOffset]; } - MEM_TEMPLATE() - GPUhdi() GPUglobalref() cahit2* HitData(const MEM_TYPE(GPUTPCRow) & row) { return &mHitData[row.mHitNumberOffset]; } + GPUd() calink HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const; + GPUd() calink HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const; + + GPUhdi() GPUglobalref() const cahit2* HitData(const GPUTPCRow& row) const { return &mHitData[row.mHitNumberOffset]; } + GPUhdi() GPUglobalref() cahit2* HitData(const GPUTPCRow& row) { return &mHitData[row.mHitNumberOffset]; } GPUhd() GPUglobalref() const cahit2* HitData() const { return (mHitData); } - MEM_TEMPLATE() - GPUdi() GPUglobalref() const calink* HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row) const { return &mLinkUpData[row.mHitNumberOffset]; } - MEM_TEMPLATE() - GPUdi() GPUglobalref() calink* HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row) { return &mLinkUpData[row.mHitNumberOffset]; } - MEM_TEMPLATE() - GPUdi() GPUglobalref() const calink* HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row) const { return &mLinkDownData[row.mHitNumberOffset]; } - MEM_TEMPLATE() - GPUdi() GPUglobalref() const calink* FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row) const { return &mFirstHitInBin[row.mFirstHitInBinOffset]; } - - MEM_TEMPLATE() - GPUd() void SetHitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex, const calink& value); - MEM_TEMPLATE() - GPUd() void SetHitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex, const calink& value); + GPUdi() GPUglobalref() const calink* HitLinkUpData(const GPUTPCRow& row) const { return &mLinkUpData[row.mHitNumberOffset]; } + GPUdi() GPUglobalref() calink* HitLinkUpData(const GPUTPCRow& row) { return &mLinkUpData[row.mHitNumberOffset]; } + GPUdi() GPUglobalref() const calink* HitLinkDownData(const GPUTPCRow& row) const { return &mLinkDownData[row.mHitNumberOffset]; } + GPUdi() GPUglobalref() const calink* FirstHitInBin(const GPUTPCRow& row) const { return &mFirstHitInBin[row.mFirstHitInBinOffset]; } + + GPUd() void SetHitLinkUpData(const GPUTPCRow& row, const calink& hitIndex, const calink& value); + GPUd() void SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value); /** * Return the y and z coordinate(s) of the given hit(s). */ - MEM_TEMPLATE() - GPUd() cahit HitDataY(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const; - MEM_TEMPLATE() - GPUd() cahit HitDataZ(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const; - MEM_TEMPLATE() - GPUd() cahit2 HitData(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const; + GPUd() cahit HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const; + GPUd() cahit HitDataZ(const GPUTPCRow& row, const uint32_t& hitIndex) const; + GPUd() cahit2 HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const; /** * For a given bin index, content tells how many hits there are in the preceding bins. This maps @@ -101,36 +87,31 @@ class GPUTPCSliceData * * \param binIndexes in the range 0 to row.Grid.N + row.Grid.Ny + 3. */ - MEM_TEMPLATE() - GPUd() calink FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row, calink binIndex) const; + GPUd() calink FirstHitInBin(const GPUTPCRow& row, calink binIndex) const; /** * If the given weight is higher than what is currently stored replace with the new weight. */ - MEM_TEMPLATE() - GPUd() void MaximizeHitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex, uint32_t weight); - MEM_TEMPLATE() - GPUd() void SetHitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex, uint32_t weight); + GPUd() void MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight); + GPUd() void SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight); /** * Return the maximal weight the given hit got from one tracklet */ - MEM_TEMPLATE() - GPUd() int32_t HitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex) const; + GPUd() int32_t HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const; /** * Returns the index in the original GPUTPCClusterData object of the given hit */ - MEM_TEMPLATE() - GPUhd() int32_t ClusterDataIndex(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex) const; + GPUhd() int32_t ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const; GPUd() GPUglobalref() const int32_t* ClusterDataIndex() const { return mClusterDataIndex; } GPUd() GPUglobalref() int32_t* ClusterDataIndex() { return mClusterDataIndex; } /** * Return the row object for the given row index. */ - GPUhdi() GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & Row(int32_t rowIndex) const { return mRows[rowIndex]; } - GPUhdi() GPUglobalref() MEM_GLOBAL(GPUTPCRow) * Rows() const { return mRows; } + GPUhdi() GPUglobalref() const GPUTPCRow& Row(int32_t rowIndex) const { return mRows[rowIndex]; } + GPUhdi() GPUglobalref() GPUTPCRow* Rows() const { return mRows; } GPUhdi() GPUglobalref() GPUAtomic(uint32_t) * HitWeights() { return (mHitWeights); } @@ -145,9 +126,9 @@ class GPUTPCSliceData GPUTPCSliceData& operator=(const GPUTPCSliceData&) CON_DELETE; // ROOT 5 tries to use this if it is not private GPUTPCSliceData(const GPUTPCSliceData&) CON_DELETE; // #endif - GPUd() void CreateGrid(GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, MEM_GLOBAL(GPUTPCRow) * GPUrestrict() row, float yMin, float yMax, float zMin, float zMax); - GPUd() void SetRowGridEmpty(MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row); - GPUd() static void GetMaxNBins(GPUconstantref() const MEM_CONSTANT(GPUConstantMem) * mem, MEM_GLOBAL(GPUTPCRow) * GPUrestrict() row, int32_t& maxY, int32_t& maxZ); + GPUd() void CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax); + GPUd() void SetRowGridEmpty(GPUTPCRow& GPUrestrict() row); + GPUd() static void GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ); GPUd() uint32_t GetGridSize(uint32_t nHits, uint32_t nRows); friend class GPUTPCNeighboursFinder; @@ -159,7 +140,7 @@ class GPUTPCSliceData GPUglobalref() const void* mGPUTextureBase; // pointer to start of GPU texture - GPUglobalref() MEM_GLOBAL(GPUTPCRow) * mRows; // The row objects needed for most accessor functions + GPUglobalref() GPUTPCRow* mRows; // The row objects needed for most accessor functions GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index GPUglobalref() calink* mLinkDownData; // hit index in the row below which is linked to the given (global) hit index @@ -175,65 +156,41 @@ class GPUTPCSliceData GPUglobalref() const GPUTPCClusterData* mClusterData; }; -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() calink MEM_LG(GPUTPCSliceData)::HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } +GPUdi() calink GPUTPCSliceData::HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() calink MEM_LG(GPUTPCSliceData)::HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex) const { return mLinkDownData[row.mHitNumberOffset + hitIndex]; } +GPUdi() calink GPUTPCSliceData::HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkDownData[row.mHitNumberOffset + hitIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() void MEM_LG(GPUTPCSliceData)::SetHitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex, const calink& value) +GPUdi() void GPUTPCSliceData::SetHitLinkUpData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) { mLinkUpData[row.mHitNumberOffset + hitIndex] = value; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() void MEM_LG(GPUTPCSliceData)::SetHitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, const calink& hitIndex, const calink& value) +GPUdi() void GPUTPCSliceData::SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) { mLinkDownData[row.mHitNumberOffset + hitIndex] = value; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() cahit MEM_LG(GPUTPCSliceData)::HitDataY(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].x; } +GPUdi() cahit GPUTPCSliceData::HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].x; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() cahit MEM_LG(GPUTPCSliceData)::HitDataZ(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].y; } +GPUdi() cahit GPUTPCSliceData::HitDataZ(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].y; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() cahit2 MEM_LG(GPUTPCSliceData)::HitData(const MEM_TYPE(GPUTPCRow) & row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex]; } +GPUdi() cahit2 GPUTPCSliceData::HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() calink MEM_LG(GPUTPCSliceData)::FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row, calink binIndex) const { return mFirstHitInBin[row.mFirstHitInBinOffset + binIndex]; } +GPUdi() calink GPUTPCSliceData::FirstHitInBin(const GPUTPCRow& row, calink binIndex) const { return mFirstHitInBin[row.mFirstHitInBinOffset + binIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUhdi() int32_t MEM_LG(GPUTPCSliceData)::ClusterDataIndex(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex) const { return mClusterDataIndex[row.mHitNumberOffset + hitIndex]; } +GPUhdi() int32_t GPUTPCSliceData::ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const { return mClusterDataIndex[row.mHitNumberOffset + hitIndex]; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() void MEM_LG(GPUTPCSliceData)::MaximizeHitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex, uint32_t weight) +GPUdi() void GPUTPCSliceData::MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) { CAMath::AtomicMax(&mHitWeights[row.mHitNumberOffset + hitIndex], weight); } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() void MEM_LG(GPUTPCSliceData)::SetHitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex, uint32_t weight) +GPUdi() void GPUTPCSliceData::SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) { mHitWeights[row.mHitNumberOffset + hitIndex] = weight; } -MEM_CLASS_PRE() -MEM_TEMPLATE() -GPUdi() int32_t MEM_LG(GPUTPCSliceData)::HitWeight(const MEM_TYPE(GPUTPCRow) & row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } +GPUdi() int32_t GPUTPCSliceData::HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx index e9bbcdf91ca6c..2b097ab8f1835 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx @@ -19,7 +19,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { // find start hits for tracklets if (iThread == 0) { @@ -32,8 +32,8 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr } } GPUbarrier(); - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row = tracker.mData.mRows[s.mIRow]; - GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & GPUrestrict() rowUp = tracker.mData.mRows[s.mIRow + 2]; + GPUglobalref() const GPUTPCRow& GPUrestrict() row = tracker.mData.mRows[s.mIRow]; + GPUglobalref() const GPUTPCRow& GPUrestrict() rowUp = tracker.mData.mRows[s.mIRow + 2]; for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) { int64_t lHitNumberOffset = row.mHitNumberOffset; uint32_t linkUpData = tracker.mData.mLinkUpData[lHitNumberOffset + ih]; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h index f0adf3985a613..b2b9bfb355fa1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h @@ -24,7 +24,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -34,22 +33,20 @@ class GPUTPCTracker; class GPUTPCStartHitsFinder : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mIRow; // row index int32_t mNHits; // n hits in the row GPUAtomic(uint32_t) mNRowStartHits; // start hits found in the row }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx index 4275306999531..84ad70b58b964 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx @@ -21,7 +21,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCStartHitsSorter::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCStartHitsSorter::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { // Sorts the Start Hits by Row Index if (iThread == 0) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h index d5f9cc41e2a1a..838fcf7e7d7e1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h @@ -24,7 +24,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -34,22 +33,20 @@ class GPUTPCTracker; class GPUTPCStartHitsSorter : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mStartRow; // start row index int32_t mNRows; // number of rows to process int32_t mStartOffset; // start offset for hits sorted by this block }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h index 759f4e0f954bd..50c99cd3ad6ec 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h @@ -30,7 +30,6 @@ namespace gpu * The class is dedicated for internal use by the GPUTPCTracker algorithm. * The track parameters at both ends are stored separately in the GPUTPCEndPoint class */ -MEM_CLASS_PRE() class GPUTPCTrack { public: @@ -44,14 +43,13 @@ class GPUTPCTrack GPUhd() int32_t NHits() const { return mNHits; } GPUhd() int32_t LocalTrackId() const { return mLocalTrackId; } GPUhd() int32_t FirstHitID() const { return mFirstHitID; } - GPUhd() MakeType(const MEM_LG(GPUTPCBaseTrackParam) &) Param() const { return mParam; } + GPUhd() const GPUTPCBaseTrackParam& Param() const { return mParam; } GPUhd() void SetNHits(int32_t v) { mNHits = v; } GPUhd() void SetLocalTrackId(int32_t v) { mLocalTrackId = v; } GPUhd() void SetFirstHitID(int32_t v) { mFirstHitID = v; } - MEM_TEMPLATE() - GPUhd() void SetParam(const MEM_TYPE(GPUTPCBaseTrackParam) & v) { mParam = v; } + GPUhd() void SetParam(const GPUTPCBaseTrackParam& v) { mParam = v; } // Only if used as replacement for SliceOutTrack GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSliceOutCluster); } @@ -65,8 +63,7 @@ class GPUTPCTrack int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array int32_t mNHits; // number of track cells int32_t mLocalTrackId; // Id of local track this global track belongs to, index of this track itself if it is a local track - MEM_LG(GPUTPCBaseTrackParam) - mParam; // track parameters + GPUTPCBaseTrackParam mParam; // track parameters private: }; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h index d9f332beabd7d..972c62ffe7e20 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h @@ -41,7 +41,7 @@ class GPUTPCTrackLinearisation GPUTPCTrackLinearisation() : mSinPhi(0), mCosPhi(1), mDzDs(0), mQPt(0) {} GPUTPCTrackLinearisation(float SinPhi1, float CosPhi1, float DzDs1, float QPt1) : mSinPhi(SinPhi1), mCosPhi(CosPhi1), mDzDs(DzDs1), mQPt(QPt1) {} - GPUd() MEM_CLASS_PRE2() GPUTPCTrackLinearisation(const MEM_LG2(GPUTPCTrackParam) & t); + GPUd() GPUTPCTrackLinearisation(const GPUTPCTrackParam& t); GPUd() void Set(float SinPhi1, float CosPhi1, float DzDs1, float QPt1); @@ -67,8 +67,7 @@ class GPUTPCTrackLinearisation float mQPt; // QPt }; -MEM_CLASS_PRE2() -GPUdi() GPUTPCTrackLinearisation::GPUTPCTrackLinearisation(const MEM_LG2(GPUTPCTrackParam) & GPUrestrict() t) : mSinPhi(t.SinPhi()), mCosPhi(0), mDzDs(t.DzDs()), mQPt(t.QPt()) +GPUdi() GPUTPCTrackLinearisation::GPUTPCTrackLinearisation(const GPUTPCTrackParam& GPUrestrict() t) : mSinPhi(t.SinPhi()), mCosPhi(0), mDzDs(t.DzDs()), mQPt(t.QPt()) { if (mSinPhi > GPUCA_MAX_SIN_PHI) { mSinPhi = GPUCA_MAX_SIN_PHI; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx index 18245c48ab578..5c1c99c4d75b2 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx @@ -28,8 +28,7 @@ using namespace GPUCA_NAMESPACE::gpu; // Yc = Y + CAMath::Cos(Phi)/Kappa; // -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::GetDist2(const MEM_LG(GPUTPCTrackParam) & GPUrestrict() t) const +GPUd() float GPUTPCTrackParam::GetDist2(const GPUTPCTrackParam& GPUrestrict() t) const { // get squared distance between tracks @@ -39,8 +38,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::GetDist2(const MEM_LG(GPUTPCTrackParam) & return dx * dx + dy * dy + dz * dz; } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::GetDistXZ2(const MEM_LG(GPUTPCTrackParam) & GPUrestrict() t) const +GPUd() float GPUTPCTrackParam::GetDistXZ2(const GPUTPCTrackParam& GPUrestrict() t) const { // get squared distance between tracks in X&Z @@ -49,8 +47,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::GetDistXZ2(const MEM_LG(GPUTPCTrackParam) return dx * dx + dz * dz; } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::GetS(float x, float y, float Bz) const +GPUd() float GPUTPCTrackParam::GetS(float x, float y, float Bz) const { //* Get XY path length to the given point @@ -66,8 +63,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::GetS(float x, float y, float Bz) const return dS; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::GetDCAPoint(float x, float y, float z, float& GPUrestrict() xp, float& GPUrestrict() yp, float& GPUrestrict() zp, float Bz) const +GPUd() void GPUTPCTrackParam::GetDCAPoint(float x, float y, float z, float& GPUrestrict() xp, float& GPUrestrict() yp, float& GPUrestrict() zp, float Bz) const { //* Get the track point closest to the (x,y,z) @@ -97,8 +93,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::GetDCAPoint(float x, float y, float z, flo //* Transport routines //* -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, GPUTPCTrackLinearisation& GPUrestrict() t0, float Bz, float maxSinPhi, float* GPUrestrict() DL) +GPUd() bool GPUTPCTrackParam::TransportToX(float x, GPUTPCTrackLinearisation& GPUrestrict() t0, float Bz, float maxSinPhi, float* GPUrestrict() DL) { //* Transport the track parameters to X=x, using linearization at t0, and the field value Bz //* maxSinPhi is the max. allowed value for |t0.SinPhi()| @@ -218,8 +213,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, GPUTPCTrackLinearisa return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, float sinPhi0, float cosPhi0, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToX(float x, float sinPhi0, float cosPhi0, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x, using linearization at phi0 with 0 curvature, //* and the field value Bz @@ -299,16 +293,14 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, float sinPhi0, float return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToX(float x, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToX(float x, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x GPUTPCTrackLinearisation t0(*this); return TransportToX(x, t0, Bz, maxSinPhi); } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, GPUTPCTrackLinearisation& GPUrestrict() t0, GPUTPCTrackFitParam& GPUrestrict() par, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToXWithMaterial(float x, GPUTPCTrackLinearisation& GPUrestrict() t0, GPUTPCTrackFitParam& GPUrestrict() par, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x taking into account material budget @@ -326,8 +318,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, GPUTPCTr return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, GPUTPCTrackFitParam& GPUrestrict() par, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToXWithMaterial(float x, GPUTPCTrackFitParam& GPUrestrict() par, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x taking into account material budget @@ -335,8 +326,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, GPUTPCTr return TransportToXWithMaterial(x, t0, par, Bz, maxSinPhi); } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, float Bz, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::TransportToXWithMaterial(float x, float Bz, float maxSinPhi) { //* Transport the track parameters to X=x taking into account material budget @@ -348,8 +338,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::TransportToXWithMaterial(float x, float Bz //* //* Multiple scattering and energy losses //* -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochGeant(float bg2, float kp0, float kp1, float kp2, float kp3, float kp4) +GPUd() float GPUTPCTrackParam::BetheBlochGeant(float bg2, float kp0, float kp1, float kp2, float kp3, float kp4) { // // This is the parameterization of the Bethe-Bloch formula inspired by Geant. @@ -388,8 +377,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochGeant(float bg2, float kp0, flo return mK * mZA * (1 + bg2) / bg2 * (0.5f * CAMath::Log(2 * me * bg2 * maxT / (mI * mI)) - bg2 / (1 + bg2) - d2); } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochSolid(float bg) +GPUd() float GPUTPCTrackParam::BetheBlochSolid(float bg) { //------------------------------------------------------------------ // This is an approximation of the Bethe-Bloch formula, @@ -401,8 +389,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochSolid(float bg) return BetheBlochGeant(bg); } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochGas(float bg) +GPUd() float GPUTPCTrackParam::BetheBlochGas(float bg) { //------------------------------------------------------------------ // This is an approximation of the Bethe-Bloch formula, @@ -420,8 +407,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::BetheBlochGas(float bg) return BetheBlochGeant(bg, rho, x0, x1, mI, mZA); } -MEM_CLASS_PRE() -GPUd() float MEM_LG(GPUTPCTrackParam)::ApproximateBetheBloch(float beta2) +GPUd() float GPUTPCTrackParam::ApproximateBetheBloch(float beta2) { //------------------------------------------------------------------ // This is an approximation of the Bethe-Bloch formula with @@ -438,8 +424,7 @@ GPUd() float MEM_LG(GPUTPCTrackParam)::ApproximateBetheBloch(float beta2) return 0.153e-3f / beta2 * (CAMath::Log(5940 * beta2 / (1 - beta2)) - beta2); } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::CalculateFitParameters(GPUTPCTrackFitParam& par, float mass) +GPUd() void GPUTPCTrackParam::CalculateFitParameters(GPUTPCTrackFitParam& par, float mass) { //*! @@ -473,8 +458,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::CalculateFitParameters(GPUTPCTrackFitParam par.k44 = GetPar(3) * GetPar(3) * k2; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::CorrectForMeanMaterial(float xOverX0, float xTimesRho, const GPUTPCTrackFitParam& par) +GPUd() bool GPUTPCTrackParam::CorrectForMeanMaterial(float xOverX0, float xTimesRho, const GPUTPCTrackFitParam& par) { //------------------------------------------------------------------ // This function corrects the track parameters for the crossed material. @@ -523,8 +507,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::CorrectForMeanMaterial(float xOverX0, floa //* //* Rotation //* -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::Rotate(float alpha, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::Rotate(float alpha, float maxSinPhi) { //* Rotate the coordinate system in XY on the angle alpha @@ -581,8 +564,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::Rotate(float alpha, float maxSinPhi) return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::Rotate(float alpha, GPUTPCTrackLinearisation& t0, float maxSinPhi) +GPUd() bool GPUTPCTrackParam::Rotate(float alpha, GPUTPCTrackLinearisation& t0, float maxSinPhi) { //* Rotate the coordinate system in XY on the angle alpha @@ -628,8 +610,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::Rotate(float alpha, GPUTPCTrackLinearisati return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::Filter(float y, float z, float err2Y, float err2Z, float maxSinPhi, bool paramOnly) +GPUd() bool GPUTPCTrackParam::Filter(float y, float z, float err2Y, float err2Z, float maxSinPhi, bool paramOnly) { //* Add the y,z measurement with the Kalman filter @@ -690,8 +671,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::Filter(float y, float z, float err2Y, floa return 1; } -MEM_CLASS_PRE() -GPUd() bool MEM_LG(GPUTPCTrackParam)::CheckNumericalQuality() const +GPUd() bool GPUTPCTrackParam::CheckNumericalQuality() const { //* Check that the track parameters and covariance matrix are reasonable @@ -727,8 +707,7 @@ GPUd() bool MEM_LG(GPUTPCTrackParam)::CheckNumericalQuality() const return ok; } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::ConstrainZ(float& z, int32_t sector, float& z0, float& lastZ) +GPUd() void GPUTPCTrackParam::ConstrainZ(float& z, int32_t sector, float& z0, float& lastZ) { if (sector < GPUCA_NSLICES / 2) { if (z < 0) { @@ -763,8 +742,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::ConstrainZ(float& z, int32_t sector, float } } -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::ShiftZ(float z1, float z2, float x1, float x2, float bz, float defaultZOffsetOverR) +GPUd() void GPUTPCTrackParam::ShiftZ(float z1, float z2, float x1, float x2, float bz, float defaultZOffsetOverR) { const float r1 = CAMath::Max(0.0001f, CAMath::Abs(mParam.mP[4] * bz)); @@ -826,8 +804,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::ShiftZ(float z1, float z2, float x1, float #include #endif -MEM_CLASS_PRE() -GPUd() void MEM_LG(GPUTPCTrackParam)::Print() const +GPUd() void GPUTPCTrackParam::Print() const { //* print parameters @@ -837,8 +814,7 @@ GPUd() void MEM_LG(GPUTPCTrackParam)::Print() const #endif } -MEM_CLASS_PRE() -GPUd() int32_t MEM_LG(GPUTPCTrackParam)::GetPropagatedYZ(float bz, float x, float& projY, float& projZ) const +GPUd() int32_t GPUTPCTrackParam::GetPropagatedYZ(float bz, float x, float& projY, float& projZ) const { float k = mParam.mP[4] * bz; float dx = x - mParam.mX; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h index ffc28af6f4e32..792cba4f519e1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h @@ -32,7 +32,6 @@ class GPUTPCTrackLinearisation; * which is used by the GPUTPCTracker slice tracker. * */ -MEM_CLASS_PRE() class GPUTPCTrackParam { public: @@ -40,8 +39,8 @@ class GPUTPCTrackParam float bethe, e, theta2, EP2, sigmadE2, k22, k33, k43, k44; // parameters }; - GPUd() MakeType(const MEM_LG(GPUTPCBaseTrackParam) &) GetParam() const { return mParam; } - GPUd() void SetParam(const MEM_LG(GPUTPCBaseTrackParam) & v) { mParam = v; } + GPUd() const GPUTPCBaseTrackParam& GetParam() const { return mParam; } + GPUd() void SetParam(const GPUTPCBaseTrackParam& v) { mParam = v; } GPUd() void InitParam(); GPUd() float X() const { return mParam.X(); } @@ -74,7 +73,7 @@ class GPUTPCTrackParam GPUd() float GetKappa(float Bz) const { return mParam.GetKappa(Bz); } GPUd() float GetCosPhi() const { return mSignCosPhi * CAMath::Sqrt(1 - SinPhi() * SinPhi()); } - GPUhd() MakeType(const float*) Par() const { return mParam.Par(); } + GPUhd() const float* Par() const { return mParam.Par(); } GPUhd() const float* Cov() const { return mParam.Cov(); } GPUd() const float* GetPar() const { return mParam.GetPar(); } @@ -145,8 +144,7 @@ class GPUTPCTrackParam #ifndef GPUCA_GPUCODE private: #endif //! GPUCA_GPUCODE - MEM_LG(GPUTPCBaseTrackParam) - mParam; // Track Parameters + GPUTPCBaseTrackParam mParam; // Track Parameters private: // WARNING, Track Param Data is copied in the GPU Tracklet Constructor element by element instead of using copy constructor!!! @@ -157,8 +155,7 @@ class GPUTPCTrackParam int32_t mNDF; // the Number of Degrees of Freedom }; -MEM_CLASS_PRE() -GPUdi() void MEM_LG(GPUTPCTrackParam)::InitParam() +GPUdi() void GPUTPCTrackParam::InitParam() { // Initialize Tracklet Parameters using default values SetSinPhi(0); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h index 488807e981b5b..5a320a8863992 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h @@ -33,16 +33,11 @@ namespace gpu { class GPUTPCSliceOutput; struct GPUTPCClusterData; -MEM_CLASS_PRE() struct GPUParam; -MEM_CLASS_PRE() class GPUTPCTrack; -MEM_CLASS_PRE() class GPUTPCTrackParam; -MEM_CLASS_PRE() class GPUTPCRow; -MEM_CLASS_PRE() class GPUTPCTracker : public GPUProcessor { public: @@ -52,12 +47,9 @@ class GPUTPCTracker : public GPUProcessor GPUTPCTracker(const GPUTPCTracker&) CON_DELETE; GPUTPCTracker& operator=(const GPUTPCTracker&) CON_DELETE; - MEM_CLASS_PRE2() void SetSlice(int32_t iSlice); - MEM_CLASS_PRE2() void InitializeProcessor(); - MEM_CLASS_PRE2() - void InitializeRows(const MEM_CONSTANT(GPUParam) * param) { mData.InitializeRows(*param); } + void InitializeRows(const GPUParam* param) { mData.InitializeRows(*param); } int32_t CheckEmptySlice(); void WriteOutputPrepare(); @@ -77,7 +69,6 @@ class GPUTPCTracker : public GPUProcessor GPUAtomic(uint32_t) nextStartHit; // Next Tracklet to process }; - MEM_CLASS_PRE2() struct StructGPUParametersConst { GPUglobalref() char* gpumem; // Base pointer to GPU memory (Needed for OpenCL for verification) }; @@ -98,22 +89,20 @@ class GPUTPCTracker : public GPUProcessor { return mData.ClusterData(); } - GPUhdi() MakeType(const MEM_LG(GPUTPCRow) &) Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } + GPUhdi() const GPUTPCRow& Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } GPUhdi() GPUglobalref() GPUTPCSliceOutput* Output() const { return mOutput; } GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const { return (mCommonMem); } - MEM_CLASS_PRE2() - GPUdi() static void GetErrors2Seeding(const MEM_CONSTANT(GPUParam) & param, char sector, int32_t iRow, const MEM_LG2(GPUTPCTrackParam) & t, float time, float& ErrY2, float& ErrZ2) + GPUdi() static void GetErrors2Seeding(const GPUParam& param, char sector, int32_t iRow, const GPUTPCTrackParam& t, float time, float& ErrY2, float& ErrZ2) { // param.GetClusterErrors2(sector, iRow, param.GetContinuousTracking() != 0. ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, 0.f, 0.f, ErrY2, ErrZ2); param.GetClusterErrorsSeeding2(sector, iRow, param.par.continuousTracking != 0.f ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, ErrY2, ErrZ2); } - MEM_CLASS_PRE2() - GPUdi() void GetErrors2Seeding(int32_t iRow, const MEM_LG2(GPUTPCTrackParam) & t, float time, float& ErrY2, float& ErrZ2) const + GPUdi() void GetErrors2Seeding(int32_t iRow, const GPUTPCTrackParam& t, float time, float& ErrY2, float& ErrZ2) const { // Param().GetClusterErrors2(mISlice, iRow, Param().GetContinuousTracking() != 0. ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, 0.f, 0.f, ErrY2, ErrZ2); Param().GetClusterErrorsSeeding2(mISlice, iRow, Param().par.continuousTracking != 0.f ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, ErrY2, ErrZ2); @@ -151,13 +140,13 @@ class GPUTPCTracker : public GPUProcessor GPUhd() int32_t ISlice() const { return mISlice; } - GPUhd() GPUconstantref() const MEM_LG(GPUTPCSliceData) & Data() const { return mData; } - GPUhdi() GPUconstantref() MEM_LG(GPUTPCSliceData) & Data() + GPUhd() GPUconstantref() const GPUTPCSliceData& Data() const { return mData; } + GPUhdi() GPUconstantref() GPUTPCSliceData& Data() { return mData; } - GPUhd() GPUglobalref() const MEM_GLOBAL(GPUTPCRow) & Row(int32_t rowIndex) const { return mData.Row(rowIndex); } + GPUhd() GPUglobalref() const GPUTPCRow& Row(int32_t rowIndex) const { return mData.Row(rowIndex); } GPUhd() uint32_t NHitsTotal() const { return mData.NumberOfHits(); } GPUhd() uint32_t NMaxTracklets() const { return mNMaxTracklets; } @@ -167,36 +156,23 @@ class GPUTPCTracker : public GPUProcessor GPUhd() uint32_t NMaxStartHits() const { return mNMaxStartHits; } GPUhd() uint32_t NMaxRowStartHits() const { return mNMaxRowStartHits; } - MEM_TEMPLATE() - GPUd() void SetHitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex, calink v) { mData.SetHitLinkUpData(row, hitIndex, v); } - MEM_TEMPLATE() - GPUd() void SetHitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex, calink v) { mData.SetHitLinkDownData(row, hitIndex, v); } - MEM_TEMPLATE() - GPUd() calink HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitLinkUpData(row, hitIndex); } - MEM_TEMPLATE() - GPUd() calink HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitLinkDownData(row, hitIndex); } - - MEM_TEMPLATE() - GPUd() GPUglobalref() const cahit2* HitData(const MEM_TYPE(GPUTPCRow) & row) const { return mData.HitData(row); } - MEM_TEMPLATE() - GPUd() GPUglobalref() const calink* HitLinkUpData(const MEM_TYPE(GPUTPCRow) & row) const { return mData.HitLinkUpData(row); } - MEM_TEMPLATE() - GPUd() GPUglobalref() const calink* HitLinkDownData(const MEM_TYPE(GPUTPCRow) & row) const { return mData.HitLinkDownData(row); } - MEM_TEMPLATE() - GPUd() GPUglobalref() const calink* FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row) const { return mData.FirstHitInBin(row); } - - MEM_TEMPLATE() - GPUd() int32_t FirstHitInBin(const MEM_TYPE(GPUTPCRow) & row, int32_t binIndex) const { return mData.FirstHitInBin(row, binIndex); } - - MEM_TEMPLATE() - GPUd() cahit HitDataY(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitDataY(row, hitIndex); } - MEM_TEMPLATE() - GPUd() cahit HitDataZ(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitDataZ(row, hitIndex); } - MEM_TEMPLATE() - GPUd() cahit2 HitData(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitData(row, hitIndex); } - - MEM_TEMPLATE() - GPUhd() int32_t HitInputID(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.ClusterDataIndex(row, hitIndex); } + GPUd() void SetHitLinkUpData(const GPUTPCRow& row, int32_t hitIndex, calink v) { mData.SetHitLinkUpData(row, hitIndex, v); } + GPUd() void SetHitLinkDownData(const GPUTPCRow& row, int32_t hitIndex, calink v) { mData.SetHitLinkDownData(row, hitIndex, v); } + GPUd() calink HitLinkUpData(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitLinkUpData(row, hitIndex); } + GPUd() calink HitLinkDownData(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitLinkDownData(row, hitIndex); } + + GPUd() GPUglobalref() const cahit2* HitData(const GPUTPCRow& row) const { return mData.HitData(row); } + GPUd() GPUglobalref() const calink* HitLinkUpData(const GPUTPCRow& row) const { return mData.HitLinkUpData(row); } + GPUd() GPUglobalref() const calink* HitLinkDownData(const GPUTPCRow& row) const { return mData.HitLinkDownData(row); } + GPUd() GPUglobalref() const calink* FirstHitInBin(const GPUTPCRow& row) const { return mData.FirstHitInBin(row); } + + GPUd() int32_t FirstHitInBin(const GPUTPCRow& row, int32_t binIndex) const { return mData.FirstHitInBin(row, binIndex); } + + GPUd() cahit HitDataY(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitDataY(row, hitIndex); } + GPUd() cahit HitDataZ(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitDataZ(row, hitIndex); } + GPUd() cahit2 HitData(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitData(row, hitIndex); } + + GPUhd() int32_t HitInputID(const GPUTPCRow& row, int32_t hitIndex) const { return mData.ClusterDataIndex(row, hitIndex); } /** * The hit weight is used to determine whether a hit belongs to a certain tracklet or another one @@ -216,12 +192,9 @@ class GPUTPCTracker : public GPUProcessor return ((int32_t)weight); // return( (NHits << 16) + num); } - MEM_TEMPLATE() - GPUd() void MaximizeHitWeight(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex, int32_t weight) { mData.MaximizeHitWeight(row, hitIndex, weight); } - MEM_TEMPLATE() - GPUd() void SetHitWeight(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex, int32_t weight) { mData.SetHitWeight(row, hitIndex, weight); } - MEM_TEMPLATE() - GPUd() int32_t HitWeight(const MEM_TYPE(GPUTPCRow) & row, int32_t hitIndex) const { return mData.HitWeight(row, hitIndex); } + GPUd() void MaximizeHitWeight(const GPUTPCRow& row, int32_t hitIndex, int32_t weight) { mData.MaximizeHitWeight(row, hitIndex, weight); } + GPUd() void SetHitWeight(const GPUTPCRow& row, int32_t hitIndex, int32_t weight) { mData.SetHitWeight(row, hitIndex, weight); } + GPUd() int32_t HitWeight(const GPUTPCRow& row, int32_t hitIndex) const { return mData.HitWeight(row, hitIndex); } GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NTracklets() const { return &mCommonMem->nTracklets; } GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NRowHits() const { return &mCommonMem->nRowHits; } @@ -231,24 +204,23 @@ class GPUTPCTracker : public GPUProcessor GPUhd() GPUglobalref() const GPUTPCHitId* TrackletStartHits() const { return mTrackletStartHits; } GPUhd() GPUglobalref() GPUTPCHitId* TrackletStartHits() { return mTrackletStartHits; } GPUhd() GPUglobalref() GPUTPCHitId* TrackletTmpStartHits() const { return mTrackletTmpStartHits; } - MEM_CLASS_PRE2() - GPUhd() GPUglobalref() const MEM_LG2(GPUTPCTracklet) & Tracklet(int32_t i) const { return mTracklets[i]; } - GPUhd() GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) * Tracklets() const { return mTracklets; } + GPUhd() GPUglobalref() const GPUTPCTracklet& Tracklet(int32_t i) const { return mTracklets[i]; } + GPUhd() GPUglobalref() GPUTPCTracklet* Tracklets() const { return mTracklets; } GPUhd() GPUglobalref() calink* TrackletRowHits() const { return mTrackletRowHits; } GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NTracks() const { return &mCommonMem->nTracks; } - GPUhd() GPUglobalref() MEM_GLOBAL(GPUTPCTrack) * Tracks() const { return mTracks; } + GPUhd() GPUglobalref() GPUTPCTrack* Tracks() const { return mTracks; } GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NTrackHits() const { return &mCommonMem->nTrackHits; } GPUhd() GPUglobalref() GPUTPCHitId* TrackHits() const { return mTrackHits; } - GPUhd() GPUglobalref() MEM_GLOBAL(GPUTPCRow) * SliceDataRows() const { return (mData.Rows()); } + GPUhd() GPUglobalref() GPUTPCRow* SliceDataRows() const { return (mData.Rows()); } GPUhd() GPUglobalref() int32_t* RowStartHitCountOffset() const { return (mRowStartHitCountOffset); } GPUhd() GPUglobalref() StructGPUParameters* GPUParameters() const { return (&mCommonMem->gpuParameters); } - GPUhd() MakeType(MEM_LG(StructGPUParametersConst) *) GPUParametersConst() + GPUhd() StructGPUParametersConst* GPUParametersConst() { return (&mGPUParametersConst); } - GPUhd() MakeType(MEM_LG(const StructGPUParametersConst) *) GetGPUParametersConst() const { return (&mGPUParametersConst); } + GPUhd() const StructGPUParametersConst* GetGPUParametersConst() const { return (&mGPUParametersConst); } GPUhd() void SetGPUTextureBase(GPUglobalref() const void* val) { mData.SetGPUTextureBase(val); } struct trackSortData { @@ -270,10 +242,7 @@ class GPUTPCTracker : public GPUProcessor int32_t mISlice; // Number of slice - /** A pointer to the ClusterData object that the SliceData was created from. This can be used to - * merge clusters from inside the SliceTracker code and recreate the SliceData. */ - MEM_LG(GPUTPCSliceData) - mData; // The SliceData object. It is used to encapsulate the storage in memory from the access + GPUTPCSliceData mData; // The SliceData object. It is used to encapsulate the storage in memory from the access uint32_t mNMaxStartHits; uint32_t mNMaxRowStartHits; @@ -295,15 +264,14 @@ class GPUTPCTracker : public GPUProcessor GPUglobalref() GPUTPCHitId* mTrackletTmpStartHits; // Unsorted start hits GPUglobalref() char* mGPUTrackletTemp; // Temp Memory for GPU Tracklet Constructor - MEM_LG(StructGPUParametersConst) - mGPUParametersConst; // Parameters for GPU if this is a GPU tracker + StructGPUParametersConst mGPUParametersConst; // Parameters for GPU if this is a GPU tracker // event GPUglobalref() commonMemoryStruct* mCommonMem; // common event memory GPUglobalref() GPUTPCHitId* mTrackletStartHits; // start hits for the tracklets - GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) * mTracklets; // tracklets + GPUglobalref() GPUTPCTracklet* mTracklets; // tracklets GPUglobalref() calink* mTrackletRowHits; // Hits for each Tracklet in each row - GPUglobalref() MEM_GLOBAL(GPUTPCTrack) * mTracks; // reconstructed tracks + GPUglobalref() GPUTPCTrack* mTracks; // reconstructed tracks GPUglobalref() GPUTPCHitId* mTrackHits; // array of track hit numbers // output diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h index 08ec8d8bf54e7..9190cdb94aa5f 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h @@ -28,7 +28,6 @@ namespace gpu * The class describes the reconstructed TPC track candidate. * The class is dedicated for internal use by the GPUTPCTracker algorithm. */ -MEM_CLASS_PRE() class GPUTPCTracklet { public: @@ -40,20 +39,18 @@ class GPUTPCTracklet GPUhd() int32_t LastRow() const { return mLastRow; } GPUhd() int32_t HitWeight() const { return mHitWeight; } GPUhd() uint32_t FirstHit() const { return mFirstHit; } - GPUhd() MakeType(const MEM_LG(GPUTPCBaseTrackParam) &) Param() const { return mParam; } + GPUhd() const GPUTPCBaseTrackParam& Param() const { return mParam; } GPUhd() void SetFirstRow(int32_t v) { mFirstRow = v; } GPUhd() void SetLastRow(int32_t v) { mLastRow = v; } GPUhd() void SetFirstHit(uint32_t v) { mFirstHit = v; } - MEM_CLASS_PRE2() - GPUhd() void SetParam(const MEM_LG2(GPUTPCBaseTrackParam) & v) { mParam = reinterpret_cast(v); } + GPUhd() void SetParam(const GPUTPCBaseTrackParam& v) { mParam = reinterpret_cast(v); } GPUhd() void SetHitWeight(const int32_t w) { mHitWeight = w; } private: int32_t mFirstRow; // first TPC row // TODO: We can use smaller data format here! int32_t mLastRow; // last TPC row - MEM_LG(GPUTPCBaseTrackParam) - mParam; // tracklet parameters + GPUTPCBaseTrackParam mParam; // tracklet parameters int32_t mHitWeight; // Hit Weight of Tracklet uint32_t mFirstHit; // first hit in row hit array }; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx index c073ad3d26b8b..e7735b4b2580c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx @@ -31,15 +31,13 @@ using namespace GPUCA_NAMESPACE::gpu; -MEM_CLASS_PRE2() -GPUdii() void GPUTPCTrackletConstructor::InitTracklet(MEM_LG2(GPUTPCTrackParam) & GPUrestrict() tParam) +GPUdii() void GPUTPCTrackletConstructor::InitTracklet(GPUTPCTrackParam& GPUrestrict() tParam) { // Initialize Tracklet Parameters using default values tParam.InitParam(); } -MEM_CLASS_PRE2() -GPUd() bool GPUTPCTrackletConstructor::CheckCov(MEM_LG2(GPUTPCTrackParam) & GPUrestrict() tParam) +GPUd() bool GPUTPCTrackletConstructor::CheckCov(GPUTPCTrackParam& GPUrestrict() tParam) { bool ok = 1; const float* c = tParam.Cov(); @@ -56,8 +54,7 @@ GPUd() bool GPUTPCTrackletConstructor::CheckCov(MEM_LG2(GPUTPCTrackParam) & GPUr return (ok); } -MEM_CLASS_PRE23() -GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_t /*nThreads*/, int32_t /*iBlock*/, int32_t /*iThread*/, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, GPUTPCThreadMemory& GPUrestrict() r, GPUconstantref() MEM_LG2(GPUTPCTracker) & GPUrestrict() tracker, MEM_LG3(GPUTPCTrackParam) & GPUrestrict() tParam, calink* rowHits) +GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_t /*nThreads*/, int32_t /*iBlock*/, int32_t /*iThread*/, GPUsharedref() GPUSharedMemory& s, GPUTPCThreadMemory& GPUrestrict() r, GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUTPCTrackParam& GPUrestrict() tParam, calink* rowHits) { // reconstruction of tracklets, tracklet store step const uint32_t nHits = r.mLastRow + 1 - r.mFirstRow; @@ -83,7 +80,7 @@ GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_ return; } - GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) & GPUrestrict() tracklet = tracker.Tracklets()[itrout]; + GPUglobalref() GPUTPCTracklet& GPUrestrict() tracklet = tracker.Tracklets()[itrout]; CADEBUG(printf(" Storing tracklet: %d rows\n", nHits)); @@ -107,8 +104,8 @@ GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_ } } -MEM_CLASS_PRE2_TEMPLATE(class T) -GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, int32_t /*nThreads*/, int32_t /*iBlock*/, int32_t /*iThread*/, GPUsharedref() T& s, GPUTPCThreadMemory& GPUrestrict() r, GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, MEM_LG2(GPUTPCTrackParam) & GPUrestrict() tParam, int32_t iRow, calink& rowHit, calink* rowHits) +template +GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, int32_t /*nThreads*/, int32_t /*iBlock*/, int32_t /*iThread*/, GPUsharedref() T& s, GPUTPCThreadMemory& GPUrestrict() r, GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUTPCTrackParam& GPUrestrict() tParam, int32_t iRow, calink& rowHit, calink* rowHits) { // reconstruction of tracklets, tracklets update step CA_MAKE_SHARED_REF(GPUTPCRow, row, tracker.Row(iRow), s.mRows[iRow]); @@ -300,13 +297,13 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, calink best = CALINK_INVAL; float err2Y, err2Z; - tracker.GetErrors2Seeding(iRow, *((MEM_LG2(GPUTPCTrackParam)*)&tParam), -1.f, err2Y, err2Z); // TODO: Use correct time + tracker.GetErrors2Seeding(iRow, *((GPUTPCTrackParam*)&tParam), -1.f, err2Y, err2Z); // TODO: Use correct time if (r.mNHits >= 10) { const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISlice() >= 18); err2Y += sErr2; err2Z += sErr2; } - if (CAMath::Abs(yUncorrected) < x * MEM_GLOBAL(GPUTPCRow)::getTPCMaxY1X()) { // search for the closest hit + if (CAMath::Abs(yUncorrected) < x * GPUTPCRow::getTPCMaxY1X()) { // search for the closest hit const float kFactor = tracker.Param().rec.tpc.hitPickUpFactor * tracker.Param().rec.tpc.hitPickUpFactor * 7.0f * 7.0f; const float maxWindow2 = tracker.Param().rec.tpc.hitSearchArea2; const float sy2 = CAMath::Min(maxWindow2, kFactor * (tParam.Err2Y() + err2Y)); @@ -393,8 +390,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } while (0); } if (r.mNHits == 8 && r.mNMissed == 0 && rowHit != CALINK_INVAL && rowHit != CALINK_DEAD_CHANNEL && rowHits && tracker.Param().par.continuousTracking && rowHits[r.mFirstRow] != CALINK_INVAL && rowHits[r.mFirstRow] != CALINK_DEAD_CHANNEL && rowHits[r.mLastRow] != CALINK_INVAL && rowHits[r.mLastRow] != CALINK_DEAD_CHANNEL) { - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row1 = tracker.Row(r.mFirstRow); - const GPUglobalref() MEM_GLOBAL(GPUTPCRow) & GPUrestrict() row2 = tracker.Row(r.mLastRow); + const GPUglobalref() GPUTPCRow& GPUrestrict() row1 = tracker.Row(r.mFirstRow); + const GPUglobalref() GPUTPCRow& GPUrestrict() row2 = tracker.Row(r.mLastRow); GPUglobalref() const cahit2* hits1 = tracker.HitData(row1); GPUglobalref() const cahit2* hits2 = tracker.HitData(row2); const cahit2 hh1 = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits1, rowHits[r.mFirstRow]); @@ -408,11 +405,10 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } } -GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, GPUsharedref() GPUTPCTrackletConstructor::MEM_LOCAL(GPUSharedMemory) & s, GPUTPCThreadMemory& GPUrestrict() r) +GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUTPCTrackletConstructor::GPUSharedMemory& s, GPUTPCThreadMemory& GPUrestrict() r) { int32_t iRow = 0, iRowEnd = GPUCA_ROW_COUNT; - MEM_PLAIN(GPUTPCTrackParam) - tParam; + GPUTPCTrackParam tParam; calink rowHits[GPUCA_ROW_COUNT]; if (r.mGo) { GPUTPCHitId id = tracker.TrackletStartHits()[r.mISH]; @@ -484,12 +480,12 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() MEM_GLO } template <> -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) { if (get_local_id(0) == 0) { sMem.mNStartHits = *tracker.NStartHits(); } - CA_SHARED_CACHE(&sMem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(MEM_PLAIN(GPUTPCRow))); + CA_SHARED_CACHE(&sMem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); GPUTPCThreadMemory rMem; @@ -500,9 +496,9 @@ GPUdii() void GPUTPCTrackletConstructor::Thread -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem, processorType& GPUrestrict() tracker0) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker0) { - GPUconstantref() MEM_GLOBAL(GPUTPCTracker) * GPUrestrict() pTracker = &tracker0; + GPUconstantref() GPUTPCTracker* GPUrestrict() pTracker = &tracker0; #ifdef GPUCA_GPUCODE int32_t mySlice = get_group_id(0) % GPUCA_NSLICES; int32_t currentSlice = -1; @@ -512,7 +508,7 @@ GPUdii() void GPUTPCTrackletConstructor::Thread // FIXME: GPUgeneric() needed to make the clang spirv output link correctly -GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, MEM_LG(GPUTPCTrackParam) & GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) +GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, GPUTPCTrackParam& GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) { GPUTPCThreadMemory rMem; rMem.mISH = iTracklet; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h index f82aba47788f9..b1ef74b9896c1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h @@ -28,7 +28,6 @@ namespace gpu * @class GPUTPCTrackletConstructor * */ -MEM_CLASS_PRE() class GPUTPCTracker; class GPUTPCTrackletConstructor @@ -69,9 +68,8 @@ class GPUTPCTrackletConstructor float mLastZ; // Z of the last fitted cluster }; - MEM_CLASS_PRE() struct GPUSharedMemory { - CA_SHARED_STORAGE(MEM_LG(GPUTPCRow) mRows[GPUCA_ROW_COUNT]); // rows + CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); // rows int32_t mNextStartHitFirst; // First start hit to be processed by CUDA block during next iteration int32_t mNextStartHitCount; // Number of start hits to be processed by CUDA block during next iteration int32_t mNextStartHitFirstRun; // First run for dynamic scheduler? @@ -82,36 +80,32 @@ class GPUTPCTrackletConstructor #endif // GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE }; - MEM_CLASS_PRE2() - GPUd() static void InitTracklet(MEM_LG2(GPUTPCTrackParam) & tParam); + GPUd() static void InitTracklet(GPUTPCTrackParam& tParam); - MEM_CLASS_PRE2_TEMPLATE(class T) - GPUd() static void UpdateTracklet(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() T& s, GPUTPCThreadMemory& r, GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, MEM_LG2(GPUTPCTrackParam) & tParam, int32_t iRow, calink& rowHit, calink* rowHits); + template + GPUd() static void UpdateTracklet(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() T& s, GPUTPCThreadMemory& r, GPUconstantref() GPUTPCTracker& tracker, GPUTPCTrackParam& tParam, int32_t iRow, calink& rowHit, calink* rowHits); - MEM_CLASS_PRE23() - GPUd() static void StoreTracklet(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, GPUTPCThreadMemory& r, GPUconstantref() MEM_LG2(GPUTPCTracker) & tracker, MEM_LG3(GPUTPCTrackParam) & tParam, calink* rowHits); + GPUd() static void StoreTracklet(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, GPUTPCThreadMemory& r, GPUconstantref() GPUTPCTracker& tracker, GPUTPCTrackParam& tParam, calink* rowHits); - MEM_CLASS_PRE2() - GPUd() static bool CheckCov(MEM_LG2(GPUTPCTrackParam) & tParam); + GPUd() static bool CheckCov(GPUTPCTrackParam& tParam); - GPUd() static void DoTracklet(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() GPUTPCTrackletConstructor::MEM_LOCAL(GPUSharedMemory) & sMem, GPUTPCThreadMemory& rMem); + GPUd() static void DoTracklet(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() GPUTPCTrackletConstructor::GPUSharedMemory& sMem, GPUTPCThreadMemory& rMem); #ifdef GPUCA_GPUCODE - GPUd() static int32_t FetchTracklet(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & sMem); + GPUd() static int32_t FetchTracklet(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& sMem); #endif // GPUCA_GPUCODE template - GPUd() static int32_t GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() MEM_GLOBAL(GPUTPCTracker) & tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); + GPUd() static int32_t GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx index d3da504ab4ec0..b8cbbae06e8b0 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx @@ -22,7 +22,7 @@ using namespace GPUCA_NAMESPACE::gpu; template <> -GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { // select best tracklets and kill clones @@ -39,7 +39,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread for (int32_t itr = s.mItr0 + iThread; itr < s.mNTracklets; itr += s.mNThreadsTotal) { GPUbarrierWarp(); - GPUglobalref() MEM_GLOBAL(GPUTPCTracklet) & GPUrestrict() tracklet = tracker.Tracklets()[itr]; + GPUglobalref() GPUTPCTracklet& GPUrestrict() tracklet = tracker.Tracklets()[itr]; int32_t firstRow = tracklet.FirstRow(); int32_t lastRow = tracklet.LastRow(); @@ -62,7 +62,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread gap++; } if (ih != CALINK_INVAL && ih != CALINK_DEAD_CHANNEL) { - GPUglobalref() const MEM_GLOBAL(GPUTPCRow)& row = tracker.Row(irow); + GPUglobalref() const GPUTPCRow& row = tracker.Row(irow); bool own = (tracker.HitWeight(row, ih) <= w); bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); if (own || sharedOK) { // SG!!! diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h index bae1cbe2bb876..af13b30022e6f 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h @@ -24,7 +24,6 @@ namespace GPUCA_NAMESPACE { namespace gpu { -MEM_CLASS_PRE() class GPUTPCTracker; /** @@ -34,7 +33,6 @@ class GPUTPCTracker; class GPUTPCTrackletSelector : public GPUKernelTemplate { public: - MEM_CLASS_PRE() struct GPUSharedMemory { int32_t mItr0; // index of the first track in the block int32_t mNThreadsTotal; // total n threads @@ -45,15 +43,14 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate #endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; - typedef GPUconstantref() MEM_GLOBAL(GPUTPCTracker) processorType; + typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } - MEM_TEMPLATE() - GPUhdi() static processorType* Processor(MEM_TYPE(GPUConstantMem) & processors) + GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() MEM_LOCAL(GPUSharedMemory) & smem, processorType& tracker); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu } // namespace GPUCA_NAMESPACE From 9e155cf37491f692c081d5b2838a8071512f62b3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 15 Jan 2025 21:50:33 +0100 Subject: [PATCH 3/4] Code-Checker, silence some false warnings from failures in clang-tidy --- Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu | 4 ++-- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 67a515df1c730..cbcba8fb8afd7 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -1215,14 +1215,14 @@ void processNeighboursHandler(const int startLayer, thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out nCurrentCells + 1, // num_items - 0)); + 0)); // NOLINT: failure in clang-tidy discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes)); gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage temp_storage_bytes, // temp_storage_bytes thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out nCurrentCells + 1, // num_items - 0)); + 0)); // NOLINT: failure in clang-tidy thrust::device_vector updatedCellIds(foundSeedsTable.back()) /*, lastCellIds(foundSeedsTable.back())*/; thrust::device_vector updatedCellSeeds(foundSeedsTable.back()) /*, lastCellSeeds(foundSeedsTable.back())*/; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 9f043915efb19..dd35a23d67c21 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -625,7 +625,7 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() int32_t maxBlocks = 0, threads = 0, suggestedBlocks = 0, nRegs = 0, sMem = 0; GPUFailedMsg(cudaSetDevice(mDeviceId)); for (uint32_t i = 0; i < mInternals->kernelFunctions.size(); i++) { - GPUFailedMsg(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); + GPUFailedMsg(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy GPUFailedMsg(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); GPUFailedMsg(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); GPUFailedMsg(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); From e2888c45ae031f622c221e39f5c723e848e94ab1 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Wed, 15 Jan 2025 21:01:31 +0000 Subject: [PATCH 4/4] Please consider the following formatting changes --- Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index cbcba8fb8afd7..19edef6c40346 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -1215,14 +1215,14 @@ void processNeighboursHandler(const int startLayer, thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out nCurrentCells + 1, // num_items - 0)); // NOLINT: failure in clang-tidy + 0)); // NOLINT: failure in clang-tidy discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes)); gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage temp_storage_bytes, // temp_storage_bytes thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out nCurrentCells + 1, // num_items - 0)); // NOLINT: failure in clang-tidy + 0)); // NOLINT: failure in clang-tidy thrust::device_vector updatedCellIds(foundSeedsTable.back()) /*, lastCellIds(foundSeedsTable.back())*/; thrust::device_vector updatedCellSeeds(foundSeedsTable.back()) /*, lastCellSeeds(foundSeedsTable.back())*/;