Skip to content

Commit 3cf085d

Browse files
committed
Merge branch 'master' into ompd
2 parents e438314 + a838d8e commit 3cf085d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+2471
-620
lines changed

README.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,10 @@ Options for ``NVPTX device RTL``
280280
compatible with NVCC, this option can be used to pass to NVCC a valid compiler
281281
to avoid the error.
282282

283-
**LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY** = ``35``
284-
CUDA compute capability that should be supported by the NVPTX device RTL. E.g.
285-
for compute capability 6.0, the option "60" should be used. Compute capability
286-
3.5 is the minimum required.
283+
**LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES** = ``35``
284+
List of CUDA compute capabilities that should be supported by the NVPTX
285+
device RTL. E.g. for compute capabilities 6.0 and 7.0, the option "60,70"
286+
should be used. Compute capability 3.5 is the minimum required.
287287

288288
**LIBOMPTARGET_NVPTX_DEBUG** = ``OFF|ON``
289289
Enable printing of debug messages from the NVPTX device RTL.

cmake/DetectTestCompiler/CMakeLists.txt

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
cmake_minimum_required(VERSION 2.8)
22
project(DetectTestCompiler C CXX)
33

4+
include(CheckCCompilerFlag)
5+
include(CheckCXXCompilerFlag)
6+
47
function(write_compiler_information lang)
58
set(information "${CMAKE_${lang}_COMPILER}")
69
set(information "${information}\\;${CMAKE_${lang}_COMPILER_ID}")
710
set(information "${information}\\;${CMAKE_${lang}_COMPILER_VERSION}")
8-
set(information "${information}\\;${OpenMP_${lang}_FLAGS}")
11+
set(information "${information}\\;${${lang}_FLAGS}")
912
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${lang}CompilerInformation.txt ${information})
1013
endfunction(write_compiler_information)
1114

@@ -15,5 +18,22 @@ if (NOT OpenMP_Found)
1518
set(OpenMP_CXX_FLAGS "-fopenmp")
1619
endif()
1720

21+
set(C_FLAGS ${flags} ${OpenMP_C_FLAGS})
22+
set(CXX_FLAGS ${flags} ${OpenMP_CXX_FLAGS})
23+
24+
# TODO: Implement blockaddress in GlobalISel and remove this flag!
25+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
26+
check_c_compiler_flag("-fno-experimental-isel" C_HAS_EXPERIMENTAL_ISEL_FLAG)
27+
check_cxx_compiler_flag("-fno-experimental-isel" CXX_HAS_EXPERIMENTAL_ISEL_FLAG)
28+
macro(add_experimental_isel_flag lang)
29+
if (${lang}_HAS_EXPERIMENTAL_ISEL_FLAG)
30+
set(${lang}_FLAGS "-fno-experimental-isel ${${lang}_FLAGS}")
31+
endif()
32+
endmacro(add_experimental_isel_flag)
33+
34+
add_experimental_isel_flag(C)
35+
add_experimental_isel_flag(CXX)
36+
endif()
37+
1838
write_compiler_information(C)
1939
write_compiler_information(CXX)

cmake/OpenMPTesting.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ else()
117117
# Cannot use CLANG_VERSION because we are not guaranteed that this is already set.
118118
set(OPENMP_TEST_COMPILER_VERSION "${LLVM_VERSION}")
119119
set(OPENMP_TEST_COMPILER_VERSION_MAJOR "${LLVM_MAJOR_VERSION}")
120-
set(OPENMP_TEST_COMPILER_OPENMP_FLAGS "-fopenmp")
120+
# TODO: Implement blockaddress in GlobalISel and remove this flag!
121+
set(OPENMP_TEST_COMPILER_OPENMP_FLAGS "-fopenmp -fno-experimental-isel")
121122
endif()
122123

123124
# Function to set compiler features for use in lit.

libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,16 @@ find_library (
134134

135135
# There is a libcuda.so in lib64/stubs that can be used for linking.
136136
if (NOT LIBOMPTARGET_DEP_CUDA_DRIVER_LIBRARIES AND CUDA_FOUND)
137-
get_filename_component(CUDA_LIBDIR ${CUDA_LIBRARIES} DIRECTORY)
137+
# Since CMake 3.3, FindCUDA.cmake defaults to using static libraries. In this
138+
# case CUDA_LIBRARIES contains additional linker arguments which breaks
139+
# get_filename_component below. Fortunately, since that change the module
140+
# exports CUDA_cudart_static_LIBRARY which points to a single file in the
141+
# right directory.
142+
set(cuda_library ${CUDA_LIBRARIES})
143+
if (DEFINED CUDA_cudart_static_LIBRARY)
144+
set(cuda_library ${CUDA_cudart_static_LIBRARY})
145+
endif()
146+
get_filename_component(CUDA_LIBDIR ${cuda_library} DIRECTORY)
138147
find_library (
139148
LIBOMPTARGET_DEP_CUDA_DRIVER_LIBRARIES
140149
NAMES

libomptarget/deviceRTLs/nvptx/CMakeLists.txt

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,18 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
6060

6161
# Get the compute capability the user requested or use SM_35 by default.
6262
# SM_35 is what clang uses by default.
63-
set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY 35 CACHE STRING
64-
"CUDA Compute Capability to be used to compile the NVPTX device RTL.")
65-
set(CUDA_ARCH -arch sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
63+
set(default_capabilities 35)
64+
if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
65+
set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
66+
libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
67+
endif()
68+
set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING
69+
"List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
70+
string(REPLACE "," ";" nvptx_sm_list ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES})
71+
72+
foreach(sm ${nvptx_sm_list})
73+
set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
74+
endforeach()
6675

6776
# Activate RTL message dumps if requested by the user.
6877
set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
@@ -132,6 +141,11 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
132141
set(CUDA_PTX_VERSION ptx42)
133142
endif()
134143

144+
set(BC_DEBUG -DOMPTARGET_NVPTX_DEBUG=0)
145+
if(${LIBOMPTARGET_NVPTX_DEBUG})
146+
set(BC_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
147+
endif()
148+
135149
# Set flags for Clang cuda compilation. Only Clang is supported because there is
136150
# no other compiler capable of generating bitcode from cuda sources.
137151
set(CUDA_FLAGS
@@ -140,7 +154,8 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
140154
-Xclang -target-feature
141155
-Xclang +${CUDA_PTX_VERSION}
142156
--cuda-device-only
143-
-DOMPTARGET_NVPTX_TEST=0 -DOMPTARGET_NVPTX_DEBUG=0
157+
-DOMPTARGET_NVPTX_TEST=0
158+
${BC_DEBUG}
144159
)
145160

146161
# CUDA 9 header files use the nv_weak attribute which clang is not yet prepared
@@ -152,46 +167,47 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
152167

153168
# Get the compute capability the user requested or use SM_35 by default.
154169
set(CUDA_ARCH "")
155-
set(CUDA_ARCH --cuda-gpu-arch=sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
156-
157-
# Compile cuda files to bitcode.
158-
set(bc_files "")
159-
foreach(src ${cuda_src_files})
160-
get_filename_component(infile ${src} ABSOLUTE)
161-
get_filename_component(outfile ${src} NAME)
162-
163-
add_custom_command(OUTPUT ${outfile}.bc
164-
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
165-
-c ${infile} -o ${outfile}.bc
166-
DEPENDS ${infile}
167-
IMPLICIT_DEPENDS CXX ${infile}
168-
COMMENT "Building LLVM bitcode ${outfile}.bc"
169-
VERBATIM
170+
foreach(sm ${nvptx_sm_list})
171+
set(CUDA_ARCH --cuda-gpu-arch=sm_${sm})
172+
173+
# Compile cuda files to bitcode.
174+
set(bc_files "")
175+
foreach(src ${cuda_src_files})
176+
get_filename_component(infile ${src} ABSOLUTE)
177+
get_filename_component(outfile ${src} NAME)
178+
179+
add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
180+
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
181+
-c ${infile} -o ${outfile}-sm_${sm}.bc
182+
DEPENDS ${infile}
183+
IMPLICIT_DEPENDS CXX ${infile}
184+
COMMENT "Building LLVM bitcode ${outfile}-sm_${sm}.bc"
185+
VERBATIM
186+
)
187+
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}-sm_${sm}.bc)
188+
189+
list(APPEND bc_files ${outfile}-sm_${sm}.bc)
190+
endforeach()
191+
192+
# Link to a bitcode library.
193+
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
194+
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
195+
-o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc ${bc_files}
196+
DEPENDS ${bc_files}
197+
COMMENT "Linking LLVM bitcode libomptarget-nvptx-sm_${sm}.bc"
170198
)
171-
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}.bc)
172-
173-
list(APPEND bc_files ${outfile}.bc)
174-
endforeach()
175-
176-
# Link to a bitcode library.
177-
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc
178-
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
179-
-o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc ${bc_files}
180-
DEPENDS ${bc_files}
181-
COMMENT "Linking LLVM bitcode libomptarget-nvptx.bc"
182-
)
183-
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx.bc)
199+
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm}.bc)
184200

185-
add_custom_target(omptarget-nvptx-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc)
201+
add_custom_target(omptarget-nvptx-${sm}-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc)
186202

187-
# Copy library to destination.
188-
add_custom_command(TARGET omptarget-nvptx-bc POST_BUILD
189-
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc
190-
$<TARGET_FILE_DIR:omptarget-nvptx>)
191-
192-
# Install device RTL under the lib destination folder.
193-
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc DESTINATION "lib")
203+
# Copy library to destination.
204+
add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
205+
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
206+
$<TARGET_FILE_DIR:omptarget-nvptx>)
194207

208+
# Install device RTL under the lib destination folder.
209+
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "lib")
210+
endforeach()
195211
endif()
196212
endif()
197213

libomptarget/deviceRTLs/nvptx/src/counter_groupi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ INLINE Counter omptarget_nvptx_CounterGroup::Next() {
4545

4646
// advance priv by n, to be used in later waitOrRelease
4747
INLINE void omptarget_nvptx_CounterGroup::Complete(Counter &priv, Counter n) {
48-
PRINT(LD_SYNCD, "complete priv counter 0x%llx with val %lld->%lld (+%d)\n",
48+
PRINT(LD_SYNCD, "complete priv counter 0x%llx with val %llu->%llu (+%llu)\n",
4949
P64(&priv), P64(priv), P64(priv + n), n);
5050
priv += n;
5151
}

0 commit comments

Comments
 (0)