@@ -60,9 +60,18 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
6060
6161 # Get the compute capabilities the user requested or use SM_35 by default.
6262 # SM_35 is what clang uses by default.
63- set (LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY 35 CACHE STRING
64- "CUDA Compute Capability to be used to compile the NVPTX device RTL." )
65- set (CUDA_ARCH -arch sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY} )
# Resolve the list of compute capabilities to build for. The deprecated
# single-value option LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY, when defined, only
# seeds the default of the LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES cache entry
# and triggers a deprecation warning.
63+ set (default_capabilities 35)
64+ if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
65+ set (default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY} )
66+ libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES" )
67+ endif ()
68+ set (LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES "${default_capabilities}" CACHE STRING
69+ "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL." )
# The value is quoted so string(REPLACE) always receives exactly one input:
# unquoted, a semicolon-separated value would be split into several inputs
# (which are concatenated, e.g. "35;60" -> "3560") and an empty value would
# leave the command without an input argument.
70+ string (REPLACE "," ";" nvptx_sm_list "${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES}" )
71+
# Append one -gencode option per requested capability. The arch/code pair is
# quoted so it is passed to the compiler as a single argument.
72+ foreach (sm ${nvptx_sm_list} )
73+ list (APPEND CUDA_ARCH -gencode "arch=compute_${sm},code=sm_${sm}" )
74+ endforeach ()
6675
6776 # Activate RTL message dumps if requested by the user.
6877 set (LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
@@ -132,6 +141,11 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
132141 set (CUDA_PTX_VERSION ptx42)
133142 endif ()
134143
# Choose the OMPTARGET_NVPTX_DEBUG define stored in BC_DEBUG: 0 by default,
# -1 when LIBOMPTARGET_NVPTX_DEBUG is enabled. The variable is tested by name
# (not via ${...}) so an empty value evaluates to false instead of producing
# an invalid if() and a string value is not dereferenced a second time.
144+ set (BC_DEBUG -DOMPTARGET_NVPTX_DEBUG=0)
145+ if (LIBOMPTARGET_NVPTX_DEBUG)
146+ set (BC_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
147+ endif ()
148+
135149 # Set flags for Clang cuda compilation. Only Clang is supported because there is
136150 # no other compiler capable of generating bitcode from cuda sources.
137151 set (CUDA_FLAGS
@@ -140,7 +154,8 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
140154 -Xclang -target -feature
141155 -Xclang +${CUDA_PTX_VERSION}
142156 --cuda-device-only
143- -DOMPTARGET_NVPTX_TEST=0 -DOMPTARGET_NVPTX_DEBUG=0
157+ -DOMPTARGET_NVPTX_TEST=0
158+ ${BC_DEBUG}
144159 )
145160
146161 # CUDA 9 header files use the nv_weak attribute which clang is not yet prepared
@@ -152,46 +167,47 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
152167
# Bitcode build section. Lines marked "-" are the previous single-capability
# version; lines marked "+" are the replacement, which repeats the
# compile/link/copy/install steps once per entry of nvptx_sm_list.
153168 # Get the compute capability the user requested or use SM_35 by default.
154169 set (CUDA_ARCH "" )
155- set (CUDA_ARCH --cuda-gpu-arch=sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY} )
156-
157- # Compile cuda files to bitcode.
158- set (bc_files "" )
159- foreach (src ${cuda_src_files} )
160- get_filename_component (infile ${src} ABSOLUTE )
161- get_filename_component (outfile ${src} NAME )
162-
163- add_custom_command (OUTPUT ${outfile} .bc
164- COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
165- -c ${infile} -o ${outfile} .bc
166- DEPENDS ${infile}
167- IMPLICIT_DEPENDS CXX ${infile}
168- COMMENT "Building LLVM bitcode ${outfile} .bc"
169- VERBATIM
# One iteration per requested compute capability; CUDA_ARCH is overwritten
# with the --cuda-gpu-arch option for the capability being processed.
170+ foreach (sm ${nvptx_sm_list} )
171+ set (CUDA_ARCH --cuda-gpu-arch=sm_${sm} )
172+
173+ # Compile cuda files to bitcode.
# bc_files is reset on every iteration so the link step below only receives
# the bitcode files produced for the current capability.
174+ set (bc_files "" )
175+ foreach (src ${cuda_src_files} )
176+ get_filename_component (infile ${src} ABSOLUTE )
177+ get_filename_component (outfile ${src} NAME )
178+
# The capability is embedded in the output name so that the per-capability
# compile rules do not share an output file.
179+ add_custom_command (OUTPUT ${outfile} -sm_${sm} .bc
180+ COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
181+ -c ${infile} -o ${outfile} -sm_${sm} .bc
182+ DEPENDS ${infile}
183+ IMPLICIT_DEPENDS CXX ${infile}
184+ COMMENT "Building LLVM bitcode ${outfile} -sm_${sm} .bc"
185+ VERBATIM
186+ )
187+ set_property (DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile} -sm_${sm} .bc)
188+
189+ list (APPEND bc_files ${outfile} -sm_${sm} .bc)
190+ endforeach ()
191+
192+ # Link to a bitcode library.
# NOTE(review): unlike the compile rule above, this command does not pass
# VERBATIM — confirm whether that is intentional.
193+ add_custom_command (OUTPUT ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx-sm_${sm} .bc
194+ COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
195+ -o ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx-sm_${sm} .bc ${bc_files}
196+ DEPENDS ${bc_files}
197+ COMMENT "Linking LLVM bitcode libomptarget-nvptx-sm_${sm} .bc"
170198 )
171- set_property (DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile} .bc)
172-
173- list (APPEND bc_files ${outfile} .bc)
174- endforeach ()
175-
176- # Link to a bitcode library.
177- add_custom_command (OUTPUT ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx.bc
178- COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
179- -o ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx.bc ${bc_files}
180- DEPENDS ${bc_files}
181- COMMENT "Linking LLVM bitcode libomptarget-nvptx.bc"
182- )
183- set_property (DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx.bc)
199+ set_property (DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm} .bc)
184200
185- add_custom_target (omptarget-nvptx-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx.bc)
# NOTE(review): the target name and file name on the next line are spelled
# with different spacing ("${sm} - bc", "sm_ ${sm}") than in the POST_BUILD
# and install commands further down ("${sm} -bc", "sm_${sm}") — confirm they
# refer to the same target and the same linked bitcode file.
201+ add_custom_target (omptarget-nvptx-${sm} - bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx-sm_ ${sm} .bc)
186202
187- # Copy library to destination.
188- add_custom_command (TARGET omptarget-nvptx-bc POST_BUILD
189- COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx.bc
190- $<TARGET_FILE_DIR:omptarget-nvptx>)
191-
192- # Install device RTL under the lib destination folder.
193- install (FILES ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx.bc DESTINATION "lib" )
203+ # Copy library to destination.
# The destination is the directory containing the omptarget-nvptx target's
# output file, resolved through the TARGET_FILE_DIR generator expression.
204+ add_custom_command (TARGET omptarget-nvptx-${sm} -bc POST_BUILD
205+ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx-sm_${sm} .bc
206+ $<TARGET_FILE_DIR:omptarget-nvptx>)
194207
208+ # Install device RTL under the lib destination folder.
209+ install (FILES ${CMAKE_CURRENT_BINARY_DIR} /libomptarget-nvptx-sm_${sm} .bc DESTINATION "lib" )
210+ endforeach ()
195211 endif ()
196212 endif ()
197213
0 commit comments