Skip to content

Commit 5b3e406

Browse files
committed
Updated the Makefile of each module to support CUDA arch detection
1 parent 493b7b1 commit 5b3e406

File tree

9 files changed

+83
-20
lines changed

9 files changed

+83
-20
lines changed

modules/module1/examples/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,16 @@ BUILD_HIP = 0
2525
GPU_VENDOR = NONE
2626
endif
2727

28+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
29+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
30+
ifeq ($(strip $(CUDA_ARCH)),)
31+
CUDA_ARCH := sm_90
32+
endif
33+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
34+
2835
# Compiler flags
29-
CUDA_FLAGS = -std=c++17 -O2 -arch=sm_70
30-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_70
36+
CUDA_FLAGS = -std=c++17 -O2 $(CUDA_ARCH_FLAG)
37+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG)
3138
HIP_FLAGS = -std=c++17 -O2
3239
HIP_DEBUG_FLAGS = -std=c++17 -g
3340

modules/module2/examples/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,16 @@ BUILD_HIP = 0
2525
GPU_VENDOR = NONE
2626
endif
2727

28+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
29+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
30+
ifeq ($(strip $(CUDA_ARCH)),)
31+
CUDA_ARCH := sm_90
32+
endif
33+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
34+
2835
# Compiler flags
29-
CUDA_FLAGS = -std=c++17 -O2 -arch=sm_75 -lcudart -lcuda
30-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_75 -lcudart -lcuda
36+
CUDA_FLAGS = -std=c++17 -O2 $(CUDA_ARCH_FLAG) -lcudart -lcuda
37+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG) -lcudart -lcuda
3138
HIP_FLAGS = -std=c++17 -O2
3239
HIP_DEBUG_FLAGS = -std=c++17 -g
3340

modules/module3/examples/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,16 @@ BUILD_HIP = 0
2525
GPU_VENDOR = NONE
2626
endif
2727

28+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
29+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
30+
ifeq ($(strip $(CUDA_ARCH)),)
31+
CUDA_ARCH := sm_90
32+
endif
33+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
34+
2835
# Compiler flags
29-
CUDA_FLAGS = -std=c++17 -O2 -arch=sm_75 -lcudart -lcuda
30-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_75 -lcudart -lcuda
36+
CUDA_FLAGS = -std=c++17 -O2 $(CUDA_ARCH_FLAG) -lcudart -lcuda
37+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG) -lcudart -lcuda
3138
HIP_FLAGS = -std=c++17 -O2
3239
HIP_DEBUG_FLAGS = -std=c++17 -g
3340

modules/module4/examples/Makefile

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,17 @@ BUILD_HIP = 0
2525
GPU_VENDOR = NONE
2626
endif
2727

28+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
29+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
30+
ifeq ($(strip $(CUDA_ARCH)),)
31+
CUDA_ARCH := sm_90
32+
endif
33+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
34+
2835
# Compiler flags
29-
CUDA_FLAGS = -std=c++17 -O2 -arch=sm_75 -rdc=true -lcudart -lcuda
30-
CUDA_DP_FLAGS = -std=c++17 -O2 -arch=sm_75 -rdc=true -lcudadevrt -lcudart -lcuda
31-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_75 -rdc=true -lcudart -lcuda
36+
CUDA_FLAGS = -std=c++17 -O2 $(CUDA_ARCH_FLAG) -rdc=true -lcudart -lcuda
37+
CUDA_DP_FLAGS = -std=c++17 -O2 $(CUDA_ARCH_FLAG) -rdc=true -lcudadevrt -lcudart -lcuda
38+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG) -rdc=true -lcudart -lcuda
3239
HIP_FLAGS = -std=c++17 -O2 -fopenmp
3340
HIP_DEBUG_FLAGS = -std=c++17 -g -fopenmp
3441

modules/module5/examples/Makefile

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,16 @@ BUILD_HIP = 0
2525
GPU_VENDOR = NONE
2626
endif
2727

28+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
29+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
30+
ifeq ($(strip $(CUDA_ARCH)),)
31+
CUDA_ARCH := sm_90
32+
endif
33+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
34+
2835
# Compiler flags
29-
CUDA_FLAGS = -std=c++17 -O3 -arch=sm_90 -lineinfo
30-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_90
36+
CUDA_FLAGS = -std=c++17 -O3 $(CUDA_ARCH_FLAG) -lineinfo
37+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG)
3138
HIP_FLAGS = -std=c++17 -O3
3239
HIP_DEBUG_FLAGS = -std=c++17 -g
3340

@@ -241,7 +248,7 @@ validate: all
241248
@echo "Validating optimization implementations..."
242249
@echo "This will run examples with different optimization levels to verify correctness"
243250
@$(MAKE) clean
244-
@$(MAKE) CUDA_FLAGS="-std=c++17 -O0 -arch=sm_70" HIP_FLAGS="-std=c++17 -O0" all
251+
@$(MAKE) CUDA_FLAGS="-std=c++17 -O0 $(CUDA_ARCH_FLAG)" HIP_FLAGS="-std=c++17 -O0" all
245252
@echo "Running unoptimized versions for correctness baseline..."
246253
@for target in $(ALL_TARGETS); do \
247254
if [ -f $$target ]; then \

modules/module6/examples/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,16 @@ BUILD_HIP = 0
2424
GPU_VENDOR = NONE
2525
endif
2626

27+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
28+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
29+
ifeq ($(strip $(CUDA_ARCH)),)
30+
CUDA_ARCH := sm_90
31+
endif
32+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
33+
2734
# Compiler flags
28-
CUDA_FLAGS = -std=c++17 -O3 -arch=sm_90 -lineinfo
29-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_90
35+
CUDA_FLAGS = -std=c++17 -O3 $(CUDA_ARCH_FLAG) -lineinfo
36+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG)
3037
HIP_FLAGS = -std=c++17 -O3
3138
HIP_DEBUG_FLAGS = -std=c++17 -g
3239

modules/module7/examples/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,16 @@ BUILD_HIP = 0
2424
GPU_VENDOR = NONE
2525
endif
2626

27+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
28+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
29+
ifeq ($(strip $(CUDA_ARCH)),)
30+
CUDA_ARCH := sm_90
31+
endif
32+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
33+
2734
# Compiler flags
28-
CUDA_FLAGS = -std=c++17 -O3 -arch=sm_75 -lineinfo
29-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_75
35+
CUDA_FLAGS = -std=c++17 -O3 $(CUDA_ARCH_FLAG) -lineinfo
36+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG)
3037
HIP_FLAGS = -std=c++17 -O3
3138
HIP_DEBUG_FLAGS = -std=c++17 -g
3239

modules/module8/examples/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,16 @@ BUILD_HIP = 0
2424
GPU_VENDOR = NONE
2525
endif
2626

27+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
28+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
29+
ifeq ($(strip $(CUDA_ARCH)),)
30+
CUDA_ARCH := sm_90
31+
endif
32+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
33+
2734
# Compiler flags for professional-quality applications
28-
CUDA_FLAGS = -std=c++17 -O3 -arch=sm_70 -lineinfo --use_fast_math
29-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_70
35+
CUDA_FLAGS = -std=c++17 -O3 $(CUDA_ARCH_FLAG) -lineinfo --use_fast_math
36+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG)
3037
HIP_FLAGS = -std=c++17 -O3 -ffast-math
3138
HIP_DEBUG_FLAGS = -std=c++17 -g
3239

modules/module9/examples/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,16 @@ BUILD_HIP = 0
2525
GPU_VENDOR = NONE
2626
endif
2727

28+
# CUDA architecture detection (prefer actual GPU via nvidia-smi; fallback sm_90)
29+
# Ask nvidia-smi for the first GPU's compute capability (MAJOR.MINOR) and turn it into sm_<MAJOR><MINOR>.
# Every awk `$` is written `$$` so make passes it through literally, including the regex end anchor.
# Yields an empty value when nvidia-smi is missing or prints something else.
CUDA_ARCH ?= $(shell nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -n 1 | awk -F. '/^[0-9]+\.[0-9]+$$/ {printf "sm_%d%d", $$1, $$2}')
30+
ifeq ($(strip $(CUDA_ARCH)),)
31+
CUDA_ARCH := sm_90
32+
endif
33+
CUDA_ARCH_FLAG := -arch=$(CUDA_ARCH)
34+
2835
# Compiler flags for professional applications
29-
CUDA_FLAGS = -std=c++17 -O3 -arch=sm_70 -lineinfo --use_fast_math -DPRODUCTION_BUILD
30-
CUDA_DEBUG_FLAGS = -std=c++17 -g -G -arch=sm_70 -DDEBUG_BUILD
36+
CUDA_FLAGS = -std=c++17 -O3 $(CUDA_ARCH_FLAG) -lineinfo --use_fast_math -DPRODUCTION_BUILD
37+
CUDA_DEBUG_FLAGS = -std=c++17 -g -G $(CUDA_ARCH_FLAG) -DDEBUG_BUILD
3138
HIP_FLAGS = -std=c++17 -O3 -ffast-math -DPRODUCTION_BUILD
3239
HIP_DEBUG_FLAGS = -std=c++17 -g -DDEBUG_BUILD
3340
CXX_FLAGS = -std=c++17 -O3 -DPRODUCTION_BUILD

0 commit comments

Comments
 (0)