Skip to content

Commit 8301996

Browse files
committed
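Fixed the clockRate property issues: the GPU clock rate is now read via cudaDeviceGetAttribute(cudaDevAttrClockRate) instead of prop.clockRate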
1 parent 2b9ac28 commit 8301996

File tree

2 files changed

+9
-3
lines changed

2 files changed

+9
-3
lines changed

modules/module4/examples/02_multi_gpu_programming.cu

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,9 @@ double runMultiGPUWeighted(float *h_data, int size, int numGPUs) {
209209
CUDA_CHECK(cudaGetDeviceProperties(&prop, gpu));
210210

211211
// Simple weight based on SM count and clock rate
212-
weights[gpu] = prop.multiProcessorCount * (prop.clockRate / 1000.0);
212+
int gpuClockKHz = 0;
213+
cudaDeviceGetAttribute(&gpuClockKHz, cudaDevAttrClockRate, gpu);
214+
weights[gpu] = prop.multiProcessorCount * (gpuClockKHz / 1000.0);
213215
totalWeight += weights[gpu];
214216
}
215217

modules/module5/examples/01_gpu_profiling_cuda.cu

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,9 @@ void analyzeDeviceProperties() {
271271
printf("Multiprocessors: %d\n", prop.multiProcessorCount);
272272
printf("Cores per MP: %d (estimated)\n", _ConvertSMVer2Cores(prop.major, prop.minor));
273273
printf("Total Cores: %d (estimated)\n", prop.multiProcessorCount * _ConvertSMVer2Cores(prop.major, prop.minor));
274-
printf("GPU Clock Rate: %.2f GHz\n", prop.clockRate / 1e6);
274+
int gpuClockKHz = 0;
275+
cudaDeviceGetAttribute(&gpuClockKHz, cudaDevAttrClockRate, device);
276+
printf("GPU Clock Rate: %.2f GHz\n", gpuClockKHz / 1e6);
275277
int memClockKHz = 0, busWidthBits = 0;
276278
cudaDeviceGetAttribute(&memClockKHz, cudaDevAttrMemoryClockRate, device);
277279
cudaDeviceGetAttribute(&busWidthBits, cudaDevAttrGlobalMemoryBusWidth, device);
@@ -421,7 +423,9 @@ void calculateTheoreticalLimits() {
421423
// Compute throughput estimation
422424
int coresPerSM = _ConvertSMVer2Cores(prop.major, prop.minor);
423425
int totalCores = prop.multiProcessorCount * coresPerSM;
424-
double computeThroughput = totalCores * prop.clockRate / 1e6; // GFLOPS (single precision)
426+
int gpuClockKHz = 0;
427+
cudaDeviceGetAttribute(&gpuClockKHz, cudaDevAttrClockRate, device);
428+
double computeThroughput = totalCores * gpuClockKHz / 1e6; // GFLOPS (single precision)
425429
printf("Estimated Peak Compute (SP): %.1f GFLOPS\n", computeThroughput);
426430

427431
// Roofline model breakpoint

0 commit comments

Comments
 (0)