-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathRNGexample.R
More file actions
80 lines (57 loc) · 2.06 KB
/
RNGexample.R
File metadata and controls
80 lines (57 loc) · 2.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Demo: draw normal random numbers on the GPU through RCUDA and time each
# stage against a CPU baseline.
library(RCUDA)

# Announce, then call cuGetContext(TRUE) before any other CUDA work.
# NOTE(review): presumably TRUE creates a context if none exists; confirm
# against the RCUDA documentation.
cat("Setting cuGetContext(TRUE)...\n")
cuGetContext(TRUE)

# Build random.cu into random.ptx with nvcc and load the result as a module.
ptx <- nvcc(
  "random.cu",
  out = "random.ptx",
  target = "ptx",
  "-arch=compute_20",
  "-code=sm_20,compute_20"
)
m <- loadModule(ptx)

# Kernel handles looked up by name in the loaded module.
# NOTE(review): the global name `rnorm` now also refers to a kernel handle,
# not only to the base R generator.
setup <- m$setup_kernel
rnorm <- m$rnorm_kernel

# Problem size and distribution parameters passed to the kernels below.
N <- 1e8L  # NOTE 'N' is of type integer
N_per_thread <- 1000L
mu <- 0.3
sigma <- 1.5
verbose <- FALSE
# Setting grid and block dimensions.
# Blocks are 1-D with `threads_per_block` threads; the grid is a square
# grid_d x grid_d layout. The check below budgets N_per_thread values per
# thread, so grid_d is the smallest side whose square covers
# (N / N_per_thread) / threads_per_block blocks.
threads_per_block <- 1024L
block_dims <- c(threads_per_block, 1L, 1L)
grid_d <- as.integer(ceiling(sqrt((N / N_per_thread) / threads_per_block)))
grid_dims <- c(grid_d, grid_d, 1L)
cat("Grid size:\n")
print(grid_dims)
nthreads <- as.integer(prod(grid_dims) * prod(block_dims))
cat("Total number of threads to launch = ", nthreads, "\n")

# Sanity check that the launch covers all N values. The comparison is done in
# double precision: an integer product above .Machine$integer.max evaluates to
# NA (with a warning), which would make `if` fail with an unrelated error
# instead of performing this check.
if (as.numeric(nthreads) * as.numeric(N_per_thread) < as.numeric(N)) {
  stop("Grid is not large enough...!")
}
cat("Running CUDA kernel...\n")
seed <- 0L

# Stage 1: allocate one RNG state per launched thread and run the setup
# kernel on them.
# NOTE(review): sizeof = 48L is presumably sizeof(curandState); confirm
# against the CUDA toolkit in use.
tRNGinit <- system.time({
  rng_states <- cudaMalloc(
    numEls = nthreads,
    sizeof = 48L,
    elType = "curandState"
  )
  .cuda(
    setup, rng_states, seed, nthreads, as.integer(verbose),
    gridDim = grid_dims,
    blockDim = block_dims
  )
  cudaDeviceSynchronize()
})

# Stage 2: allocate the device buffer for N doubles.
tAlloc <- system.time({
  dX <- cudaMalloc(N, sizeof = 8L, elType = "double", strict = TRUE)
  cudaDeviceSynchronize()
})

# Stage 3: launch the generation kernel on the explicit grid/block layout.
# The synchronize call sits inside the timed region of every stage.
tCalc <- system.time({
  .cuda(
    rnorm, rng_states, dX, N, mu, sigma, N_per_thread,
    gridDim = grid_dims,
    blockDim = block_dims,
    .numericAsDouble = getOption("CUDA.useDouble", TRUE)
  )
  cudaDeviceSynchronize()
})

# Stage 4: copy the generated values back into an R vector.
tTransferFromGPU <- system.time({
  out <- copyFromDevice(obj = dX, nels = dX@nels, type = "double")
  cudaDeviceSynchronize()
})
# CPU baseline: draw the same number of values with base R's generator.
# The call is namespace-qualified because this script also binds the global
# name `rnorm` to a GPU kernel handle; an unqualified call works only because
# R's function lookup skips non-function bindings.
tCPU <- system.time({
  out2 <- stats::rnorm(N, mu, sigma)
})
# Same kernel launch, but letting RCUDA derive the grid/block layout from
# `gridBy` instead of passing gridDim/blockDim explicitly.
# NOTE(review): rng_states was allocated for `nthreads` threads; verify that
# the layout RCUDA picks here never launches more threads than that.
tCalc_gridby <- system.time({
  .cuda(
    rnorm, rng_states, dX, N, mu, sigma, N_per_thread,
    gridBy = as.integer(ceiling(N / N_per_thread)),
    .numericAsDouble = getOption("CUDA.useDouble", TRUE)
  )
  cudaDeviceSynchronize()
})
# Report elapsed (wall-clock) seconds for every timed stage. The "elapsed"
# entry is selected by name rather than by position 3, and the gridBy launch,
# which is timed above, is now reported as well.
cat("RNG initiation time: ", tRNGinit[["elapsed"]], "\n")
cat("GPU memory allocation time: ", tAlloc[["elapsed"]], "\n")
cat("Calculation time (GPU): ", tCalc[["elapsed"]], "\n")
cat("Calculation time (GPU, gridBy): ", tCalc_gridby[["elapsed"]], "\n")
cat("Transfer from GPU time: ", tTransferFromGPU[["elapsed"]], "\n")
cat("Calculation time (CPU): ", tCPU[["elapsed"]], "\n")