forked from nhardy01/RNN
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmexGPUExample.cu
More file actions
87 lines (75 loc) · 2.42 KB
/
mexGPUExample.cu
File metadata and controls
87 lines (75 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/*
* Example of how to use the mxGPUArray API in a MEX file. This example shows
* how to write a MEX function that takes a gpuArray input and returns a
* gpuArray output, e.g. B=mexFunction(A).
*
* Copyright 2012 The MathWorks, Inc.
*/
#include "mex.h"
#include "cuda.h"
#include "cuda_runtime.h"
#include "gpu/mxGPUArray.h"
/*
* Device code
*/
void __global__ TimesTwo(double * const A,
int const N)
{
/* Calculate the global linear index, assuming a 1-d grid. */
int const i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < N) {
A[i] = 2.0 * A[i];
}
}
/*
* Host code
*/
void mexFunction(int nlhs, mxArray *plhs[],
int nrhs, mxArray const *prhs[])
{
/* Declare all variables.*/
mxGPUArray *A;
mxGPUArray *B;
double *d_A;
double *d_B;
int N;
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
/* Choose a reasonably sized number of threads for the block. */
int const threadsPerBlock = 256;
int blocksPerGrid;
/* Initialize the MathWorks GPU API. */
mxInitGPU();
/* Throw an error if the input is not a GPU array. */
if ((nrhs!=1) || !(mxIsGPUArray(prhs[0]))) {
mexErrMsgIdAndTxt(errId, errMsg);
}
A = mxGPUCopyFromMxArray(prhs[0]);
/*
* Verify that A really is a double array before extracting the pointer.
*/
if (mxGPUGetClassID(A) != mxDOUBLE_CLASS) {
mexErrMsgIdAndTxt(errId, errMsg);
}
/*
* Now that we have verified the data type, extract a pointer to the input
* data on the device.
*/
d_A = (double *)(mxGPUGetData(A));
/*
* Call the kernel using the CUDA runtime API. We are using a 1-d grid here,
* and it would be possible for the number of elements to be too large for
* the grid. For this example we are not guarding against this possibility.
*/
N = (int)(mxGPUGetNumberOfElements(A));
blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
TimesTwo<<<blocksPerGrid, threadsPerBlock>>>(d_A, N);
/* Wrap the result up as a MATLAB gpuArray for return. */
plhs[0] = mxGPUCreateMxArrayOnGPU(A);
/*
* The mxGPUArray pointers are host-side structures that refer to device
* data. These must be destroyed before leaving the MEX function.
*/
mxGPUDestroyGPUArray(A);
// mxGPUDestroyGPUArray(B);
}