From 3718b3a30adb6dd61c51564cecb936535019dd8d Mon Sep 17 00:00:00 2001 From: Jun Shi Date: Tue, 19 Apr 2016 13:32:26 -0700 Subject: [PATCH] limit number of available GPUs --- src/main/scala/BIDMat/GMat.scala | 20 +++++++++-------- src/main/scala/BIDMat/Mat.scala | 37 ++++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/main/scala/BIDMat/GMat.scala b/src/main/scala/BIDMat/GMat.scala index bb9ac557..aa430cd4 100755 --- a/src/main/scala/BIDMat/GMat.scala +++ b/src/main/scala/BIDMat/GMat.scala @@ -1859,28 +1859,28 @@ object GMat { } - def setGPU(i:Int) = jcuda.runtime.JCuda.cudaSetDevice(i) + def setGPU(i:Int) = jcuda.runtime.JCuda.cudaSetDevice(Mat.cudaDeviceIndexMap(i)) def getGPU:Int = { val ar = Array[Int](1) jcuda.runtime.JCuda.cudaGetDevice(ar) - ar(0) + Mat.cudaDeviceInverseIndexMap(ar(0)) } def connect(i:Int) = { - val v0 = jcuda.runtime.JCuda.cudaDeviceEnablePeerAccess(i,0) + val v0 = jcuda.runtime.JCuda.cudaDeviceEnablePeerAccess(Mat.cudaDeviceIndexMap(i),0) val j = getGPU setGPU(i) - val v1 = jcuda.runtime.JCuda.cudaDeviceEnablePeerAccess(j,0) + val v1 = jcuda.runtime.JCuda.cudaDeviceEnablePeerAccess(Mat.cudaDeviceIndexMap(j),0) setGPU(j) (v0, v1) } def disconnect(i:Int) = { - val v0 = jcuda.runtime.JCuda.cudaDeviceDisablePeerAccess(i) + val v0 = jcuda.runtime.JCuda.cudaDeviceDisablePeerAccess(Mat.cudaDeviceIndexMap(i)) val j = getGPU setGPU(i) - val v1 = jcuda.runtime.JCuda.cudaDeviceDisablePeerAccess(j) + val v1 = jcuda.runtime.JCuda.cudaDeviceDisablePeerAccess(Mat.cudaDeviceIndexMap(j)) setGPU(j) (v0, v1) } @@ -1888,9 +1888,11 @@ object GMat { def canconnect(i:Int) = { val ar = Array[Int](1) val j = getGPU - jcuda.runtime.JCuda.cudaDeviceCanAccessPeer(ar, i, j) - val v0 = ar(0) - jcuda.runtime.JCuda.cudaDeviceCanAccessPeer(ar, j, i) + val mi: Int = Mat.cudaDeviceIndexMap(i) + val mj: Int = Mat.cudaDeviceIndexMap(j) + jcuda.runtime.JCuda.cudaDeviceCanAccessPeer(ar, mi, mj) + val v0 = ar(0) + 
jcuda.runtime.JCuda.cudaDeviceCanAccessPeer(ar, mj, mi) (v0, ar(0)) } diff --git a/src/main/scala/BIDMat/Mat.scala b/src/main/scala/BIDMat/Mat.scala index 2150ae6e..409d9c86 100755 --- a/src/main/scala/BIDMat/Mat.scala +++ b/src/main/scala/BIDMat/Mat.scala @@ -630,7 +630,11 @@ object Mat { var recycleGrow = 1.2 // For caching, amount to grow re-allocated matrices var hasCUDA = 0 // Number of available CUDA GPUs - + + var cudaDeviceIndexMap = HashMap.empty[Int, Int] // from logical index to physical index + + var cudaDeviceInverseIndexMap = HashMap.empty[Int, Int] // from physical index to logical index + var useBLAS = true; var useMKL = true; // Use MKL libs @@ -854,7 +858,11 @@ object Mat { def checkCUDA:Unit = checkCUDA(false); - def checkCUDA(verbose:Boolean):Unit = { + def checkCUDA(verbose:Boolean):Unit = checkCUDA(verbose, -1); + + def checkCUDA(numGPUs: Int):Unit = checkCUDA(false, numGPUs); + + def checkCUDA(verbose:Boolean, numGPUs: Int):Unit = { if (hasCUDA == 0) { val os = System.getProperty("os.name"); try { @@ -901,8 +909,7 @@ object Mat { if (hasCUDA >= 0) { try { var cudanum = new Array[Int](1); - jcuda.runtime.JCuda.cudaGetDeviceCount(cudanum); - hasCUDA = cudanum(0); + findGPUs(numGPUs) printf("%d CUDA device%s found", hasCUDA, if (hasCUDA == 1) "" else "s"); if (hasCUDA > 0) { jcuda.runtime.JCuda.cudaRuntimeGetVersion(cudanum); @@ -926,6 +933,28 @@ object Mat { } } + def findGPUs(numGPUs: Int): Unit = { + val cudanum = new Array[Int](1); + jcuda.runtime.JCuda.cudaGetDeviceCount(cudanum); + val minNumGPUs = if (numGPUs < 0) cudanum(0) else Math.min(numGPUs, cudanum(0)) + var i = 0 + var j = 0 + var continue = true + val ptr: jcuda.Pointer = new jcuda.Pointer() + while(continue) { + if ((jcuda.runtime.cudaError.cudaSuccess == jcuda.runtime.JCuda.cudaSetDevice(j)) && + (jcuda.runtime.cudaError.cudaSuccess == jcuda.runtime.JCuda.cudaFree(ptr))) { + cudaDeviceIndexMap += (i -> j) + cudaDeviceInverseIndexMap += (j -> i) + println("Map logical device #" 
+ i + " --> physical device #" + j) + i += 1 + } + j += 1 + if ((j >= cudanum(0)) || (i >= minNumGPUs)) continue = false + } + hasCUDA = i + } + def copyToIntArray[@specialized(Double, Float, Long, Byte, Short) T](data:Array[T], i0:Int, idata:Array[Int], d0:Int, n:Int) (implicit numeric : Numeric[T]) = { var i = 0