Changes from all commits (23 commits)
init.c: 6 changes (4 additions, 2 deletions)
@@ -5,7 +5,6 @@
 #include "THCCachingHostAllocator.h"
 #include "THCSleep.h"
 #include "THCTensorRandom.h"
-#include "THCHalf.h" // for CUDA_HALF_TENSOR
 
 extern void cutorch_CudaByteStorage_init(lua_State* L);
 extern void cutorch_CudaCharStorage_init(lua_State* L);
@@ -721,7 +720,6 @@ static int cutorch_getMemoryUsage(lua_State *L) {
 
 static int cutorch_setDevice(lua_State *L)
 {
-  THCState *state = cutorch_getstate(L);
   int device = (int)luaL_checknumber(L, 1)-1;
   THCudaCheck(cudaSetDevice(device));
   return 0;
@@ -1091,6 +1089,10 @@ int luaopen_libcutorch(lua_State *L)
 #endif
   lua_setfield(L, -2, "hasHalf");
 
+  /* true fp16 vs pseudo-fp16 mode: this one is per device */
+  lua_pushboolean(L, THC_nativeHalfInstructions(state));
+  lua_setfield(L, -2, "hasHalfInstructions");
+
   /* store gpu driver version in field */
   int driverVersion;
   THCudaCheck(cudaDriverGetVersion(&driverVersion));
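The new `hasHalfInstructions` field exposes, per device, whether the GPU executes fp16 arithmetic natively (true fp16) rather than computing in float and converting on the way in and out (pseudo-fp16). As a rough sketch of what such a per-device capability check involves (the function name and logic below are illustrative assumptions, not the actual THC implementation), one can query the compute capability through the CUDA runtime:

/* Sketch only: an illustrative stand-in for THC_nativeHalfInstructions.
 * It reads the current device's compute capability via the CUDA runtime
 * and applies the same sm_60 / sm_62+ test used in THCHalf.h below. */
#include <cuda_runtime.h>

static int nativeHalfInstructionsSketch(void)
{
  int device;
  struct cudaDeviceProp prop;
  if (cudaGetDevice(&device) != cudaSuccess ||
      cudaGetDeviceProperties(&prop, device) != cudaSuccess)
    return 0;
  int arch = prop.major * 100 + prop.minor * 10;  /* e.g. sm_61 -> 610 */
  return arch == 600 || arch >= 620;
}

From Lua, the result then reads as the boolean `cutorch.hasHalfInstructions`, alongside the existing compile-time `cutorch.hasHalf` flag.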
lib/THC/THCHalf.h: 63 changes (58 additions, 5 deletions)
@@ -1,16 +1,21 @@
 #ifndef THC_HALF_CONVERSION_INC
-#define THC_HALF_CONVERSION_INC
+# define THC_HALF_CONVERSION_INC
 
+#include "THCGeneral.h"
+#include "cuda.h"
+#include "cuda_runtime.h"
+#include "cublas_v2.h"
+#include "cuda_fp16.h"
 
 /* We compile with CudaHalfTensor support if we have this: */
 #if CUDA_VERSION >= 7050 || CUDA_HAS_FP16
-#define CUDA_HALF_TENSOR 1
+# define CUDA_HALF_TENSOR 1
 #endif
 
 #ifdef CUDA_HALF_TENSOR
 
-#include <cuda_fp16.h>
-#include "THCGeneral.h"
+#include "THHalf.h"
 
 #include <stdint.h>
 
 THC_EXTERNC void THCFloat2Half(THCState *state, half *out, float *in, ptrdiff_t len);
@@ -24,6 +29,54 @@ THC_API int THC_nativeHalfInstructions(THCState *state);
 /* Check for performant native fp16 support on the current device */
 THC_API int THC_fastHalfInstructions(THCState *state);
 
-#endif /* CUDA_HALF_TENSOR */
+# if defined (__CUDA_ARCH__)
+/* use intrinsic functions defined for device only in cuda_fp16.h */
+# define THC_FLOAT_TO_HALF(x) __float2half((float)x)
+# define THC_HALF_TO_FLOAT(x) __half2float(x)
+# define THC_DECL __host__ __device__ __forceinline__
+# else
+/* use host conversion functions */
+# define THC_FLOAT_TO_HALF(x) THC_float2half((float)x)
+# define THC_HALF_TO_FLOAT(x) THC_half2float(x)
+# define THC_DECL inline
+# endif
+
+#if __CUDA_ARCH__ == 600 || __CUDA_ARCH__ >= 620
+# define CUDA_HALF_INSTRUCTIONS 1
+#endif
+
+#if defined (__cplusplus) || defined (__CUDACC__)
+
+/// `half` has some type conversion issues associated with it, since it
+/// is a struct without a constructor/implicit conversion constructor.
+/// We use this to convert scalar values to the given type that the
+/// tensor expects.
+
+template <typename In, typename Out>
+struct ScalarConvert {
+  static THC_DECL Out to(const In& v) { return Out(v); }
+};
+
+template <typename Out>
+struct ScalarConvert<half, Out> {
+  static THC_DECL Out to(const half& v) {
+    return (Out) THC_HALF_TO_FLOAT(v);
+  }
+};
+
+template <typename In>
+struct ScalarConvert<In, half> {
+  static THC_DECL half to(const In& v) {
+    return THC_FLOAT_TO_HALF(v);
+  }
+};
+
+template <>
+struct ScalarConvert<half, half> {
+  static THC_DECL const half& to(const half& v) {
+    return v;
+  }
+};
+# endif /* __cplusplus */
+# endif /* CUDA_HALF_TENSOR */
 #endif /* THC_HALF_CONVERSION_INC */
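The ScalarConvert specializations route every conversion to or from `half` through float: in device code the THC_FLOAT_TO_HALF/THC_HALF_TO_FLOAT macros above expand to the `__float2half`/`__half2float` intrinsics, while on the host they fall back to the software converters, and THC_DECL lets the same member function compile as `__host__ __device__` under nvcc. A minimal usage sketch follows; the kernel and its name are hypothetical, not part of this patch:

// Sketch only: a hypothetical kernel showing ScalarConvert in device code.
// Here ScalarConvert<float, half>::to(value) expands to __float2half(value);
// the identical source compiled for the host path would call THC_float2half.
__global__ void fillHalfSketch(half *out, float value, int n)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    out[i] = ScalarConvert<float, half>::to(value);
}

Converting through float keeps the template set small: only the three half specializations are needed, and every other In/Out pair falls through to the generic Out(v) constructor cast.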