mirror of https://github.com/AlexeyAB/darknet.git
Compile fix
This commit is contained in:
parent
7c2f302321
commit
742bb7c7ce
2
Makefile
2
Makefile
|
@@ -7,7 +7,7 @@ OPENMP=0
|
|||
LIBSO=0
|
||||
|
||||
# set GPU=1 and CUDNN=1 to speedup on GPU
|
||||
# set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision using Tensor Cores) on GPU Tesla V100, Titan V, DGX-2
|
||||
# set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher
|
||||
# set AVX=1 and OPENMP=1 to speedup on CPU (if error occurs then set AVX=0)
|
||||
|
||||
DEBUG=0
|
||||
|
|
|
@@ -138,8 +138,8 @@ void fast_binarize_weights_gpu(float *weights, int n, int size, float *binary, f
|
|||
__global__ void cuda_f32_to_f16(float* input_f32, size_t size, half *output_f16)
|
||||
{
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
//if (idx < size) output_f16[idx] = __float2half(input_f32[idx]);
|
||||
if (idx < size) output_f16[idx] = __float2half_rn(input_f32[idx]);
|
||||
if (idx < size) output_f16[idx] = __float2half(input_f32[idx]);
|
||||
//if (idx < size) output_f16[idx] = __float2half_rn(input_f32[idx]); // can't be compiled on Linux without casting
|
||||
// __float2half_ru, __float2half_rd, __float2half_rz, __float2half_rn
|
||||
//if (idx < size) *((unsigned short *)output_f16 + idx) = __float2half(input_f32[idx]);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue