From d88a9eb0d90b07ca2ea076210d0b83180c91667a Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Wed, 15 Jan 2020 14:38:09 +0300
Subject: [PATCH] Tensor Cores fix

---
 src/convolutional_kernels.cu |  4 ++--
 src/convolutional_layer.c    |  2 +-
 src/darknet.c                | 10 ++++------
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 1a00c189..868bb599 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -420,7 +420,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
     //if (state.use_mixed_precision) {
     int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
     if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 3*state.net.burn_in) &&
-        (l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 8 && l.size > 1)
+        (l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 1 && l.size > 1)
     {
         //printf("\n CUDNN_HALF!!! state.index = %d \n", state.index);

@@ -671,7 +671,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
 //#ifdef CUDNN_HALF
     int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
     if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 3*state.net.burn_in) &&
-        (l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 8 && l.size > 1)
+        (l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 1 && l.size > 1)
     {
         const size_t input16_size = l.batch*l.c*l.w*l.h;
         const size_t delta16_size = l.batch*l.n*l.out_w*l.out_h;
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 458a11a7..83e9f2ab 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -232,7 +232,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_
     // 3. FP32 Master Copy of Weights
     // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops
     CHECK_CUDNN(cudnnSetConvolutionGroupCount(l->convDesc, l->groups));
-    //if (l->groups == 1) CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH));
+    CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH));
 #if((CUDNN_MAJOR*10 + CUDNN_MINOR) >= 72)  // cuDNN >= 7.2
     //CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION)); // reduces the speed of regular and group convolution
 #endif
diff --git a/src/darknet.c b/src/darknet.c
index 44227bca..ab5931da 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -433,12 +433,6 @@ int main(int argc, char **argv)
     _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
 #endif

-#ifdef GPU
-    show_cuda_cudnn_info();
-#endif// GPU
-
-    show_opencv_info();
-
     int i;
     for (i = 0; i < argc; ++i) {
         if (!argv[i]) continue;
@@ -466,8 +460,12 @@ int main(int argc, char **argv)
         cuda_set_device(gpu_index);
         CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
     }
+
+    show_cuda_cudnn_info();
 #endif

+    show_opencv_info();
+
     if (0 == strcmp(argv[1], "average")){
         average(argc, argv);
     } else if (0 == strcmp(argv[1], "yolo")){