mirror of https://github.com/AlexeyAB/darknet.git
Tensor Cores fix
This commit is contained in:
parent 14172d42b6
commit d88a9eb0d9
|
@@ -420,7 +420,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
|||
//if (state.use_mixed_precision) {
|
||||
int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
|
||||
if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 3*state.net.burn_in) &&
|
||||
(l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 8 && l.size > 1)
|
||||
(l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 1 && l.size > 1)
|
||||
{
|
||||
//printf("\n CUDNN_HALF!!! state.index = %d \n", state.index);
|
||||
|
||||
|
@@ -671,7 +671,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
|
|||
//#ifdef CUDNN_HALF
|
||||
int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
|
||||
if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 3*state.net.burn_in) &&
|
||||
(l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 8 && l.size > 1)
|
||||
(l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 1 && l.size > 1)
|
||||
{
|
||||
const size_t input16_size = l.batch*l.c*l.w*l.h;
|
||||
const size_t delta16_size = l.batch*l.n*l.out_w*l.out_h;
|
||||
|
|
|
@@ -232,7 +232,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_
|
|||
// 3. FP32 Master Copy of Weights
|
||||
// More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops
|
||||
CHECK_CUDNN(cudnnSetConvolutionGroupCount(l->convDesc, l->groups));
|
||||
//if (l->groups == 1) CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH));
|
||||
CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH));
|
||||
#if((CUDNN_MAJOR*10 + CUDNN_MINOR) >= 72) // cuDNN >= 7.2
|
||||
//CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION)); // reduces the speed of regular and group convolution
|
||||
#endif
|
||||
|
|
|
@@ -433,12 +433,6 @@ int main(int argc, char **argv)
|
|||
_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
|
||||
#endif
|
||||
|
||||
#ifdef GPU
|
||||
show_cuda_cudnn_info();
|
||||
#endif// GPU
|
||||
|
||||
show_opencv_info();
|
||||
|
||||
int i;
|
||||
for (i = 0; i < argc; ++i) {
|
||||
if (!argv[i]) continue;
|
||||
|
@@ -466,8 +460,12 @@ int main(int argc, char **argv)
|
|||
cuda_set_device(gpu_index);
|
||||
CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
|
||||
}
|
||||
|
||||
show_cuda_cudnn_info();
|
||||
#endif
|
||||
|
||||
show_opencv_info();
|
||||
|
||||
if (0 == strcmp(argv[1], "average")){
|
||||
average(argc, argv);
|
||||
} else if (0 == strcmp(argv[1], "yolo")){
|
||||
|
|
Loading…
Reference in New Issue