From 05dee78fa3c41d92eb322d8d57fb065ddebc00b4 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Mon, 17 Aug 2020 02:13:46 +0300
Subject: [PATCH] Added [convolutional] coordconv=1 for GPU-only

---
 build/darknet/darknet.vcxproj |  6 ++---
 include/darknet.h             |  1 +
 src/blas.h                    |  1 +
 src/blas_kernels.cu           | 47 +++++++++++++++++++++++++++++++++++
 src/conv_lstm_layer.c         |  1 -
 src/convolutional_kernels.cu  |  8 ++++++
 src/parser.c                  |  1 +
 7 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj
index b685bebd..824fb57e 100644
--- a/build/darknet/darknet.vcxproj
+++ b/build/darknet/darknet.vcxproj
@@ -52,7 +52,7 @@
-
+
@@ -153,7 +153,7 @@
       64
-      compute_30,sm_30;compute_75,sm_75
+      compute_35,sm_35;compute_75,sm_75
@@ -302,6 +302,6 @@
-
+
\ No newline at end of file
diff --git a/include/darknet.h b/include/darknet.h
index 69d45969..0a4dec8e 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -259,6 +259,7 @@ struct layer {
     int maxpool_zero_nonmax;
     int out_channels;
     float reverse;
+    int coordconv;
     int flatten;
     int spatial;
     int pad;
diff --git a/src/blas.h b/src/blas.h
index 02ada92c..86c9801d 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -172,6 +172,7 @@ void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size,
 void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups);
 void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, float eps);
 void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size);
+void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type);
 #endif // GPU
 
 #ifdef __cplusplus
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index bbf50eb1..b8f7aed5 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -2381,5 +2381,52 @@ extern "C" void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size,
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(contrast_p_size, block_size);
     P_constrastive_f_det_kernel << <num_blocks, block_size >> > (labels, feature_size, temperature, contrast_p, contrast_p_size);
+    CHECK_CUDA(cudaPeekAtLastError());
+}
+
+
+
+
+__global__ void coord_conv_kernel(float *dst, int w, int h, int chan, int batch, int type)
+{
+    int i = blockIdx.x*blockDim.x + threadIdx.x;
+
+    const int x = i % w;
+    i = i / w;
+    const int y = i % h;
+    i = i / h;
+    const int c = i % chan;
+    //i = i / chan;
+    //const int b = i % batch;
+
+    if (type == 0) {
+        if (c == 0) {
+            const float x_val = (2.0f * x) / w - 1.0f; // [-1; 1)
+            dst[i] = x_val; // x - coord
+        }
+        else if (c == 1) {
+            const float y_val = (2.0f * y) / h - 1.0f; // [-1; 1)
+            dst[i] = y_val; // y - coord
+        }
+        else if (c == 2) {
+            const float x_val = (2.0f * x) / w - 1.0f; // [-1; 1)
+            const float y_val = (2.0f * y) / h - 1.0f; // [-1; 1)
+            const float rad_val = sqrtf(x_val*x_val + y_val*y_val); // [0; 1.414)
+            dst[i] = rad_val; // rad - coord
+        }
+    }
+    else if (type == 1) {
+        if (c >= 0 && c <= 2) {
+            dst[i] = 0;
+        }
+    }
+}
+
+extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type)
+{
+    const int block_size = BLOCK;
+    const int num_blocks = get_number_of_blocks(size, block_size);
+    coord_conv_kernel << <num_blocks, block_size >> > (dst, w, h, chan, b, type);
+    CHECK_CUDA(cudaPeekAtLastError());
 }
\ No newline at end of file
diff --git a/src/conv_lstm_layer.c b/src/conv_lstm_layer.c
index d56eb1c1..4b0f2511 100644
--- a/src/conv_lstm_layer.c
+++ b/src/conv_lstm_layer.c
@@ -242,7 +242,6 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
 layer make_history_layer(int batch, int h, int w, int c, int history_size, int steps, int train)
 {
-    //steps = 1;
     layer l = { (LAYER_TYPE)0 };
     l.train = train;
     l.batch = batch;
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index ae922eef..b1aa4e69 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -629,10 +629,18 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
         simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.input_antialiasing_gpu);
         simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.input_layer->output_gpu, l.output_gpu);
     }
+
+    if (l.coordconv) {
+        coord_conv_gpu(l.output_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 0);
+    }
 }
 
 void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
 {
+    if (l.coordconv) {
+        coord_conv_gpu(l.delta_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 1);
+    }
+
     if (l.antialiasing) {
         network_state s = { 0 };
         s.train = state.train;
diff --git a/src/parser.c b/src/parser.c
index 59d71bed..fc562595 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -229,6 +229,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
     layer.angle = option_find_float_quiet(options, "angle", 15);
     layer.grad_centr = option_find_int_quiet(options, "grad_centr", 0);
     layer.reverse = option_find_float_quiet(options, "reverse", 0);
+    layer.coordconv = option_find_int_quiet(options, "coordconv", 0);
 
     if(params.net.adam){
         layer.B1 = params.net.B1;
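
Usage note: parse_convolutional() reads the new flag with option_find_int_quiet(options, "coordconv", 0), so it is off by default and is enabled per layer from the cfg file. An illustrative cfg fragment is shown below; the keys other than coordconv are ordinary [convolutional] options included only for context, and the values are arbitrary.

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
coordconv=1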
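
For reference on what the GPU path is meant to compute: with type=0, the forward call coord_conv_gpu(l.output_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 0) fills output channel 0 with a normalized x map in [-1; 1), channel 1 with a normalized y map in [-1; 1), and channel 2 with the radius sqrtf(x_val*x_val + y_val*y_val); with type=1, the backward call zeroes l.delta_gpu in those same channels so no gradient is propagated through them. The single-threaded CPU sketch below restates that fill assuming darknet's NCHW layout (index = ((b*chan + c)*h + y)*w + x). The helper name coord_conv_cpu is hypothetical and not part of this patch, and unlike the kernel above it computes the destination index explicitly from (b, c, y, x) rather than reusing the reduced thread index.

#include <math.h>

/* Hypothetical CPU reference sketch (not in the patch): fill the first three
   channels of an NCHW tensor with CoordConv maps (type 0) or zero them (type 1). */
static void coord_conv_cpu(float *dst, int w, int h, int chan, int batch, int type)
{
    int b, c, y, x;
    for (b = 0; b < batch; ++b) {
        for (c = 0; c < chan && c < 3; ++c) {                       // only channels 0..2 are touched
            for (y = 0; y < h; ++y) {
                for (x = 0; x < w; ++x) {
                    const int i = ((b*chan + c)*h + y)*w + x;       // NCHW index
                    if (type == 1) { dst[i] = 0; continue; }        // backward: drop gradient for coord channels
                    const float x_val = (2.0f * x) / w - 1.0f;      // [-1; 1)
                    const float y_val = (2.0f * y) / h - 1.0f;      // [-1; 1)
                    if (c == 0) dst[i] = x_val;                     // x - coord
                    else if (c == 1) dst[i] = y_val;                // y - coord
                    else dst[i] = sqrtf(x_val*x_val + y_val*y_val); // rad - coord, [0; 1.414)
                }
            }
        }
    }
}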