From 05dee78fa3c41d92eb322d8d57fb065ddebc00b4 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Mon, 17 Aug 2020 02:13:46 +0300
Subject: [PATCH] Added [convolutional] coordconv=1 for GPU-only

---
 build/darknet/darknet.vcxproj |  6 ++---
 include/darknet.h             |  1 +
 src/blas.h                    |  1 +
 src/blas_kernels.cu           | 47 +++++++++++++++++++++++++++++++++++
 src/conv_lstm_layer.c         |  1 -
 src/convolutional_kernels.cu  |  8 ++++++
 src/parser.c                  |  1 +
 7 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj
index b685bebd..824fb57e 100644
--- a/build/darknet/darknet.vcxproj
+++ b/build/darknet/darknet.vcxproj
@@ -52,7 +52,7 @@
-
+
@@ -153,7 +153,7 @@
       64
-      compute_30,sm_30;compute_75,sm_75
+      compute_35,sm_35;compute_75,sm_75
@@ -302,6 +302,6 @@
-
+
\ No newline at end of file
diff --git a/include/darknet.h b/include/darknet.h
index 69d45969..0a4dec8e 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -259,6 +259,7 @@ struct layer {
     int maxpool_zero_nonmax;
     int out_channels;
     float reverse;
+    int coordconv;
     int flatten;
     int spatial;
     int pad;
diff --git a/src/blas.h b/src/blas.h
index 02ada92c..86c9801d 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -172,6 +172,7 @@ void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size,
 void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups);
 void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, float eps);
 void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size);
+void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type);
 #endif // GPU
 
 #ifdef __cplusplus
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index bbf50eb1..b8f7aed5 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -2381,5 +2381,52 @@ extern "C" void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size,
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(contrast_p_size, block_size);
     P_constrastive_f_det_kernel << <num_blocks, block_size >> > (labels, feature_size, temperature, contrast_p, contrast_p_size);
+    CHECK_CUDA(cudaPeekAtLastError());
+}
+
+
+
+
+__global__ void coord_conv_kernel(float *dst, int w, int h, int chan, int batch, int type)
+{
+    int i = blockIdx.x*blockDim.x + threadIdx.x;
+
+    const int x = i % w;
+    i = i / w;
+    const int y = i % h;
+    i = i / h;
+    const int c = i % chan;
+    //i = i / chan;
+    //const int b = i % batch;
+
+    if (type == 0) {
+        if (c == 0) {
+            const float x_val = (2.0f * x) / w - 1.0f; // [-1; 1)
+            dst[i] = x_val; // x - coord
+        }
+        else if (c == 1) {
+            const float y_val = (2.0f * y) / h - 1.0f; // [-1; 1)
+            dst[i] = y_val; // y - coord
+        }
+        else if (c == 2) {
+            const float x_val = (2.0f * x) / w - 1.0f; // [-1; 1)
+            const float y_val = (2.0f * y) / h - 1.0f; // [-1; 1)
+            const float rad_val = sqrtf(x_val*x_val + y_val*y_val); // [0; 1.414)
+            dst[i] = rad_val; // rad - coord
+        }
+    }
+    else if (type == 1) {
+        if (c >= 0 && c <= 2) {
+            dst[i] = 0;
+        }
+    }
+}
+
+extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type)
+{
+    const int block_size = BLOCK;
+    const int num_blocks = get_number_of_blocks(size, block_size);
+    coord_conv_kernel << <num_blocks, block_size >> > (dst, w, h, chan, b, type);
+    CHECK_CUDA(cudaPeekAtLastError());
 }
\ No newline at end of file
diff --git a/src/conv_lstm_layer.c b/src/conv_lstm_layer.c
index d56eb1c1..4b0f2511 100644
--- a/src/conv_lstm_layer.c
+++ b/src/conv_lstm_layer.c
@@ -242,7 +242,6 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
 layer make_history_layer(int batch, int h, int w, int c, int history_size, int steps, int train)
 {
-    //steps = 1;
     layer l = { (LAYER_TYPE)0 };
     l.train = train;
     l.batch = batch;
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index ae922eef..b1aa4e69 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -629,10 +629,18 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
         simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.input_antialiasing_gpu);
         simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.input_layer->output_gpu, l.output_gpu);
     }
+
+    if (l.coordconv) {
+        coord_conv_gpu(l.output_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 0);
+    }
 }
 
 void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
 {
+    if (l.coordconv) {
+        coord_conv_gpu(l.delta_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 1);
+    }
+
     if (l.antialiasing) {
         network_state s = { 0 };
         s.train = state.train;
diff --git a/src/parser.c b/src/parser.c
index 59d71bed..fc562595 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -229,6 +229,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
     layer.angle = option_find_float_quiet(options, "angle", 15);
     layer.grad_centr = option_find_int_quiet(options, "grad_centr", 0);
     layer.reverse = option_find_float_quiet(options, "reverse", 0);
+    layer.coordconv = option_find_int_quiet(options, "coordconv", 0);
 
     if(params.net.adam){
         layer.B1 = params.net.B1;
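
Usage note: parse_convolutional() reads the new flag with option_find_int_quiet(options, "coordconv", 0), so it is off by default and is enabled per layer from the cfg file. An illustrative cfg fragment is shown below; the keys other than coordconv are ordinary [convolutional] options included only for context, and the values are arbitrary.

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
coordconv=1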
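
For reference on what the GPU path is meant to compute: with type=0, the forward call coord_conv_gpu(l.output_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 0) fills output channel 0 with a normalized x map in [-1; 1), channel 1 with a normalized y map in [-1; 1), and channel 2 with the radius sqrtf(x_val*x_val + y_val*y_val); with type=1, the backward call zeroes l.delta_gpu in those same channels so no gradient is propagated through them. The single-threaded CPU sketch below restates that fill assuming darknet's NCHW layout (index = ((b*chan + c)*h + y)*w + x). The helper name coord_conv_cpu is hypothetical and not part of this patch, and unlike the kernel above it computes the destination index explicitly from (b, c, y, x) rather than reusing the reduced thread index.

#include <math.h>

/* Hypothetical CPU reference sketch (not in the patch): fill the first three
   channels of an NCHW tensor with CoordConv maps (type 0) or zero them (type 1). */
static void coord_conv_cpu(float *dst, int w, int h, int chan, int batch, int type)
{
    int b, c, y, x;
    for (b = 0; b < batch; ++b) {
        for (c = 0; c < chan && c < 3; ++c) {                       // only channels 0..2 are touched
            for (y = 0; y < h; ++y) {
                for (x = 0; x < w; ++x) {
                    const int i = ((b*chan + c)*h + y)*w + x;       // NCHW index
                    if (type == 1) { dst[i] = 0; continue; }        // backward: drop gradient for coord channels
                    const float x_val = (2.0f * x) / w - 1.0f;      // [-1; 1)
                    const float y_val = (2.0f * y) / h - 1.0f;      // [-1; 1)
                    if (c == 0) dst[i] = x_val;                     // x - coord
                    else if (c == 1) dst[i] = y_val;                // y - coord
                    else dst[i] = sqrtf(x_val*x_val + y_val*y_val); // rad - coord, [0; 1.414)
                }
            }
        }
    }
}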