mirror of https://github.com/AlexeyAB/darknet.git
Added [convolutional] coordconv=1 for GPU-only
This commit is contained in:
parent
0d8718a434
commit
05dee78fa3
|
@ -52,7 +52,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.props" />
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
|
@ -153,7 +153,7 @@
|
|||
</Link>
|
||||
<CudaCompile>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
<CodeGeneration>compute_30,sm_30;compute_75,sm_75</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_75,sm_75</CodeGeneration>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
|
@ -302,6 +302,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.targets" />
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -259,6 +259,7 @@ struct layer {
|
|||
int maxpool_zero_nonmax;
|
||||
int out_channels;
|
||||
float reverse;
|
||||
int coordconv;
|
||||
int flatten;
|
||||
int spatial;
|
||||
int pad;
|
||||
|
|
|
@ -172,6 +172,7 @@ void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size,
|
|||
void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups);
|
||||
void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, float eps);
|
||||
void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size);
|
||||
void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type);
|
||||
|
||||
#endif // GPU
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -2381,5 +2381,52 @@ extern "C" void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size,
|
|||
const int num_blocks = get_number_of_blocks(contrast_p_size, block_size);
|
||||
P_constrastive_f_det_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (labels, feature_size, temperature, contrast_p, contrast_p_size);
|
||||
|
||||
CHECK_CUDA(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Fills (type == 0) or zeroes (type == 1) the three CoordConv channels of an
// NCHW tensor: channel 0 = normalized x coordinate, channel 1 = normalized y
// coordinate, channel 2 = radial distance sqrt(x^2 + y^2).
// type == 1 is used on the gradient buffer so no delta flows through the
// injected coordinate channels.
// One thread per tensor element; total elements = w*h*chan*batch.
__global__ void coord_conv_kernel(float *dst, int w, int h, int chan, int batch, int type)
{
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;

    // Guard the grid tail: the launcher rounds the grid up to whole blocks,
    // so without this check the last partial block writes out of bounds.
    if (idx >= w * h * chan * batch) return;

    // Decompose the flat NCHW index into (x, y, c) using a scratch copy,
    // keeping idx intact as the store index. (The previous version reused
    // the divided index for dst[], writing to idx/(w*h) instead of idx.)
    int i = idx;
    const int x = i % w;
    i = i / w;
    const int y = i % h;
    i = i / h;
    const int c = i % chan;
    //i = i / chan;
    //const int b = i % batch;

    if (type == 0) {
        if (c == 0) {
            const float x_val = (2.0f * x) / w - 1.0f; // [-1; 1)
            dst[idx] = x_val; // x - coord
        }
        else if (c == 1) {
            const float y_val = (2.0f * y) / h - 1.0f; // [-1; 1)
            dst[idx] = y_val; // y - coord
        }
        else if (c == 2) {
            const float x_val = (2.0f * x) / w - 1.0f; // [-1; 1)
            const float y_val = (2.0f * y) / h - 1.0f; // [-1; 1)
            const float rad_val = sqrtf(x_val*x_val + y_val*y_val); // [0; 1.414)
            dst[idx] = rad_val; // rad - coord
        }
    }
    else if (type == 1) {
        // Backward pass: kill the gradient in the coord channels.
        if (c >= 0 && c <= 2) {
            dst[idx] = 0;
        }
    }
}
|
||||
|
||||
// Host launcher for coord_conv_kernel: one thread per element over `size`
// elements of dst (an NCHW tensor of shape b x chan x h x w), issued on the
// shared darknet CUDA stream. `type` selects fill (0) vs. zero (1) behavior.
extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type)
{
    const int threads_per_block = BLOCK;
    const int grid_blocks = get_number_of_blocks(size, threads_per_block);

    coord_conv_kernel << <grid_blocks, threads_per_block, 0, get_cuda_stream() >> > (dst, w, h, chan, b, type);
    CHECK_CUDA(cudaPeekAtLastError());   // surface launch-configuration errors
}
|
|
@ -242,7 +242,6 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
|
|||
|
||||
layer make_history_layer(int batch, int h, int w, int c, int history_size, int steps, int train)
|
||||
{
|
||||
//steps = 1;
|
||||
layer l = { (LAYER_TYPE)0 };
|
||||
l.train = train;
|
||||
l.batch = batch;
|
||||
|
|
|
@ -629,10 +629,18 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
|||
simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.input_antialiasing_gpu);
|
||||
simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.input_layer->output_gpu, l.output_gpu);
|
||||
}
|
||||
|
||||
if (l.coordconv) {
|
||||
coord_conv_gpu(l.output_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
|
||||
{
|
||||
if (l.coordconv) {
|
||||
coord_conv_gpu(l.delta_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 1);
|
||||
}
|
||||
|
||||
if (l.antialiasing) {
|
||||
network_state s = { 0 };
|
||||
s.train = state.train;
|
||||
|
|
|
@ -229,6 +229,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
|
|||
layer.angle = option_find_float_quiet(options, "angle", 15);
|
||||
layer.grad_centr = option_find_int_quiet(options, "grad_centr", 0);
|
||||
layer.reverse = option_find_float_quiet(options, "reverse", 0);
|
||||
layer.coordconv = option_find_int_quiet(options, "coordconv", 0);
|
||||
|
||||
if(params.net.adam){
|
||||
layer.B1 = params.net.B1;
|
||||
|
|
Loading…
Reference in New Issue