Fixed new [shortcut] layer

2020-01-07 00:01:26 +03:00 · 2020-01-07 00:01:26 +03:00 · c89482c39b
parent c2ce28bbcc
commit c89482c39b
4 changed files with 15 additions and 2 deletions
--- a/src/blas.h
+++ b/src/blas.h
@@ -63,6 +63,7 @@ void fix_nan_and_inf_cpu(float *input, size_t size);
 void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
 void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
 void simple_copy_ongpu(int size, float *src, float *dst);
+void memcpy_ongpu(float *dst, float *src, int size_bytes);
 void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
 void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
 void scal_ongpu(int N, float ALPHA, float * X, int INCX);
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -576,6 +576,12 @@ extern "C" void simple_copy_ongpu(int size, float *src, float *dst)
    CHECK_CUDA(cudaPeekAtLastError());
 }

+extern "C" void memcpy_ongpu(float *dst, float *src, int size_bytes)  // Raw copy of size_bytes BYTES (not elements) from src to dst.
+{  // NOTE(review): params are float*, but resize_shortcut_layer sizes its copies with sizeof(int) and sizeof(float*) -- presumably used as untyped buffers; confirm.
+    CHECK_CUDA(cudaMemcpyAsync(dst, src, size_bytes, cudaMemcpyDefault, get_cuda_stream()));  // cudaMemcpyDefault: copy direction is inferred from the pointer values; issued on the stream from get_cuda_stream(), with no explicit sync in this function.
+    CHECK_CUDA(cudaPeekAtLastError());  // Checks the runtime's last recorded error; peek reads it without clearing it.
+}
+
 extern "C" void mul_ongpu(int N, float * X, int INCX, float * Y, int INCY)
 {
    mul_kernel<<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >>>(N, X, INCX, Y, INCY);
--- a/src/parser.c
+++ b/src/parser.c
@@ -845,7 +845,6 @@ layer parse_shortcut(list *options, size_params params, network net)
        sizes[i] = params.net.layers[index].outputs;
        layers_output[i] = params.net.layers[index].output;
        layers_delta[i] = params.net.layers[index].delta;
-
    }

 #ifdef GPU
--- a/src/shortcut_layer.c
+++ b/src/shortcut_layer.c
@@ -80,7 +80,10 @@ void resize_shortcut_layer(layer *l, int w, int h, network *net)
    for (i = 0; i < l->n; ++i) {
        int index = l->input_layers[i];
        l->input_sizes[i] = net->layers[index].outputs;
-        assert(l->w == net->layers[index].w && l->h == net->layers[index].h);
+        l->layers_output[i] = net->layers[index].output;
+        l->layers_delta[i] = net->layers[index].delta;
+
+        assert(l->w == net->layers[index].out_w && l->h == net->layers[index].out_h);
    }

 #ifdef GPU
@@ -91,6 +94,10 @@ void resize_shortcut_layer(layer *l, int w, int h, network *net)
        cuda_free(l->delta_gpu);
        l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch);
    }
+
+    memcpy_ongpu(l->input_sizes_gpu, l->input_sizes, l->n * sizeof(int));
+    memcpy_ongpu(l->layers_output_gpu, l->layers_output, l->n * sizeof(float*));
+    memcpy_ongpu(l->layers_delta_gpu, l->layers_delta, l->n * sizeof(float*));
 #endif

 }