From c89482c39b5d0c7906b4385f65f3290dd13f22c9 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Tue, 7 Jan 2020 00:01:26 +0300
Subject: [PATCH] Fixed new [shortcut] layer

---
Review notes (commentary only; not part of the commit):

What the patch does
  * blas.h / blas_kernels.cu: adds memcpy_ongpu(dst, src, size_bytes), a
    wrapper that enqueues cudaMemcpyAsync(..., cudaMemcpyDefault, ...) on
    the stream returned by get_cuda_stream().
  * shortcut_layer.c: resize_shortcut_layer() now refreshes
    l->layers_output[i] / l->layers_delta[i] from the referenced layers,
    asserts against out_w / out_h instead of w / h, and copies
    input_sizes, layers_output and layers_delta into their *_gpu
    counterparts via memcpy_ongpu().
  * parser.c: removes one blank line in parse_shortcut().

Points to confirm
  * memcpy_ongpu() takes float* for dst and src, but the new call sites
    copy n * sizeof(int) and n * sizeof(float*) bytes, so the arguments
    are presumably int* and float** - TODO confirm the field types; a C
    compiler would normally warn about the pointer-type mismatch.
  * size_bytes is declared int while the call sites pass
    l->n * sizeof(...) expressions (size_t), so the value is narrowed.
  * The copy is asynchronous and the wrapper does not synchronize. Whether
    the host buffers are pinned is not visible here - TODO confirm the
    source arrays are not modified or freed before the copy completes.
  * cudaMemcpyDefault infers the copy direction from the pointer values
    (per the CUDA runtime docs this relies on unified virtual addressing).
  * The cudaMemcpyAsync return value is already checked by CHECK_CUDA; the
    following cudaPeekAtLastError() reports any previously pending error
    without clearing it.
  * The context line "mul_kernel<<>>(N, X, INCX, Y, INCY);" has lost the
    launch configuration between the angle brackets (apparently stripped
    when this copy was extracted). Restore it from the upstream file
    before applying.
  * Line breaks were reconstructed from the hunk-header counts;
    indentation and blank context lines are best guesses. Apply with
    whitespace tolerance or regenerate the patch from the repository.

 src/blas.h           | 1 +
 src/blas_kernels.cu  | 6 ++++++
 src/parser.c         | 1 -
 src/shortcut_layer.c | 9 ++++++++-
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/blas.h b/src/blas.h
index a2f7e5d4..9e1d5a9a 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -63,6 +63,7 @@ void fix_nan_and_inf_cpu(float *input, size_t size);
 void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
 void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
 void simple_copy_ongpu(int size, float *src, float *dst);
+void memcpy_ongpu(float *dst, float *src, int size_bytes);
 void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
 void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
 void scal_ongpu(int N, float ALPHA, float * X, int INCX);
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index d1ee4b80..7eec0e10 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -576,6 +576,12 @@ extern "C" void simple_copy_ongpu(int size, float *src, float *dst)
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
+extern "C" void memcpy_ongpu(float *dst, float *src, int size_bytes)
+{
+    CHECK_CUDA(cudaMemcpyAsync(dst, src, size_bytes, cudaMemcpyDefault, get_cuda_stream()));
+    CHECK_CUDA(cudaPeekAtLastError());
+}
+
 extern "C" void mul_ongpu(int N, float * X, int INCX, float * Y, int INCY)
 {
     mul_kernel<<>>(N, X, INCX, Y, INCY);
diff --git a/src/parser.c b/src/parser.c
index d4b13ccb..478c620e 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -845,7 +845,6 @@ layer parse_shortcut(list *options, size_params params, network net)
         sizes[i] = params.net.layers[index].outputs;
         layers_output[i] = params.net.layers[index].output;
         layers_delta[i] = params.net.layers[index].delta;
-
     }
 
 #ifdef GPU
diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c
index f87d6b61..50234c32 100644
--- a/src/shortcut_layer.c
+++ b/src/shortcut_layer.c
@@ -80,7 +80,10 @@ void resize_shortcut_layer(layer *l, int w, int h, network *net)
     for (i = 0; i < l->n; ++i) {
         int index = l->input_layers[i];
         l->input_sizes[i] = net->layers[index].outputs;
-        assert(l->w == net->layers[index].w && l->h == net->layers[index].h);
+        l->layers_output[i] = net->layers[index].output;
+        l->layers_delta[i] = net->layers[index].delta;
+
+        assert(l->w == net->layers[index].out_w && l->h == net->layers[index].out_h);
     }
 
 #ifdef GPU
@@ -91,6 +94,10 @@ void resize_shortcut_layer(layer *l, int w, int h, network *net)
         cuda_free(l->delta_gpu);
         l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch);
     }
+
+    memcpy_ongpu(l->input_sizes_gpu, l->input_sizes, l->n * sizeof(int));
+    memcpy_ongpu(l->layers_output_gpu, l->layers_output, l->n * sizeof(float*));
+    memcpy_ongpu(l->layers_delta_gpu, l->layers_delta, l->n * sizeof(float*));
 #endif
 
 }