Fixed new [shortcut] layer

This commit is contained in:
AlexeyAB 2020-01-07 00:01:26 +03:00
parent c2ce28bbcc
commit c89482c39b
4 changed files with 15 additions and 2 deletions

View File

@ -63,6 +63,7 @@ void fix_nan_and_inf_cpu(float *input, size_t size);
void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
void simple_copy_ongpu(int size, float *src, float *dst);
// Byte-wise copy of `size_bytes` bytes from `src` to `dst`, issued with
// cudaMemcpyAsync on the stream returned by get_cuda_stream().
// NOTE: the length is in BYTES, not in float elements. Although the pointers
// are typed float*, callers also use this for non-float tables (int sizes,
// arrays of float* pointers), passing e.g. n * sizeof(int).
void memcpy_ongpu(float *dst, float *src, int size_bytes);
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
void scal_ongpu(int N, float ALPHA, float * X, int INCX);

View File

@ -576,6 +576,12 @@ extern "C" void simple_copy_ongpu(int size, float *src, float *dst)
CHECK_CUDA(cudaPeekAtLastError());
}
// Enqueue a raw copy of `size_bytes` bytes from `src` to `dst` on the stream
// returned by get_cuda_stream().
//
//   dst        - destination buffer
//   src        - source buffer
//   size_bytes - number of BYTES to copy (not a count of floats)
//
// The transfer kind is cudaMemcpyDefault, so no explicit host/device direction
// is given here. This function performs no explicit stream synchronization;
// any error from the copy call, or one already pending on the runtime, is
// routed through CHECK_CUDA.
extern "C" void memcpy_ongpu(float *dst, float *src, int size_bytes)
{
    cudaStream_t stream = get_cuda_stream();
    cudaError_t copy_status = cudaMemcpyAsync(dst, src, size_bytes, cudaMemcpyDefault, stream);
    CHECK_CUDA(copy_status);

    cudaError_t pending_status = cudaPeekAtLastError();
    CHECK_CUDA(pending_status);
}
extern "C" void mul_ongpu(int N, float * X, int INCX, float * Y, int INCY)
{
mul_kernel<<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >>>(N, X, INCX, Y, INCY);

View File

@ -845,7 +845,6 @@ layer parse_shortcut(list *options, size_params params, network net)
sizes[i] = params.net.layers[index].outputs;
layers_output[i] = params.net.layers[index].output;
layers_delta[i] = params.net.layers[index].delta;
}
#ifdef GPU

View File

@ -80,7 +80,10 @@ void resize_shortcut_layer(layer *l, int w, int h, network *net)
for (i = 0; i < l->n; ++i) {
int index = l->input_layers[i];
l->input_sizes[i] = net->layers[index].outputs;
assert(l->w == net->layers[index].w && l->h == net->layers[index].h);
l->layers_output[i] = net->layers[index].output;
l->layers_delta[i] = net->layers[index].delta;
assert(l->w == net->layers[index].out_w && l->h == net->layers[index].out_h);
}
#ifdef GPU
@ -91,6 +94,10 @@ void resize_shortcut_layer(layer *l, int w, int h, network *net)
cuda_free(l->delta_gpu);
l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch);
}
memcpy_ongpu(l->input_sizes_gpu, l->input_sizes, l->n * sizeof(int));
memcpy_ongpu(l->layers_output_gpu, l->layers_output, l->n * sizeof(float*));
memcpy_ongpu(l->layers_delta_gpu, l->layers_delta, l->n * sizeof(float*));
#endif
}