Mirror of https://github.com/AlexeyAB/darknet.git
Commit c89482c39b ("Fixed new [shortcut] layer"); parent commit c2ce28bbcc.
@ -63,6 +63,7 @@ void fix_nan_and_inf_cpu(float *input, size_t size);
|
|||
/* GPU BLAS-style vector helpers (declarations only; the kernel/wrapper
 * implementations live in the corresponding .cu file and are not visible
 * in this fragment).
 *
 * NOTE(review): per-function semantics below are inferred from standard
 * BLAS naming (axpy = y += alpha*x, copy, scal = x *= alpha) and from the
 * INC*/OFF* stride/offset parameter conventions — confirm against the
 * actual kernels before relying on them. */

/* y[i*INCY] += ALPHA * x[i*INCX] for i in [0, N) — presumed BLAS axpy. */
void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);

/* axpy starting at element offsets OFFX / OFFY within X and Y. */
void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);

/* Element-wise copy of `size` floats from src to dst (stride 1). */
void simple_copy_ongpu(int size, float *src, float *dst);

/* Raw byte copy of `size_bytes` bytes; direction is inferred at runtime
 * (callers pass both host and device pointers — see cudaMemcpyDefault
 * in the implementation). */
void memcpy_ongpu(float *dst, float *src, int size_bytes);

/* y[i*INCY] = x[i*INCX] for i in [0, N). */
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);

/* Strided copy starting at element offsets OFFX / OFFY. */
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);

/* x[i*INCX] *= ALPHA for i in [0, N). */
void scal_ongpu(int N, float ALPHA, float * X, int INCX);
|
||||
|
|
|
@ -576,6 +576,12 @@ extern "C" void simple_copy_ongpu(int size, float *src, float *dst)
|
|||
CHECK_CUDA(cudaPeekAtLastError());
|
||||
}
|
||||
|
||||
/* Asynchronous raw byte copy of `size_bytes` bytes from src to dst,
 * queued on the library's CUDA stream.
 *
 * cudaMemcpyDefault lets the driver infer the transfer direction from the
 * pointer kinds, so callers may pass host or device pointers (requires
 * unified virtual addressing). The copy is async: it is only guaranteed
 * complete after a later synchronization on the same stream.
 *
 * Note: parameters are typed float* for consistency with the header, but
 * only the byte count matters — callers copy int/pointer arrays through
 * this as well. */
extern "C" void memcpy_ongpu(float *dst, float *src, int size_bytes)
{
    cudaStream_t stream = get_cuda_stream();
    CHECK_CUDA(cudaMemcpyAsync(dst, src, size_bytes, cudaMemcpyDefault, stream));
    // Surface any sticky error from earlier launches without clearing it.
    CHECK_CUDA(cudaPeekAtLastError());
}
|
||||
|
||||
extern "C" void mul_ongpu(int N, float * X, int INCX, float * Y, int INCY)
|
||||
{
|
||||
mul_kernel<<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >>>(N, X, INCX, Y, INCY);
|
||||
|
|
|
@ -845,7 +845,6 @@ layer parse_shortcut(list *options, size_params params, network net)
|
|||
sizes[i] = params.net.layers[index].outputs;
|
||||
layers_output[i] = params.net.layers[index].output;
|
||||
layers_delta[i] = params.net.layers[index].delta;
|
||||
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
|
|
|
@ -80,7 +80,10 @@ void resize_shortcut_layer(layer *l, int w, int h, network *net)
|
|||
for (i = 0; i < l->n; ++i) {
|
||||
int index = l->input_layers[i];
|
||||
l->input_sizes[i] = net->layers[index].outputs;
|
||||
assert(l->w == net->layers[index].w && l->h == net->layers[index].h);
|
||||
l->layers_output[i] = net->layers[index].output;
|
||||
l->layers_delta[i] = net->layers[index].delta;
|
||||
|
||||
assert(l->w == net->layers[index].out_w && l->h == net->layers[index].out_h);
|
||||
}
|
||||
|
||||
#ifdef GPU
|
||||
|
@ -91,6 +94,10 @@ void resize_shortcut_layer(layer *l, int w, int h, network *net)
|
|||
cuda_free(l->delta_gpu);
|
||||
l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch);
|
||||
}
|
||||
|
||||
memcpy_ongpu(l->input_sizes_gpu, l->input_sizes, l->n * sizeof(int));
|
||||
memcpy_ongpu(l->layers_output_gpu, l->layers_output, l->n * sizeof(float*));
|
||||
memcpy_ongpu(l->layers_delta_gpu, l->layers_delta, l->n * sizeof(float*));
|
||||
#endif
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue