mirror of https://github.com/AlexeyAB/darknet.git
Minor fix for shortcut multilayer if it doesn't use weights
This commit is contained in:
parent 2a9fe045f3
commit 678ba01335
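The recurring change in the hunks below replaces a pointer test (if (weights), if (weights_gpu)) or an unconditional division with an if (nweights > 0) guard. The hazard is visible from the code itself: layer_step = nweights / (n + 1) is 0 whenever the shortcut layer carries no weights, so an unguarded src_outputs / layer_step, as in the old backward_shortcut_multilayer_gpu, divides by zero. A minimal self-contained sketch of the guarded computation follows; compute_step and the sample numbers are illustrative stand-ins, not darknet identifiers.

#include <stdio.h>

/* Guarded step computation, mirroring the pattern applied throughout this
 * commit: when nweights == 0 the division is skipped and step stays 0. */
static int compute_step(int src_outputs, int nweights, int n)
{
    const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) when weighted, 0 otherwise
    int step = 0;
    if (nweights > 0) step = src_outputs / layer_step; // unguarded code could divide by layer_step == 0
    return step;
}

int main(void)
{
    printf("weighted shortcut:   step = %d\n", compute_step(8112, 2, 1)); // layer_step = 1, step = 8112
    printf("weightless shortcut: step = %d\n", compute_step(8112, 0, 1)); // guard keeps step at 0
    return 0;
}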
@@ -79,7 +79,7 @@ void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *o
     // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w)
     const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w)
     int step = 0;
-    if (weights) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
+    if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
 
     int id;
     #pragma omp parallel for
@@ -148,7 +148,7 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int
     // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w)
     const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w)
     int step = 0;
-    if (weights) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
+    if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
 
     int id;
     #pragma omp parallel for
@@ -687,7 +687,7 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
     // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w)
     const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w)
     int step = 0;
-    if (weights_gpu) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
+    if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
 
     int src_id = id;
     const int src_i = src_id % src_outputs;
@@ -762,7 +762,7 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
     // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w)
     const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w)
     int step = 0;
-    if (weights_gpu) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
+    if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
 
     int src_id = id;
     const int src_i = src_id % src_outputs;
@@ -836,7 +836,8 @@ extern "C" void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int
     float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalizion)
 {
     const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w)
-    const int step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
+    int step = 0;
+    if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
     //printf(" nweights = %d, n = %d, layer_step = %d, step = %d \n", nweights, n, layer_step, step);
 
     //printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n);
@@ -466,6 +466,8 @@ int main(int argc, char **argv)
 
     show_opencv_info();
 
+    init_cpu();
+
     if (0 == strcmp(argv[1], "average")){
         average(argc, argv);
     } else if (0 == strcmp(argv[1], "yolo")){
@@ -1544,7 +1544,6 @@ void save_shortcut_weights(layer l, FILE *fp)
 #endif
     int num = l.nweights;
     fwrite(l.weights, sizeof(float), num, fp);
-
 }
 
 void save_convolutional_weights(layer l, FILE *fp)
@@ -1822,10 +1821,6 @@ void load_convolutional_weights(layer l, FILE *fp)
 
 void load_shortcut_weights(layer l, FILE *fp)
 {
-    if (l.binary) {
-        //load_convolutional_weights_binary(l, fp);
-        //return;
-    }
     int num = l.nweights;
     int read_bytes;
     read_bytes = fread(l.weights, sizeof(float), num, fp);
@@ -186,6 +186,7 @@ void backward_shortcut_layer(const layer l, network_state state)
 
 void update_shortcut_layer(layer l, int batch, float learning_rate_init, float momentum, float decay)
 {
+    if (l.nweights > 0) {
     float learning_rate = learning_rate_init*l.learning_rate_scale;
     //float momentum = a.momentum;
     //float decay = a.decay;
@@ -194,6 +195,7 @@ void update_shortcut_layer(layer l, int batch, float learning_rate_init, float m
     axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1);
     axpy_cpu(l.nweights, learning_rate / batch, l.weight_updates, 1, l.weights, 1);
     scal_cpu(l.nweights, momentum, l.weight_updates, 1);
+    }
 }
 
 #ifdef GPU
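Taken together, the two hunks above wrap the whole CPU-side SGD update in the new nweights guard. The compact, self-contained sketch below shows what that guarded update amounts to; the shortcut_weights struct, update_shortcut, and the axpy/scal helpers are simplified stand-ins for darknet's layer, update_shortcut_layer, axpy_cpu, and scal_cpu, not the real definitions.

#include <stdio.h>

/* Stand-ins for darknet's axpy_cpu / scal_cpu (classic BLAS-style ops). */
static void axpy(int n, float a, const float *x, float *y) { for (int i = 0; i < n; ++i) y[i] += a * x[i]; }
static void scal(int n, float a, float *x)                 { for (int i = 0; i < n; ++i) x[i] *= a; }

typedef struct {
    int nweights;                 /* 0 for a weightless shortcut */
    float weights[4];
    float weight_updates[4];
    float learning_rate_scale;
} shortcut_weights;               /* hypothetical, simplified stand-in for darknet's layer */

/* Mirrors update_shortcut_layer after this commit: the whole SGD step is
 * skipped when the layer carries no weights. */
static void update_shortcut(shortcut_weights *l, int batch, float lr_init, float momentum, float decay)
{
    if (l->nweights > 0) {
        float lr = lr_init * l->learning_rate_scale;
        axpy(l->nweights, -decay * batch, l->weights, l->weight_updates); /* weight decay */
        axpy(l->nweights, lr / batch, l->weight_updates, l->weights);     /* apply update */
        scal(l->nweights, momentum, l->weight_updates);                   /* momentum */
    }
}

int main(void)
{
    shortcut_weights weighted   = { 4, {1, 1, 1, 1}, {0.5f, 0.5f, 0.5f, 0.5f}, 1.0f };
    shortcut_weights weightless = { 0, {0}, {0}, 1.0f };
    update_shortcut(&weighted,   64, 0.001f, 0.9f, 0.0005f);
    update_shortcut(&weightless, 64, 0.001f, 0.9f, 0.0005f); /* now a no-op instead of touching empty buffers */
    printf("weighted.weights[0] = %f\n", weighted.weights[0]);
    return 0;
}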
@@ -238,6 +240,7 @@ void backward_shortcut_layer_gpu(const layer l, network_state state)
 
 void update_shortcut_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay)
 {
+    if (l.nweights > 0) {
     float learning_rate = learning_rate_init*l.learning_rate_scale;
     //float momentum = a.momentum;
     //float decay = a.decay;
@@ -253,13 +256,14 @@ void update_shortcut_layer_gpu(layer l, int batch, float learning_rate_init, flo
     //if (l.clip) {
     //    constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1);
     //}
+    }
 }
 
 void pull_shortcut_layer(layer l)
 {
     cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights);
     CHECK_CUDA(cudaPeekAtLastError());
-    cudaStreamSynchronize(get_cuda_stream());
+    CHECK_CUDA(cudaStreamSynchronize(get_cuda_stream()));
 }
 
 void push_shortcut_layer(layer l)
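The last change above stops discarding the cudaError_t returned by cudaStreamSynchronize in pull_shortcut_layer and routes it through CHECK_CUDA like the surrounding calls. A small standalone sketch of that error-checking pattern follows; check_error is a hypothetical stand-in for whatever CHECK_CUDA expands to, not the darknet macro itself.

#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for CHECK_CUDA: abort with a readable message on failure. */
static void check_error(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

int main(void)
{
    cudaStream_t stream;
    check_error(cudaStreamCreate(&stream), "cudaStreamCreate");
    /* ... asynchronous copies such as cuda_pull_array_async would be queued on the stream here ... */
    check_error(cudaPeekAtLastError(), "cudaPeekAtLastError");
    /* Previously the synchronize result was dropped; now its error code is surfaced too. */
    check_error(cudaStreamSynchronize(stream), "cudaStreamSynchronize");
    check_error(cudaStreamDestroy(stream), "cudaStreamDestroy");
    return 0;
}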