mirror of https://github.com/AlexeyAB/darknet.git
Fixed weights size for depthwise grouped convolutional layers
parent f0582446f2
commit 8c80ba6b53
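Why the change: in a grouped (and, in the extreme case, depthwise) convolution each filter only sees c/groups input channels, so this fork sizes the weight buffer as l.nweights = (c / groups) * n * size * size rather than l.size*l.size*l.c*l.n. The old product over-counts by a factor of groups, so the FP16 conversions and NaN checks below read and write past the weight buffers. A minimal sketch of the relationship (the helper name conv_nweights is illustrative, not darknet code):

    #include <assert.h>

    /* Weight count of a (possibly grouped) convolutional layer.
     * With groups == c (depthwise), this is groups times smaller
     * than the naive size*size*c*n product. */
    static int conv_nweights(int c, int n, int size, int groups)
    {
        assert(groups > 0 && c % groups == 0);
        return (c / groups) * n * size * size;
    }

For example, a 3x3 depthwise layer with c = n = 32 and groups = 32 holds 3*3*(32/32)*32 = 288 weights, while the old count 3*3*32*32 = 9216 overshoots by 32x.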
@@ -518,7 +518,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
     printf("\n is_nan_or_inf(state.input) = %d \n", input_nan_inf);
     if (input_nan_inf) getchar();

-    int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.size * l.size * l.c * l.n);
+    int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.nweights);
     printf("\n is_nan_or_inf(l.weights_gpu) = %d \n", weights_nan_inf);
     if (weights_nan_inf) getchar();
     */
@@ -697,8 +697,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
         // calculate conv weight updates
         // Already: l.weight_updates_gpu = (l.weight_updates_gpu - l.weight*decay*batch*subdivision)*momentum
         // so we should copy f32 to f16, or compute: f16=(w_up - w*d*b*s)*m
-        assert((l.c*l.n*l.size*l.size) > 0);
-        cuda_convert_f32_to_f16(l.weight_updates_gpu, l.c*l.n*l.size*l.size, l.weight_updates_gpu16);
+        assert((l.nweights) > 0);
+        cuda_convert_f32_to_f16(l.weight_updates_gpu, l.nweights, l.weight_updates_gpu16);

         CHECK_CUDNN(cudnnConvolutionBackwardFilter(cudnn_handle(),
             &one,
@@ -714,7 +714,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
             l.dweightDesc16,
             l.weight_updates_gpu16));    // l.weight_updates_gpu);

-        cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.c*l.n*l.size*l.size, l.weight_updates_gpu);
+        cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.nweights, l.weight_updates_gpu);

         if (state.delta) {
             if (l.binary || l.xnor) swap_binary(&l);
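For context, cuda_convert_f32_to_f16 / cuda_convert_f16_to_f32 copy a buffer element by element between precisions for the cuDNN FP16 path, so the element count passed in must match the real weight buffer size. A rough sketch of what such a conversion kernel can look like (illustrative only, not darknet's actual implementation):

    #include <cuda_fp16.h>

    __global__ void f32_to_f16_kernel(const float *src, size_t n, half *dst)
    {
        size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) dst[i] = __float2half(src[i]);   // one element per thread
    }

    // Launched with n = l.nweights; the old n = l.size*l.size*l.c*l.n
    // would read past src and write past dst on a grouped layer.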
@@ -856,7 +856,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)
         if (state.delta) {
             fix_nan_and_inf(state.delta, l.inputs * l.batch);
         }
-        int size = l.size * l.size * l.c * l.n;
+        int size = l.nweights;
         fix_nan_and_inf(l.weight_updates_gpu, size);
         fix_nan_and_inf(l.weights_gpu, size);
     }
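fix_nan_and_inf sanitizes a GPU buffer in place, so it too must be given the true element count or it will "fix" memory beyond the weights. A sketch of the idea (illustrative; darknet's replacement value for non-finite elements may differ):

    __global__ void fix_nan_and_inf_kernel(float *buf, size_t n)
    {
        size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n && (isnan(buf[i]) || isinf(buf[i])))
            buf[i] = 0.0f;   // replace non-finite values
    }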
@@ -1001,7 +1001,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
             //size_t bit_input_size = intput_size / 8 + 1;
             //char *bit_input = calloc(bit_input_size, sizeof(char));

-            //size_t weights_size = k * m; //l.size*l.size*l.c*l.n;
+            //size_t weights_size = k * m; //l.size*l.size*l.c*l.n; // l.nweights
             //size_t bit_weights_size = weights_size / 8 + 1;

             //char *bit_weights = calloc(bit_weights_size, sizeof(char));
@@ -1136,7 +1136,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)

 void update_convolutional_layer(convolutional_layer l, int batch, float learning_rate, float momentum, float decay)
 {
-    //int size = l.size*l.size*l.c*l.n;
+    //int size = l.nweights;
     axpy_cpu(l.n, learning_rate / batch, l.bias_updates, 1, l.biases, 1);
     scal_cpu(l.n, momentum, l.bias_updates, 1);
|
@ -162,8 +162,8 @@ void forward_backward_network_gpu(network net, float *x, float *y)
|
|||
layer l = net.layers[i];
|
||||
if (net.cudnn_half){
|
||||
if (l.type == CONVOLUTIONAL && l.weights_gpu && l.weights_gpu16) {
|
||||
assert((l.c*l.n*l.size*l.size) > 0);
|
||||
cuda_convert_f32_to_f16(l.weights_gpu, l.c*l.n*l.size*l.size, l.weights_gpu16);
|
||||
assert((l.nweights) > 0);
|
||||
cuda_convert_f32_to_f16(l.weights_gpu, l.nweights, l.weights_gpu16);
|
||||
}
|
||||
else if (l.type == CRNN && l.input_layer->weights_gpu && l.input_layer->weights_gpu16) {
|
||||
assert((l.input_layer->c*l.input_layer->n*l.input_layer->size*l.input_layer->size) > 0);
|
||||
|