Removed unnecessary zero initialization of parameter gradients in core.h.

Davis King 2015-11-11 17:59:40 -05:00
parent eada4be8e3
commit e2a2a26a1b
5 changed files with 15 additions and 15 deletions

View File

@@ -365,7 +365,6 @@ namespace dlib
         {
             dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
             params_grad.copy_size(details.get_layer_params());
-            params_grad = 0;
             details.backward(get_output(), get_gradient_input(), wsub, static_cast<tensor&>(params_grad));
             // Don't try to adjust the parameters if this layer doesn't have any.
             if (params_grad.size() != 0)
@@ -601,7 +600,6 @@ namespace dlib
         {
             subnet_wrapper wsub(x, grad_final_ignored);
             params_grad.copy_size(details.get_layer_params());
-            params_grad = 0;
             details.backward(get_output(), get_gradient_input(), wsub, static_cast<tensor&>(params_grad));
             // Don't try to adjust the parameters if this layer doesn't have any.
             if (params_grad.size() != 0)
@@ -1605,11 +1603,11 @@ namespace dlib
         // Now tell the layer to compute all the gradients. In the rest of this function
         // we will just be checking that these gradients were computed correctly by
         // comparing them to a central differences approximation.
-        resizable_tensor params_grad, random_noise;
+        resizable_tensor params_grad;
         params_grad.copy_size(l.get_layer_params());
-        random_noise.copy_size(l.get_layer_params());
-        randomize_parameters(random_noise, 5, rnd);
-        params_grad = random_noise;
+        // Set the params grad to something crazy so that it's very obvious if it doesn't
+        // get fully assigned.
+        params_grad = std::numeric_limits<float>::infinity();
         l.backward(output, input_grad, subnetwork, params_grad);
@@ -1631,7 +1629,7 @@ namespace dlib
         // Compute a reference derivative via a central differences approximation and
         // compare it to the one output by the layer and make sure they match.
         double reference_derivative = (dot(out2,input_grad)-dot(out3, input_grad))/(2*eps);
-        double output_derivative = params_grad.host()[i]-random_noise.host()[i];
+        double output_derivative = params_grad.host()[i];
         double relative_error = (reference_derivative - output_derivative)/(reference_derivative + 1e-100);
         if (std::abs(relative_error) > 0.01)
         {
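
A note on the gradient checker change above: since backward() is now expected to assign params_grad rather than add to it, pre-filling the tensor with infinity means any element a layer forgets to write stays infinite and fails the relative-error comparison, whereas pre-filling with zero (as the old += contract required) would let such a bug masquerade as a zero gradient. A minimal, dlib-free sketch of that trick; buggy_backward is a made-up stand-in, not anything from the library:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

// Hypothetical stand-in for a layer that forgets to write part of its
// parameter gradient; nothing here is dlib's actual API.
void buggy_backward(std::vector<float>& params_grad)
{
    params_grad[0] = 0.25f;   // assigns only the first element
}

int main()
{
    std::vector<float> params_grad(3);
    // Pre-fill with something "crazy" so unassigned elements stand out.
    std::fill(params_grad.begin(), params_grad.end(),
              std::numeric_limits<float>::infinity());
    buggy_backward(params_grad);
    for (float g : params_grad)
        if (!std::isfinite(g))
            std::cout << "layer failed to assign its whole parameter gradient\n";
}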

View File

@@ -221,7 +221,7 @@ namespace dlib
                 gradient_input.size() > 0,"");
             const float alpha = 1;
-            const float beta = 1;
+            const float beta = 0;
             check(cudnnConvolutionBackwardBias(context(),
                                     &alpha,
                                     descriptor(gradient_input),
@@ -483,7 +483,7 @@ namespace dlib
         )
         {
             const float alpha = 1;
-            const float beta = 1;
+            const float beta = 0;
             check(cudnnConvolutionBackwardFilter_v3(context(),
                     &alpha,
                     descriptor(data),
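
The beta change follows cuDNN's documented alpha/beta blending convention for these routines, roughly dst = alpha*computed + beta*dst: beta = 1 accumulated the new gradient into whatever was already in the output tensor, while beta = 0 overwrites it, matching the new assign semantics. A plain C++ sketch of that convention (illustrative only, not a cuDNN call):

#include <cstddef>
#include <vector>

// Mimics cuDNN's output blending: dst[i] = alpha*computed[i] + beta*dst[i].
void blend(float alpha, float beta,
           const std::vector<float>& computed, std::vector<float>& dst)
{
    for (std::size_t i = 0; i < dst.size(); ++i)
        dst[i] = alpha*computed[i] + beta*dst[i];
}

With the zero initialization removed in core.h, keeping beta = 1 would have accumulated into uninitialized memory, so switching to beta = 0 is what keeps these gradients correct.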

View File

@@ -128,7 +128,7 @@ namespace dlib
                 - let OUT be the output of add(1,OUT,1,BIAS)
                 - let f(gradient_input,BIAS) == dot(gradient_input,OUT)
                 - Then this function computes the gradient of f() with respect to BIAS and
-                  adds it to grad.
+                  assigns it to grad.
         !*/
     // ------------------------------------------------------------------------------------
@@ -219,7 +219,7 @@ namespace dlib
                 - let OUT be the output of (*this)(OUT,data,filters).
                 - let f(data,filters) == dot(OUT, gradient_input)
                 - This function finds the gradient of f() with respect to filters
-                  and adds this gradient to filters_gradient.
+                  and assigns this gradient to filters_gradient.
         !*/
     private:
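
For the bias spec above, OUT is add(1,OUT,1,BIAS) and f == dot(gradient_input,OUT), so the gradient of f with respect to each bias element is just gradient_input summed over the dimensions the bias is broadcast across; the only behavioral change is that this sum is now written into grad instead of added to it. A rough sketch of that reduction, assuming a simple samples x channels layout rather than dlib's real tensor class:

#include <vector>

// Hypothetical layout: gradient_input is samples x channels, row-major,
// and grad holds one value per channel.
void backward_bias(const std::vector<float>& gradient_input,
                   long samples, long channels,
                   std::vector<float>& grad)
{
    for (long c = 0; c < channels; ++c)
    {
        float sum = 0;
        for (long s = 0; s < samples; ++s)
            sum += gradient_input[s*channels + c];
        grad[c] = sum;   // assign, don't accumulate
    }
}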

View File

@@ -92,7 +92,7 @@ namespace dlib
         void backward(const tensor& , const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
         {
             // compute the gradient of the parameters.
-            params_grad += trans(mat(sub.get_output()))*mat(gradient_input);
+            params_grad = trans(mat(sub.get_output()))*mat(gradient_input);
             // compute the gradient for the data
             sub.get_gradient_input() += mat(gradient_input)*trans(mat(params));
@@ -161,7 +161,9 @@ namespace dlib
             for (unsigned long i = 0; i < sub.get_output().size(); ++i)
             {
                 if (in[i] > 0)
-                    out[i] += grad[i];
+                    out[i] = grad[i];
+                else
+                    out[i] = 0;
             }
         }
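
Both hunks above are the same contract update applied to concrete layers: the fc layer's parameter gradient, the usual trans(X)*dY product, is now written straight into params_grad, and the relu path writes every element of its output buffer, with the new else branch storing zero where the input was not positive, instead of relying on the buffer's prior contents. A compact sketch of the fc parameter gradient with plain nested loops, assuming row-major samples x dims matrices rather than dlib's mat() expressions:

#include <vector>

// Hypothetical shapes: x is samples x in_dims, dy is samples x out_dims,
// params_grad is in_dims x out_dims, all row-major.
void fc_param_gradient(const std::vector<float>& x,
                       const std::vector<float>& dy,
                       long samples, long in_dims, long out_dims,
                       std::vector<float>& params_grad)
{
    for (long i = 0; i < in_dims; ++i)
        for (long o = 0; o < out_dims; ++o)
        {
            float sum = 0;
            for (long s = 0; s < samples; ++s)
                sum += x[s*in_dims + i] * dy[s*out_dims + o];
            params_grad[i*out_dims + o] = sum;  // assign (was += before this commit)
        }
}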

View File

@@ -201,8 +201,8 @@ namespace dlib
               draw inputs from the immediate sub layer, sub.subnet(), or
               any earlier layer. So you must consider the gradients with
               respect to all inputs drawn from sub)
-            Finally, backward() adds these gradients into the output by performing:
-                - params_grad += PARAMETER_GRADIENT
+            Finally, backward() outputs these gradients by performing:
+                - params_grad = PARAMETER_GRADIENT
                 - for all valid I:
                     - layer<I>(sub).get_gradient_input() += DATA_GRADIENT_I
         !*/
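
So the documented contract ends up asymmetric: params_grad is assigned outright by backward(), while the data gradients are still accumulated into each get_gradient_input(). A skeletal toy layer showing what a backward() written against that contract looks like; toy_scale_layer and its buffers are invented for illustration and are not dlib's actual layer interface:

#include <cstddef>
#include <vector>

// Toy "multiply the input by one scalar parameter" layer.
struct toy_scale_layer
{
    float param = 2.0f;

    void backward(const std::vector<float>& input,
                  const std::vector<float>& gradient_input,
                  std::vector<float>& data_grad,    // plays the role of get_gradient_input()
                  std::vector<float>& params_grad)  // plays the role of params_grad
    {
        // params_grad = PARAMETER_GRADIENT   (assigned, not accumulated)
        float g = 0;
        for (std::size_t i = 0; i < input.size(); ++i)
            g += input[i]*gradient_input[i];
        params_grad[0] = g;

        // get_gradient_input() += DATA_GRADIENT   (still accumulated)
        for (std::size_t i = 0; i < input.size(); ++i)
            data_grad[i] += param*gradient_input[i];
    }
};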