Added a multiply_ layer and set it up so you can use it instead of dropout_
after training has finished.
This commit is contained in:
Davis King 2016-01-24 07:03:06 -05:00
parent 565bed38f7
commit 919cbd1103
3 changed files with 146 additions and 1 deletion

View File

@ -692,6 +692,7 @@ namespace dlib
) :
drop_rate(drop_rate_)
{
DLIB_CASSERT(0 <= drop_rate && drop_rate <= 1,"");
}
// We have to add a copy constructor and assignment operator because the rnd object
@ -771,6 +772,81 @@ namespace dlib
template <typename SUBNET>
using dropout = add_layer<dropout_, SUBNET>;
// ----------------------------------------------------------------------------------------
class multiply_
{
    /*!
        A layer with no learnable parameters that scales its input tensor by a
        fixed constant.  It can be built from a dropout_ layer, making it a
        deterministic stand-in for dropout once training has finished.
    !*/
public:

    explicit multiply_(
        float value = 0.5
    ) : val(value)
    {
    }

    multiply_ (
        const dropout_& item
    ) : val(1-item.get_drop_rate())
    {
        // A dropout layer zeroes each value with probability drop_rate, so
        // scaling by 1-drop_rate reproduces its expected output.
    }

    float get_multiply_value (
    ) const { return val; }

    template <typename SUBNET>
    void setup (const SUBNET& /*sub*/)
    {
        // Nothing to initialize; this layer has no parameters.
    }

    void forward_inplace(const tensor& input, tensor& output)
    {
        // output = val*input + 0
        tt::affine_transform(output, input, val, 0);
    }

    void backward_inplace(
        const tensor& gradient_input,
        tensor& data_grad,
        tensor& /*params_grad*/
    )
    {
        // The derivative of val*x with respect to x is just val, so simply
        // scale the incoming gradient by the same constant.
        tt::affine_transform(data_grad, gradient_input, val, 0);
    }

    const tensor& get_layer_params() const { return params; }
    tensor& get_layer_params() { return params; }

    friend void serialize(const multiply_& item, std::ostream& out)
    {
        serialize("multiply_", out);
        serialize(item.val, out);
    }

    friend void deserialize(multiply_& item, std::istream& in)
    {
        std::string version;
        deserialize(version, in);
        if (version == "multiply_")
        {
            deserialize(item.val, in);
            return;
        }
        if (version == "dropout_")
        {
            // Since we can build a multiply_ from a dropout_, we check if that's
            // what is in the stream and, if so, just convert it right here.  The
            // version string is pushed back first so dropout_'s deserializer
            // sees a complete record.
            unserialize sin(version, in);
            dropout_ temp;
            deserialize(temp, sin);
            item = temp;
            return;
        }
        throw serialization_error("Unexpected version found while deserializing dlib::multiply_.");
    }

private:
    float val;
    resizable_tensor params; // unused; present only to satisfy the layer interface
};
template <typename SUBNET>
using multiply = add_layer<multiply_, SUBNET>;
// ----------------------------------------------------------------------------------------
class affine_

View File

@ -516,6 +516,10 @@ namespace dlib
through the stochastic function f(x) which outputs either 0 or x. The
probability of 0 being output is given by the drop_rate argument to this
object's constructor.
Note that, after you finish training a network with dropout, it is a good
idea to replace each dropout_ layer with a multiply_ layer because the
multiply_ layer is faster and deterministic.
!*/
public:
@ -524,6 +528,8 @@ namespace dlib
float drop_rate = 0.5
);
/*!
requires
- 0 <= drop_rate <= 1
ensures
- #get_drop_rate() == drop_rate
!*/
@ -555,6 +561,64 @@ namespace dlib
template <typename SUBNET>
using dropout = add_layer<dropout_, SUBNET>;
// ----------------------------------------------------------------------------------------
class multiply_
{
    /*!
        WHAT THIS OBJECT REPRESENTS
        This is an implementation of the EXAMPLE_LAYER_ interface defined above.
        In particular, it defines a basic layer that just multiplies its input
        tensor with a constant value and returns the result. It therefore has no
        learnable parameters.

        It can also be constructed directly from a dropout_ layer, in which case
        it behaves as a deterministic version of that layer.  This makes it a
        convenient (and faster) replacement for dropout_ once training has
        finished.
    !*/

public:

    explicit multiply_(
        float val = 0.5
    );
    /*!
        ensures
            - #get_multiply_value() == val
    !*/

    multiply_ (
        const dropout_& item
    );
    /*!
        ensures
            - #get_multiply_value() == 1-item.get_drop_rate()
              (i.e. We construct the multiply_ layer so that it is essentially a
              deterministic version of the given dropout_ layer)
    !*/

    float get_multiply_value (
    ) const;
    /*!
        ensures
            - returns the constant this layer scales its input by.  That is,
              this layer simply multiplies its input tensor by get_multiply_value() and
              produces the result as output.
    !*/

    template <typename SUBNET> void setup (const SUBNET& sub);
    void forward_inplace(const tensor& input, tensor& output);
    void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
    const tensor& get_layer_params() const;
    tensor& get_layer_params();
    /*!
        These functions are implemented as described in the EXAMPLE_LAYER_ interface.
    !*/
};

void serialize(const multiply_& item, std::ostream& out);
void deserialize(multiply_& item, std::istream& in);
/*!
    provides serialization support

    Note that deserialize() also accepts a serialized dropout_ layer and
    converts it into an equivalent multiply_ layer.
!*/
template <typename SUBNET>
using multiply = add_layer<multiply_, SUBNET>;
// ----------------------------------------------------------------------------------------
enum layer_mode

View File

@ -351,7 +351,7 @@ namespace
DLIB_TEST(max(abs(truth3-mat(dest))) < 1e-5);
matrix<float> truth4 = pointwise_multiply(mat(A), mat(B));
multiply(A, A, B);
tt::multiply(A, A, B);
DLIB_TEST(max(abs(truth4-mat(A))) < 1e-5);
matrix<float> truth5 = mat(B) > 0.1;
@ -965,6 +965,11 @@ namespace
void test_layers()
{
{
print_spinner();
multiply_ l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
max_pool_ l;