mirror of https://github.com/davisking/dlib.git
Added a multiply_ layer and set it up so you can use it instead of dropout_
after training has finished.
commit 919cbd1103
parent 565bed38f7
dlib/dnn/layers.h

@@ -692,6 +692,7 @@ namespace dlib
         ) :
             drop_rate(drop_rate_)
         {
+            DLIB_CASSERT(0 <= drop_rate && drop_rate <= 1,"");
         }
 
         // We have to add a copy constructor and assignment operator because the rnd object
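The new check rejects rates outside [0,1] at construction time. A tiny
illustration (a sketch; DLIB_CASSERT is dlib's always-on contract check):

    dlib::dropout_ ok(0.5f);     // fine: 0 <= 0.5 <= 1
    // dlib::dropout_ bad(1.5f); // would trigger the new DLIB_CASSERT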
@@ -771,6 +772,81 @@ namespace dlib
     template <typename SUBNET>
     using dropout = add_layer<dropout_, SUBNET>;
 
+// ----------------------------------------------------------------------------------------
+
+    class multiply_
+    {
+    public:
+        explicit multiply_(
+            float val_ = 0.5
+        ) :
+            val(val_)
+        {
+        }
+
+        multiply_ (
+            const dropout_& item
+        ) : val(1-item.get_drop_rate()) {}
+
+        float get_multiply_value (
+        ) const { return val; }
+
+        template <typename SUBNET>
+        void setup (const SUBNET& /*sub*/)
+        {
+        }
+
+        void forward_inplace(const tensor& input, tensor& output)
+        {
+            tt::affine_transform(output, input, val, 0);
+        }
+
+        void backward_inplace(
+            const tensor& gradient_input,
+            tensor& data_grad,
+            tensor& /*params_grad*/
+        )
+        {
+            tt::affine_transform(data_grad, gradient_input, val, 0);
+        }
+
+        const tensor& get_layer_params() const { return params; }
+        tensor& get_layer_params() { return params; }
+
+        friend void serialize(const multiply_& item, std::ostream& out)
+        {
+            serialize("multiply_", out);
+            serialize(item.val, out);
+        }
+
+        friend void deserialize(multiply_& item, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version == "dropout_")
+            {
+                // Since we can build a multiply_ from a dropout_ we check if that's what
+                // is in the stream and if so then just convert it right here.
+                unserialize sin(version, in);
+                dropout_ temp;
+                deserialize(temp, sin);
+                item = temp;
+                return;
+            }
+
+            if (version != "multiply_")
+                throw serialization_error("Unexpected version found while deserializing dlib::multiply_.");
+            deserialize(item.val, in);
+        }
+
+    private:
+        float val;
+        resizable_tensor params; // unused
+    };
+
+    template <typename SUBNET>
+    using multiply = add_layer<multiply_, SUBNET>;
+
 // ----------------------------------------------------------------------------------------
 
     class affine_
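Both passes are a plain scale: forward computes val*input, and since
d(val*x)/dx = val the backward pass scales the incoming gradient by the same
constant. A quick check of the dropout_ conversion constructor defined above
(a sketch; the 0.25 rate is arbitrary):

    #include <dlib/dnn.h>
    #include <cassert>
    #include <cmath>

    int main()
    {
        dlib::dropout_ d(0.25f);  // training layer: zeroes inputs with probability 0.25
        dlib::multiply_ m(d);     // deterministic replacement for test time

        // Classic (non-inverted) dropout has E[output] = (1-drop_rate)*input,
        // so the matching constant is 1 - 0.25 = 0.75.
        assert(std::abs(m.get_multiply_value() - 0.75f) < 1e-6f);
    }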
dlib/dnn/layers_abstract.h
@@ -516,6 +516,10 @@ namespace dlib
                 through the stochastic function f(x) which outputs either 0 or x.  The
                 probability of 0 being output is given by the drop_rate argument to this
                 object's constructor.
+
+                Note that, after you finish training a network with dropout, it is a good
+                idea to replace each dropout_ layer with a multiply_ layer because the
+                multiply_ layer is faster and deterministic.
         !*/
 
     public:
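Why 1-drop_rate is the right constant: during training f(x) outputs x with
probability 1-p and 0 with probability p (where p = drop_rate), so the
training-time expectation of the layer's output is

    E[f(x)] = (1-p)*x + p*0 = (1-p)*x

and a multiply_ layer with value 1-p reproduces that average behavior exactly,
with no randomness and no use of the random number generator.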
@@ -524,6 +528,8 @@ namespace dlib
             float drop_rate = 0.5
         );
         /*!
+            requires
+                - 0 <= drop_rate <= 1
             ensures
                 - #get_drop_rate() == drop_rate
         !*/
@@ -555,6 +561,64 @@ namespace dlib
     template <typename SUBNET>
     using dropout = add_layer<dropout_, SUBNET>;
 
+// ----------------------------------------------------------------------------------------
+
+    class multiply_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This is an implementation of the EXAMPLE_LAYER_ interface defined above.
+                In particular, it defines a basic layer that just multiplies its input
+                tensor with a constant value and returns the result.  It therefore has no
+                learnable parameters.
+        !*/
+
+    public:
+        explicit multiply_(
+            float val = 0.5
+        );
+        /*!
+            ensures
+                - #get_multiply_value() == val
+        !*/
+
+        multiply_ (
+            const dropout_& item
+        );
+        /*!
+            ensures
+                - #get_multiply_value() == 1-item.get_drop_rate()
+                  (i.e. We construct the multiply_ layer so that it is essentially a
+                  deterministic version of the given dropout_ layer)
+        !*/
+
+        float get_multiply_value (
+        ) const;
+        /*!
+            ensures
+                - this layer simply multiplies its input tensor by get_multiply_value() and
+                  produces the result as output.
+        !*/
+
+        template <typename SUBNET> void setup (const SUBNET& sub);
+        void forward_inplace(const tensor& input, tensor& output);
+        void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+        const tensor& get_layer_params() const;
+        tensor& get_layer_params();
+        /*!
+            These functions are implemented as described in the EXAMPLE_LAYER_ interface.
+        !*/
+    };
+
+    void serialize(const multiply_& item, std::ostream& out);
+    void deserialize(multiply_& item, std::istream& in);
+    /*!
+        provides serialization support
+    !*/
+
+    template <typename SUBNET>
+    using multiply = add_layer<multiply_, SUBNET>;
+
 // ----------------------------------------------------------------------------------------
 
     enum layer_mode
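The serialization functions above are what make in-place upgrades of saved
models possible: multiply_'s deserialize() recognizes a stored dropout_ and
converts it on the fly. A stream-level sketch, using only what this commit
declares:

    #include <dlib/dnn.h>
    #include <sstream>

    int main()
    {
        dlib::dropout_ d(0.4f);
        std::stringstream buf;
        serialize(d, buf);       // writes the "dropout_" version tag, then the rate

        dlib::multiply_ m;
        deserialize(m, buf);     // sees "dropout_" and builds the converted layer
        // m.get_multiply_value() is now 1 - 0.4 = 0.6
    }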
dlib/test/dnn.cpp
@@ -351,7 +351,7 @@ namespace
         DLIB_TEST(max(abs(truth3-mat(dest))) < 1e-5);
 
         matrix<float> truth4 = pointwise_multiply(mat(A), mat(B));
-        multiply(A, A, B);
+        tt::multiply(A, A, B);
         DLIB_TEST(max(abs(truth4-mat(A))) < 1e-5);
 
         matrix<float> truth5 = mat(B) > 0.1;

(The call is qualified here because this commit also introduces a multiply
layer template in namespace dlib, which the unqualified name would now find.)
@@ -965,6 +965,11 @@
 
     void test_layers()
     {
+        {
+            print_spinner();
+            multiply_ l;
+            DLIB_TEST_MSG(test_layer(l), test_layer(l));
+        }
         {
             print_spinner();
             max_pool_ l;