diff --git a/dlib/dnn/layers.h b/dlib/dnn/layers.h
index 8c58d810f..4ef8458e7 100644
--- a/dlib/dnn/layers.h
+++ b/dlib/dnn/layers.h
@@ -692,6 +692,7 @@ namespace dlib
         ) :
             drop_rate(drop_rate_)
         {
+            DLIB_CASSERT(0 <= drop_rate && drop_rate <= 1,"");
         }
 
         // We have to add a copy constructor and assignment operator because the rnd object
@@ -771,6 +772,81 @@ namespace dlib
     template <typename SUBNET>
     using dropout = add_layer<dropout_, SUBNET>;
 
+// ----------------------------------------------------------------------------------------
+
+    class multiply_
+    {
+    public:
+        explicit multiply_(
+            float val_ = 0.5
+        ) :
+            val(val_)
+        {
+        }
+
+        multiply_ (
+            const dropout_& item
+        ) : val(1-item.get_drop_rate()) {}
+
+        float get_multiply_value (
+        ) const { return val; }
+
+        template <typename SUBNET>
+        void setup (const SUBNET& /*sub*/)
+        {
+        }
+
+        void forward_inplace(const tensor& input, tensor& output)
+        {
+            tt::affine_transform(output, input, val, 0);
+        }
+
+        void backward_inplace(
+            const tensor& gradient_input,
+            tensor& data_grad,
+            tensor& /*params_grad*/
+        )
+        {
+            tt::affine_transform(data_grad, gradient_input, val, 0);
+        }
+
+        const tensor& get_layer_params() const { return params; }
+        tensor& get_layer_params() { return params; }
+
+        friend void serialize(const multiply_& item, std::ostream& out)
+        {
+            serialize("multiply_", out);
+            serialize(item.val, out);
+        }
+
+        friend void deserialize(multiply_& item, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version == "dropout_")
+            {
+                // Since we can build a multiply_ from a dropout_ we check if that's what
+                // is in the stream and if so then just convert it right here.
+                unserialize sin(version, in);
+                dropout_ temp;
+                deserialize(temp, sin);
+                item = temp;
+                return;
+            }
+
+            if (version != "multiply_")
+                throw serialization_error("Unexpected version found while deserializing dlib::multiply_.");
+            deserialize(item.val, in);
+        }
+
+    private:
+        float val;
+        resizable_tensor params; // unused
+    };
+
+    template <typename SUBNET>
+    using multiply = add_layer<multiply_, SUBNET>;
+
 // ----------------------------------------------------------------------------------------
 
     class affine_
diff --git a/dlib/dnn/layers_abstract.h b/dlib/dnn/layers_abstract.h
index 1788bb320..68661645d 100644
--- a/dlib/dnn/layers_abstract.h
+++ b/dlib/dnn/layers_abstract.h
@@ -516,6 +516,10 @@ namespace dlib
                 through the stochastic function f(x) which outputs either 0 or x.  The
                 probability of 0 being output is given by the drop_rate argument to this
                 object's constructor.
+
+                Note that, after you finish training a network with dropout, it is a good
+                idea to replace each dropout_ layer with a multiply_ layer because the
+                multiply_ layer is faster and deterministic.
         !*/
 
     public:
@@ -524,6 +528,8 @@ namespace dlib
             float drop_rate = 0.5
         );
         /*!
+            requires
+                - 0 <= drop_rate <= 1
             ensures
                 - #get_drop_rate() == drop_rate
         !*/
@@ -555,6 +561,64 @@ namespace dlib
     template <typename SUBNET>
     using dropout = add_layer<dropout_, SUBNET>;
 
+// ----------------------------------------------------------------------------------------
+
+    class multiply_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This is an implementation of the EXAMPLE_LAYER_ interface defined above.
+                In particular, it defines a basic layer that just multiplies its input
+                tensor with a constant value and returns the result.  It therefore has no
+                learnable parameters.
+        !*/
+
+    public:
+
+        explicit multiply_(
+            float val = 0.5
+        );
+        /*!
+            ensures
+                - #get_multiply_value() == val
+        !*/
+
+        multiply_ (
+            const dropout_& item
+        );
+        /*!
+            ensures
+                - #get_multiply_value() == 1-item.get_drop_rate()
+                  (i.e. We construct the multiply_ layer so that it is essentially a
+                  deterministic version of the given dropout_ layer)
+        !*/
+
+        float get_multiply_value (
+        ) const;
+        /*!
+            ensures
+                - this layer simply multiplies its input tensor by get_multiply_value()
+                  and produces the result as output.
+        !*/
+
+        template <typename SUBNET> void setup (const SUBNET& sub);
+        void forward_inplace(const tensor& input, tensor& output);
+        void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+        const tensor& get_layer_params() const;
+        tensor& get_layer_params();
+        /*!
+            These functions are implemented as described in the EXAMPLE_LAYER_ interface.
+        !*/
+    };
+
+    void serialize(const multiply_& item, std::ostream& out);
+    void deserialize(multiply_& item, std::istream& in);
+    /*!
+        provides serialization support
+    !*/
+
+    template <typename SUBNET>
+    using multiply = add_layer<multiply_, SUBNET>;
+
 // ----------------------------------------------------------------------------------------
 
     enum layer_mode
diff --git a/dlib/test/dnn.cpp b/dlib/test/dnn.cpp
index ddef99f2b..67950850c 100644
--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -351,7 +351,7 @@ namespace
         DLIB_TEST(max(abs(truth3-mat(dest))) < 1e-5);
 
         matrix<float> truth4 = pointwise_multiply(mat(A), mat(B));
-        multiply(A, A, B);
+        tt::multiply(A, A, B);
         DLIB_TEST(max(abs(truth4-mat(A))) < 1e-5);
 
         matrix<float> truth5 = mat(B) > 0.1;
@@ -965,6 +965,11 @@ namespace
     void test_layers()
     {
+        {
+            print_spinner();
+            multiply_ l;
+            DLIB_TEST_MSG(test_layer(l), test_layer(l));
+        }
         {
             print_spinner();
             max_pool_ l;
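For anyone picking this patch up downstream: the layers_abstract.h docs above recommend swapping each dropout_ layer for a multiply_ layer once training is done. A minimal sketch of that conversion is below. The network layout (the fc/relu/input sizes and the loss) is made up for illustration; the conversion itself relies on the dropout_-to-multiply_ converting constructor added in this patch, together with dlib's existing support for constructing a network from another network whose corresponding layers are pairwise constructible (the same mechanism used elsewhere in dlib to convert bn_con layers to affine ones). If that constructor path is not available in your dlib version, the serialize/deserialize round trip at the end achieves the same result.

```cpp
#include <dlib/dnn.h>
using namespace dlib;

// Training-time network. dropout is stochastic: each forward pass zeros a
// random fraction (drop_rate, default 0.5) of its inputs, so repeated runs
// on the same input give different outputs.
using train_net = loss_multiclass_log<fc<10,relu<dropout<fc<84,
                  input<matrix<unsigned char>>>>>>>;

// Deployment network: identical except dropout -> multiply. The multiply_
// layer deterministically scales its input by 1-drop_rate instead of
// sampling, so it is faster and repeatable.
using run_net = loss_multiclass_log<fc<10,relu<multiply<fc<84,
                input<matrix<unsigned char>>>>>>>;

int main()
{
    train_net tnet;
    // ... train tnet with dnn_trainer as usual ...

    // Layer-wise conversion: each multiply_ is constructed from the matching
    // dropout_, so get_multiply_value() == 1-get_drop_rate().
    run_net rnet(tnet);

    // Alternatively, the deserialize() added in this patch recognizes the
    // "dropout_" version tag and converts on the fly, so a saved dropout
    // network can be loaded straight into the multiply version.
    serialize("net.dat") << tnet;
    deserialize("net.dat") >> rnet;

    return 0;
}
```

The on-disk conversion works because multiply_'s deserialize checks for the "dropout_" version string and, via the unserialize shim, replays the stream through dropout_'s own deserialize before converting; that is exactly what the `if (version == "dropout_")` branch in layers.h is for.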