Added a multiply_ layer and set it up so you can use it instead of dropout_
after training has finished.
This commit is contained in:
Davis King 2016-01-24 07:03:06 -05:00
parent 565bed38f7
commit 919cbd1103
3 changed files with 146 additions and 1 deletion

View File

@ -692,6 +692,7 @@ namespace dlib
) :
drop_rate(drop_rate_)
{
DLIB_CASSERT(0 <= drop_rate && drop_rate <= 1,"");
}
// We have to add a copy constructor and assignment operator because the rnd object
@ -771,6 +772,81 @@ namespace dlib
template <typename SUBNET>
using dropout = add_layer<dropout_, SUBNET>;
// ----------------------------------------------------------------------------------------
class multiply_
{
    /*!
        A layer with no learnable parameters that scales its input tensor by a
        fixed constant.  It can be built from a dropout_ layer, making it a
        deterministic stand-in for dropout once training has finished.
    !*/
public:

    explicit multiply_(
        float value = 0.5
    ) : val(value)
    {
    }

    multiply_ (
        const dropout_& item
    ) : val(1-item.get_drop_rate())
    {
        // A dropout layer zeroes each value with probability drop_rate, so
        // scaling by 1-drop_rate reproduces its expected output.
    }

    float get_multiply_value (
    ) const { return val; }

    template <typename SUBNET>
    void setup (const SUBNET& /*sub*/)
    {
        // Nothing to initialize; this layer has no parameters.
    }

    void forward_inplace(const tensor& input, tensor& output)
    {
        // output = val*input + 0
        tt::affine_transform(output, input, val, 0);
    }

    void backward_inplace(
        const tensor& gradient_input,
        tensor& data_grad,
        tensor& /*params_grad*/
    )
    {
        // The derivative of val*x with respect to x is just val, so simply
        // scale the incoming gradient by the same constant.
        tt::affine_transform(data_grad, gradient_input, val, 0);
    }

    const tensor& get_layer_params() const { return params; }
    tensor& get_layer_params() { return params; }

    friend void serialize(const multiply_& item, std::ostream& out)
    {
        serialize("multiply_", out);
        serialize(item.val, out);
    }

    friend void deserialize(multiply_& item, std::istream& in)
    {
        std::string version;
        deserialize(version, in);
        if (version == "multiply_")
        {
            deserialize(item.val, in);
            return;
        }
        if (version == "dropout_")
        {
            // Since we can build a multiply_ from a dropout_, we check if that's
            // what is in the stream and, if so, just convert it right here.  The
            // version string is pushed back first so dropout_'s deserializer
            // sees a complete record.
            unserialize sin(version, in);
            dropout_ temp;
            deserialize(temp, sin);
            item = temp;
            return;
        }
        throw serialization_error("Unexpected version found while deserializing dlib::multiply_.");
    }

private:
    float val;
    resizable_tensor params; // unused; present only to satisfy the layer interface
};
template <typename SUBNET>
using multiply = add_layer<multiply_, SUBNET>;
// ----------------------------------------------------------------------------------------
class affine_

View File

@ -516,6 +516,10 @@ namespace dlib
through the stochastic function f(x) which outputs either 0 or x. The
probability of 0 being output is given by the drop_rate argument to this
object's constructor.
Note that, after you finish training a network with dropout, it is a good
idea to replace each dropout_ layer with a multiply_ layer because the
multiply_ layer is faster and deterministic.
!*/
public:
@ -524,6 +528,8 @@ namespace dlib
float drop_rate = 0.5
);
/*!
requires
- 0 <= drop_rate <= 1
ensures
- #get_drop_rate() == drop_rate
!*/
@ -555,6 +561,64 @@ namespace dlib
template <typename SUBNET>
using dropout = add_layer<dropout_, SUBNET>;
// ----------------------------------------------------------------------------------------
class multiply_
{
    /*!
        WHAT THIS OBJECT REPRESENTS
        This is an implementation of the EXAMPLE_LAYER_ interface defined above.
        In particular, it defines a basic layer that just multiplies its input
        tensor with a constant value and returns the result. It therefore has no
        learnable parameters.

        It can also be constructed directly from a dropout_ layer, in which case
        it behaves as a deterministic version of that layer.  This makes it a
        convenient (and faster) replacement for dropout_ once training has
        finished.
    !*/

public:

    explicit multiply_(
        float val = 0.5
    );
    /*!
        ensures
            - #get_multiply_value() == val
    !*/

    multiply_ (
        const dropout_& item
    );
    /*!
        ensures
            - #get_multiply_value() == 1-item.get_drop_rate()
              (i.e. We construct the multiply_ layer so that it is essentially a
              deterministic version of the given dropout_ layer)
    !*/

    float get_multiply_value (
    ) const;
    /*!
        ensures
            - returns the constant this layer scales its input by.  That is,
              this layer simply multiplies its input tensor by get_multiply_value() and
              produces the result as output.
    !*/

    template <typename SUBNET> void setup (const SUBNET& sub);
    void forward_inplace(const tensor& input, tensor& output);
    void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
    const tensor& get_layer_params() const;
    tensor& get_layer_params();
    /*!
        These functions are implemented as described in the EXAMPLE_LAYER_ interface.
    !*/
};

void serialize(const multiply_& item, std::ostream& out);
void deserialize(multiply_& item, std::istream& in);
/*!
    provides serialization support

    Note that deserialize() also accepts a serialized dropout_ layer and
    converts it into an equivalent multiply_ layer.
!*/
template <typename SUBNET>
using multiply = add_layer<multiply_, SUBNET>;
// ----------------------------------------------------------------------------------------
enum layer_mode

View File

@ -351,7 +351,7 @@ namespace
DLIB_TEST(max(abs(truth3-mat(dest))) < 1e-5);
matrix<float> truth4 = pointwise_multiply(mat(A), mat(B));
multiply(A, A, B);
tt::multiply(A, A, B);
DLIB_TEST(max(abs(truth4-mat(A))) < 1e-5);
matrix<float> truth5 = mat(B) > 0.1;
@ -965,6 +965,11 @@ namespace
void test_layers()
{
{
print_spinner();
multiply_ l;
DLIB_TEST_MSG(test_layer(l), test_layer(l));
}
{
print_spinner();
max_pool_ l;