Added a bias term to the assignment_function's model so the user doesn't need to remember, or even understand, that they should add it themselves. However, this change breaks backwards compatibility with the previous serialization format for assignment_function objects.
2014-02-20 21:13:41 -05:00 · 2014-02-20 21:13:41 -05:00 · 71d4306edf
parent 5563194476
commit 71d4306edf
6 changed files with 97 additions and 29 deletions
--- a/dlib/svm/assignment_function.h
+++ b/dlib/svm/assignment_function.h
@ -33,13 +33,16 @@ namespace dlib
        {
            weights.set_size(fe.num_features());
            weights = 0;
+            bias = 0;
            force_assignment = false;
        }

        explicit assignment_function(
-            const matrix<double,0,1>& weights_
+            const matrix<double,0,1>& weights_,
+            double bias_
        ) : 
            weights(weights_),
+            bias(bias_),
            force_assignment(false)
        {
            // make sure requires clause is not broken
@ -55,10 +58,12 @@ namespace dlib

        assignment_function(
            const matrix<double,0,1>& weights_,
+            double bias_,
            const feature_extractor& fe_
        ) :
            fe(fe_),
            weights(weights_),
+            bias(bias_),
            force_assignment(false)
        {
            // make sure requires clause is not broken
@ -73,11 +78,13 @@ namespace dlib

        assignment_function(
            const matrix<double,0,1>& weights_,
+            double bias_,
            const feature_extractor& fe_,
            bool force_assignment_
        ) :
            fe(fe_),
            weights(weights_),
+            bias(bias_),
            force_assignment(force_assignment_)
        {
            // make sure requires clause is not broken
@ -96,6 +103,9 @@ namespace dlib
        const matrix<double,0,1>& get_weights (
        ) const { return weights; }

+        double get_bias (
+        ) const { return bias; }
+
        bool forces_assignment (
        ) const { return force_assignment; }

@ -130,7 +140,7 @@ namespace dlib
                    if (r < (long)lhs.size() && c < (long)rhs.size())
                    {
                        fe.get_features(lhs[r], rhs[c], feats);
-                        cost(r,c) = dot(weights, feats);
+                        cost(r,c) = dot(weights, feats) + bias;
                    }
                    else
                    {
@ -188,6 +198,7 @@ namespace dlib

        feature_extractor fe;
        matrix<double,0,1> weights;
+        double bias;
        bool force_assignment;
    };

@ -201,8 +212,11 @@ namespace dlib
        std::ostream& out
    )
    {
+        int version = 2;
+        serialize(version, out);
        serialize(item.get_feature_extractor(), out);
        serialize(item.get_weights(), out);
+        serialize(item.get_bias(), out);
        serialize(item.forces_assignment(), out);
    }

@ -218,13 +232,19 @@ namespace dlib
    {
        feature_extractor fe;
        matrix<double,0,1> weights;
+        double bias;
        bool force_assignment;
+        int version = 0;
+        deserialize(version, in);
+        if (version != 2)
+            throw serialization_error("Unexpected version found while deserializing dlib::assignment_function.");

        deserialize(fe, in);
        deserialize(weights, in);
+        deserialize(bias, in);
        deserialize(force_assignment, in);

-        item = assignment_function<feature_extractor>(weights, fe, force_assignment);
+        item = assignment_function<feature_extractor>(weights, bias, fe, force_assignment);
    }

 // ----------------------------------------------------------------------------------------
--- a/dlib/svm/assignment_function_abstract.h
+++ b/dlib/svm/assignment_function_abstract.h
@ -29,9 +29,9 @@ namespace dlib
                case it is excluded from the sum.    

                Finally, match_score() is defined as: 
-                    match_score(l,r) == dot(w, PSI(l,r))
-                where l is an element of LHS, r is an element of RHS, and
-                w is a parameter vector.
+                    match_score(l,r) == dot(w, PSI(l,r)) + bias
+                where l is an element of LHS, r is an element of RHS, w is a parameter
+                vector and bias is a scalar valued parameter.

                Therefore, a feature extractor defines how the PSI() feature vector 
                is calculated.  In particular, PSI() is defined by the get_features()
@ -140,9 +140,10 @@ namespace dlib
                case it is excluded from the sum.    

                Finally, this object supports match_score() functions of the form: 
-                    match_score(l,r) == dot(w, PSI(l,r))
-                where l is an element of LHS, r is an element of RHS, w is a parameter 
-                vector, and PSI() is defined by the feature_extractor template argument.  
+                    match_score(l,r) == dot(w, PSI(l,r)) + bias
+                where l is an element of LHS, r is an element of RHS, w is a parameter
+                vector, bias is a scalar valued parameter, and PSI() is defined by the
+                feature_extractor template argument.  

            THREAD SAFETY
                It is always safe to use distinct instances of this object in different
@ -170,11 +171,13 @@ namespace dlib
                  (i.e. it will have its default value)
                - #get_weights().size() == #get_feature_extractor().num_features()
                - #get_weights() == 0
+                - #get_bias() == 0
                - #forces_assignment() == false 
        !*/

        explicit assignment_function(
-            const matrix<double,0,1>& weights
+            const matrix<double,0,1>& weights,
+            double bias
        );
        /*!
            requires
@ -183,11 +186,13 @@ namespace dlib
                - #get_feature_extractor() == feature_extractor() 
                  (i.e. it will have its default value)
                - #get_weights() == weights
+                - #get_bias() == bias
                - #forces_assignment() == false 
        !*/

        assignment_function(
            const matrix<double,0,1>& weights,
+            double bias,
            const feature_extractor& fe
        );
        /*!
@ -196,11 +201,13 @@ namespace dlib
            ensures
                - #get_feature_extractor() == fe
                - #get_weights() == weights
+                - #get_bias() == bias
                - #forces_assignment() == false 
        !*/

        assignment_function(
            const matrix<double,0,1>& weights,
+            double bias,
            const feature_extractor& fe,
            bool force_assignment
        );
@ -210,6 +217,7 @@ namespace dlib
            ensures
                - #get_feature_extractor() == fe
                - #get_weights() == weights
+                - #get_bias() == bias
                - #forces_assignment() == force_assignment
        !*/

@ -228,6 +236,13 @@ namespace dlib
                  The length of the vector is get_feature_extractor().num_features().  
        !*/

+        double get_bias (
+        ) const;
+        /*!
+            ensures
+                - returns the bias parameter associated with this assignment function.
+        !*/
+
        bool forces_assignment (
        ) const; 
        /*!
--- a/dlib/svm/structural_assignment_trainer.h
+++ b/dlib/svm/structural_assignment_trainer.h
@ -184,9 +184,13 @@ namespace dlib

            matrix<double,0,1> weights; 

-            solver(prob, weights, num_nonnegative_weights(fe));
+            // Take the min here because we want to prevent the user from accidentally
+            // forcing the bias term to be non-negative.
+            const unsigned long num_nonneg = std::min(fe.num_features(),num_nonnegative_weights(fe));
+            solver(prob, weights, num_nonneg);

-            return assignment_function<feature_extractor>(weights,fe,force_assignment);
+            const double bias = weights(weights.size()-1);
+            return assignment_function<feature_extractor>(colm(weights,0,weights.size()-1), bias,fe,force_assignment);

        }

--- a/dlib/svm/structural_svm_assignment_problem.h
+++ b/dlib/svm/structural_svm_assignment_problem.h
@ -14,16 +14,41 @@

 namespace dlib
 {
+    template <long n, typename T>
+    struct column_matrix_static_resize
+    {
+        typedef T type;
+    };
+
+    template <long n, typename T, long NR, long NC, typename MM, typename L>
+    struct column_matrix_static_resize<n, matrix<T,NR,NC,MM,L> >
+    {
+        typedef matrix<T,NR+n,NC,MM,L> type;
+    };
+
+    template <long n, typename T, long NC, typename MM, typename L>
+    struct column_matrix_static_resize<n, matrix<T,0,NC,MM,L> >
+    {
+        typedef matrix<T,0,NC,MM,L> type;
+    };
+
+    template <typename T>
+    struct add_one_to_static_feat_size
+    {
+        typedef typename column_matrix_static_resize<1,typename T::feature_vector_type>::type type;
+    };
+
+// ----------------------------------------------------------------------------------------

    template <
        typename feature_extractor
        >
    class structural_svm_assignment_problem : noncopyable,
-        public structural_svm_problem_threaded<matrix<double,0,1>, typename feature_extractor::feature_vector_type >
+        public structural_svm_problem_threaded<matrix<double,0,1>, typename add_one_to_static_feat_size<feature_extractor>::type >
    {
    public:
        typedef matrix<double,0,1> matrix_type;
-        typedef typename feature_extractor::feature_vector_type feature_vector_type;
+        typedef typename add_one_to_static_feat_size<feature_extractor>::type feature_vector_type;

        typedef typename feature_extractor::lhs_element lhs_element;
        typedef typename feature_extractor::rhs_element rhs_element;
@ -77,7 +102,7 @@ namespace dlib
        virtual long get_num_dimensions (
        ) const 
        {
-            return fe.num_features();
+            return fe.num_features()+1; // +1 for the bias term
        }

        virtual long get_num_samples (
@ -94,14 +119,15 @@ namespace dlib
        ) const 
        {
            typename feature_extractor::feature_vector_type feats;
-            psi.set_size(fe.num_features());
+            psi.set_size(get_num_dimensions());
            psi = 0;
            for (unsigned long i = 0; i < sample.first.size(); ++i)
            {
                if (label[i] != -1)
                {
                    fe.get_features(sample.first[i], sample.second[label[i]], feats);
-                    psi += feats;
+                    set_rowm(psi,range(0,feats.size()-1)) += feats;
+                    psi(get_num_dimensions()-1) += 1;
                }
            }
        }
@ -123,15 +149,18 @@ namespace dlib
        ) const 
        {
            psi.clear();
-            typename feature_extractor::feature_vector_type feats;
+            feature_vector_type feats;
+            int num_assignments = 0;
            for (unsigned long i = 0; i < sample.first.size(); ++i)
            {
                if (label[i] != -1)
                {
                    fe.get_features(sample.first[i], sample.second[label[i]], feats);
                    append_to_sparse_vect(psi, feats);
+                    ++num_assignments;
                }
            }
+            psi.push_back(std::make_pair(get_num_dimensions()-1,num_assignments));
        }

        virtual void get_truth_joint_feature_vector (
@ -176,7 +205,8 @@ namespace dlib
                        if (c < (long)samples[idx].second.size())
                        {
                            fe.get_features(samples[idx].first[r], samples[idx].second[c], feats);
-                            cost(r,c) = dot(current_solution, feats);
+                            const double bias = current_solution(current_solution.size()-1);
+                            cost(r,c) = dot(colm(current_solution,0,current_solution.size()-1), feats) + bias;

                            // add in the loss since this corresponds to an incorrect prediction.
                            if (c != labels[idx][r])
--- a/dlib/svm/structural_svm_assignment_problem_abstract.h
+++ b/dlib/svm/structural_svm_assignment_problem_abstract.h
@ -27,9 +27,9 @@ namespace dlib
                the example_feature_extractor defined in dlib/svm/assignment_function_abstract.h.

            WHAT THIS OBJECT REPRESENTS
-                This object is a tool for learning the weight vector needed to use
-                an assignment_function object.  It learns the parameter vector by 
-                formulating the problem as a structural SVM problem.  
+                This object is a tool for learning the parameters needed to use an
+                assignment_function object.  It learns the parameters by formulating the
+                problem as a structural SVM problem.  
        !*/

    public:
@ -56,8 +56,8 @@ namespace dlib
                - This object attempts to learn a mapping from the given samples to the 
                  given labels.  In particular, it attempts to learn to predict labels[i] 
                  based on samples[i].  Or in other words, this object can be used to learn 
-                  a parameter vector, w, such that an assignment_function declared as:
-                    assignment_function<feature_extractor> assigner(w,fe,force_assignment)
+                  a parameter vector and bias, w and b, such that an assignment_function declared as:
+                    assignment_function<feature_extractor> assigner(w,b,fe,force_assignment)
                  results in an assigner object which attempts to compute the following mapping:
                     labels[i] == assigner(samples[i])
                - This object will use num_threads threads during the optimization 
--- a/dlib/test/assignment_learning.cpp
+++ b/dlib/test/assignment_learning.cpp
@ -29,14 +29,14 @@ namespace

    struct feature_extractor_dense
    {
-        typedef matrix<double,4,1> feature_vector_type;
+        typedef matrix<double,3,1> feature_vector_type;

        typedef ::lhs_element lhs_element;
        typedef ::rhs_element rhs_element;

        unsigned long num_features() const
        {
-            return 4;
+            return 3;
        }

        void get_features (
@ -45,7 +45,7 @@ namespace
            feature_vector_type& feats
        ) const
        {
-            feats = join_cols(squared(left - right), ones_matrix<double>(1,1));
+            feats = squared(left - right);
        }

    };
@ -64,7 +64,7 @@ namespace

        unsigned long num_features() const
        {
-            return 4;
+            return 3;
        }

        void get_features (
@ -77,7 +77,6 @@ namespace
            feats.push_back(make_pair(0,squared(left-right)(0)));
            feats.push_back(make_pair(1,squared(left-right)(1)));
            feats.push_back(make_pair(2,squared(left-right)(2)));
-            feats.push_back(make_pair(3,1.0));
        }

    };