From 71d4306edfd3e170c7cf2013ce0b0427662a25db Mon Sep 17 00:00:00 2001
From: Davis King <davis@dlib.net>
Date: Thu, 20 Feb 2014 21:13:41 -0500
Subject: [PATCH] Added a bias term to the assignment_function's model so the
 user doesn't need to remember, or even understand, that they should add it
 themselves.  However, this change breaks backwards compatibility with the
 previous serialization format for assignment_function objects.

---
 dlib/svm/assignment_function.h                | 26 +++++++++--
 dlib/svm/assignment_function_abstract.h       | 29 +++++++++---
 dlib/svm/structural_assignment_trainer.h      |  8 +++-
 dlib/svm/structural_svm_assignment_problem.h  | 44 ++++++++++++++++---
 ...ructural_svm_assignment_problem_abstract.h | 10 ++---
 dlib/test/assignment_learning.cpp             |  9 ++--
 6 files changed, 97 insertions(+), 29 deletions(-)

diff --git a/dlib/svm/assignment_function.h b/dlib/svm/assignment_function.h
index 4b6a53ccb..b435c0ad0 100644
--- a/dlib/svm/assignment_function.h
+++ b/dlib/svm/assignment_function.h
@@ -33,13 +33,16 @@ namespace dlib
         {
             weights.set_size(fe.num_features());
             weights = 0;
+            bias = 0;
             force_assignment = false;
         }
 
         explicit assignment_function(
-            const matrix<double,0,1>& weights_
+            const matrix<double,0,1>& weights_,
+            double bias_
         ) : 
             weights(weights_),
+            bias(bias_),
             force_assignment(false)
         {
             // make sure requires clause is not broken
@@ -55,10 +58,12 @@ namespace dlib
 
         assignment_function(
             const matrix<double,0,1>& weights_,
+            double bias_,
             const feature_extractor& fe_
         ) :
             fe(fe_),
             weights(weights_),
+            bias(bias_),
             force_assignment(false)
         {
             // make sure requires clause is not broken
@@ -73,11 +78,13 @@ namespace dlib
 
         assignment_function(
             const matrix<double,0,1>& weights_,
+            double bias_,
             const feature_extractor& fe_,
             bool force_assignment_
         ) :
             fe(fe_),
             weights(weights_),
+            bias(bias_),
             force_assignment(force_assignment_)
         {
             // make sure requires clause is not broken
@@ -96,6 +103,9 @@ namespace dlib
         const matrix<double,0,1>& get_weights (
         ) const { return weights; }
 
+        double get_bias (
+        ) const { return bias; }
+
         bool forces_assignment (
         ) const { return force_assignment; }
 
@@ -130,7 +140,7 @@ namespace dlib
                     if (r < (long)lhs.size() && c < (long)rhs.size())
                     {
                         fe.get_features(lhs[r], rhs[c], feats);
-                        cost(r,c) = dot(weights, feats);
+                        cost(r,c) = dot(weights, feats) + bias;
                     }
                     else
                     {
@@ -188,6 +198,7 @@ namespace dlib
 
         feature_extractor fe;
         matrix<double,0,1> weights;
+        double bias;
         bool force_assignment;
     };
 
@@ -201,8 +212,11 @@ namespace dlib
         std::ostream& out
     )
     {
+        int version = 2;
+        serialize(version, out);
         serialize(item.get_feature_extractor(), out);
         serialize(item.get_weights(), out);
+        serialize(item.get_bias(), out);
         serialize(item.forces_assignment(), out);
     }
 
@@ -218,13 +232,19 @@ namespace dlib
     {
         feature_extractor fe;
         matrix<double,0,1> weights;
+        double bias;
         bool force_assignment;
+        int version = 0;
+        deserialize(version, in);
+        if (version != 2)
+            throw serialization_error("Unexpected version found while deserializing dlib::assignment_function.");
 
         deserialize(fe, in);
         deserialize(weights, in);
+        deserialize(bias, in);
         deserialize(force_assignment, in);
 
-        item = assignment_function<feature_extractor>(weights, fe, force_assignment);
+        item = assignment_function<feature_extractor>(weights, bias, fe, force_assignment);
     }
 
 // ----------------------------------------------------------------------------------------
diff --git a/dlib/svm/assignment_function_abstract.h b/dlib/svm/assignment_function_abstract.h
index e54cb975a..210f79920 100644
--- a/dlib/svm/assignment_function_abstract.h
+++ b/dlib/svm/assignment_function_abstract.h
@@ -29,9 +29,9 @@ namespace dlib
                 case it is excluded from the sum.    
 
                 Finally, match_score() is defined as: 
-                    match_score(l,r) == dot(w, PSI(l,r))
-                where l is an element of LHS, r is an element of RHS, and
-                w is a parameter vector.
+                    match_score(l,r) == dot(w, PSI(l,r)) + bias
+                where l is an element of LHS, r is an element of RHS, w is a parameter
+                vector and bias is a scalar valued parameter.
 
                 Therefore, a feature extractor defines how the PSI() feature vector 
                 is calculated.  In particular, PSI() is defined by the get_features()
@@ -140,9 +140,10 @@ namespace dlib
                 case it is excluded from the sum.    
 
                 Finally, this object supports match_score() functions of the form: 
-                    match_score(l,r) == dot(w, PSI(l,r))
-                where l is an element of LHS, r is an element of RHS, w is a parameter 
-                vector, and PSI() is defined by the feature_extractor template argument.  
+                    match_score(l,r) == dot(w, PSI(l,r)) + bias
+                where l is an element of LHS, r is an element of RHS, w is a parameter
+                vector, bias is a scalar valued parameter, and PSI() is defined by the
+                feature_extractor template argument.  
 
             THREAD SAFETY
                 It is always safe to use distinct instances of this object in different
@@ -170,11 +171,13 @@ namespace dlib
                   (i.e. it will have its default value)
                 - #get_weights().size() == #get_feature_extractor().num_features()
                 - #get_weights() == 0
+                - #get_bias() == 0
                 - #forces_assignment() == false 
         !*/
 
         explicit assignment_function(
-            const matrix<double,0,1>& weights
+            const matrix<double,0,1>& weights,
+            double bias
         );
         /*!
             requires
@@ -183,11 +186,13 @@ namespace dlib
                 - #get_feature_extractor() == feature_extractor() 
                   (i.e. it will have its default value)
                 - #get_weights() == weights
+                - #get_bias() == bias
                 - #forces_assignment() == false 
         !*/
 
         assignment_function(
             const matrix<double,0,1>& weights,
+            double bias,
             const feature_extractor& fe
         );
         /*!
@@ -196,11 +201,13 @@ namespace dlib
             ensures
                 - #get_feature_extractor() == fe
                 - #get_weights() == weights
+                - #get_bias() == bias
                 - #forces_assignment() == false 
         !*/
 
         assignment_function(
             const matrix<double,0,1>& weights,
+            double bias,
             const feature_extractor& fe,
             bool force_assignment
         );
@@ -210,6 +217,7 @@ namespace dlib
             ensures
                 - #get_feature_extractor() == fe
                 - #get_weights() == weights
+                - #get_bias() == bias
                 - #forces_assignment() == force_assignment
         !*/
 
@@ -228,6 +236,13 @@ namespace dlib
                   The length of the vector is get_feature_extractor().num_features().  
         !*/
 
+        double get_bias (
+        ) const;
+        /*!
+            ensures
+                - returns the bias parameter associated with this assignment function.
+        !*/
+
         bool forces_assignment (
         ) const; 
         /*!
diff --git a/dlib/svm/structural_assignment_trainer.h b/dlib/svm/structural_assignment_trainer.h
index f4a67990e..a229235fc 100644
--- a/dlib/svm/structural_assignment_trainer.h
+++ b/dlib/svm/structural_assignment_trainer.h
@@ -184,9 +184,13 @@ namespace dlib
 
             matrix<double,0,1> weights; 
 
-            solver(prob, weights, num_nonnegative_weights(fe));
+            // Take the min here because we want to prevent the user from accidentally
+            // forcing the bias term to be non-negative.
+            const unsigned long num_nonneg = std::min(fe.num_features(),num_nonnegative_weights(fe));
+            solver(prob, weights, num_nonneg);
 
-            return assignment_function<feature_extractor>(weights,fe,force_assignment);
+            const double bias = weights(weights.size()-1);
+            return assignment_function<feature_extractor>(colm(weights,0,weights.size()-1), bias,fe,force_assignment);
 
         }
 
diff --git a/dlib/svm/structural_svm_assignment_problem.h b/dlib/svm/structural_svm_assignment_problem.h
index 6974bc777..7c2f2ad61 100644
--- a/dlib/svm/structural_svm_assignment_problem.h
+++ b/dlib/svm/structural_svm_assignment_problem.h
@@ -14,16 +14,41 @@
 
 namespace dlib
 {
+    template <long n, typename T>
+    struct column_matrix_static_resize
+    {
+        typedef T type;
+    };
+
+    template <long n, typename T, long NR, long NC, typename MM, typename L>
+    struct column_matrix_static_resize<n, matrix<T,NR,NC,MM,L> >
+    {
+        typedef matrix<T,NR+n,NC,MM,L> type;
+    };
+
+    template <long n, typename T, long NC, typename MM, typename L>
+    struct column_matrix_static_resize<n, matrix<T,0,NC,MM,L> >
+    {
+        typedef matrix<T,0,NC,MM,L> type;
+    };
+
+    template <typename T>
+    struct add_one_to_static_feat_size
+    {
+        typedef typename column_matrix_static_resize<1,typename T::feature_vector_type>::type type;
+    };
+
+// ----------------------------------------------------------------------------------------
 
     template <
         typename feature_extractor
         >
     class structural_svm_assignment_problem : noncopyable,
-        public structural_svm_problem_threaded<matrix<double,0,1>, typename feature_extractor::feature_vector_type >
+        public structural_svm_problem_threaded<matrix<double,0,1>, typename add_one_to_static_feat_size<feature_extractor>::type >
     {
     public:
         typedef matrix<double,0,1> matrix_type;
-        typedef typename feature_extractor::feature_vector_type feature_vector_type;
+        typedef typename add_one_to_static_feat_size<feature_extractor>::type feature_vector_type;
 
         typedef typename feature_extractor::lhs_element lhs_element;
         typedef typename feature_extractor::rhs_element rhs_element;
@@ -77,7 +102,7 @@ namespace dlib
         virtual long get_num_dimensions (
         ) const 
         {
-            return fe.num_features();
+            return fe.num_features()+1; // +1 for the bias term
         }
 
         virtual long get_num_samples (
@@ -94,14 +119,15 @@ namespace dlib
         ) const 
         {
             typename feature_extractor::feature_vector_type feats;
-            psi.set_size(fe.num_features());
+            psi.set_size(get_num_dimensions());
             psi = 0;
             for (unsigned long i = 0; i < sample.first.size(); ++i)
             {
                 if (label[i] != -1)
                 {
                     fe.get_features(sample.first[i], sample.second[label[i]], feats);
-                    psi += feats;
+                    set_rowm(psi,range(0,feats.size()-1)) += feats;
+                    psi(get_num_dimensions()-1) += 1;
                 }
             }
         }
@@ -123,15 +149,18 @@ namespace dlib
         ) const 
         {
             psi.clear();
-            typename feature_extractor::feature_vector_type feats;
+            feature_vector_type feats;
+            int num_assignments = 0;
             for (unsigned long i = 0; i < sample.first.size(); ++i)
             {
                 if (label[i] != -1)
                 {
                     fe.get_features(sample.first[i], sample.second[label[i]], feats);
                     append_to_sparse_vect(psi, feats);
+                    ++num_assignments;
                 }
             }
+            psi.push_back(std::make_pair(get_num_dimensions()-1,num_assignments));
         }
 
         virtual void get_truth_joint_feature_vector (
@@ -176,7 +205,8 @@ namespace dlib
                         if (c < (long)samples[idx].second.size())
                         {
                             fe.get_features(samples[idx].first[r], samples[idx].second[c], feats);
-                            cost(r,c) = dot(current_solution, feats);
+                            const double bias = current_solution(current_solution.size()-1);
+                            cost(r,c) = dot(colm(current_solution,0,current_solution.size()-1), feats) + bias;
 
                             // add in the loss since this corresponds to an incorrect prediction.
                             if (c != labels[idx][r])
diff --git a/dlib/svm/structural_svm_assignment_problem_abstract.h b/dlib/svm/structural_svm_assignment_problem_abstract.h
index 0c11c722e..4b129d3ea 100644
--- a/dlib/svm/structural_svm_assignment_problem_abstract.h
+++ b/dlib/svm/structural_svm_assignment_problem_abstract.h
@@ -27,9 +27,9 @@ namespace dlib
                 the example_feature_extractor defined in dlib/svm/assignment_function_abstract.h.
 
             WHAT THIS OBJECT REPRESENTS
-                This object is a tool for learning the weight vector needed to use
-                an assignment_function object.  It learns the parameter vector by 
-                formulating the problem as a structural SVM problem.  
+                This object is a tool for learning the parameters needed to use an
+                assignment_function object.  It learns the parameters by formulating the
+                problem as a structural SVM problem.  
         !*/
 
     public:
@@ -56,8 +56,8 @@ namespace dlib
                 - This object attempts to learn a mapping from the given samples to the 
                   given labels.  In particular, it attempts to learn to predict labels[i] 
                   based on samples[i].  Or in other words, this object can be used to learn 
-                  a parameter vector, w, such that an assignment_function declared as:
-                    assignment_function<feature_extractor> assigner(w,fe,force_assignment)
+                  a parameter vector and bias, w and b, such that an assignment_function declared as:
+                    assignment_function<feature_extractor> assigner(w,b,fe,force_assignment)
                   results in an assigner object which attempts to compute the following mapping:
                     labels[i] == labeler(samples[i])
                 - This object will use num_threads threads during the optimization 
diff --git a/dlib/test/assignment_learning.cpp b/dlib/test/assignment_learning.cpp
index 5e3efb920..bba47db3d 100644
--- a/dlib/test/assignment_learning.cpp
+++ b/dlib/test/assignment_learning.cpp
@@ -29,14 +29,14 @@ namespace
 
     struct feature_extractor_dense
     {
-        typedef matrix<double,4,1> feature_vector_type;
+        typedef matrix<double,3,1> feature_vector_type;
 
         typedef ::lhs_element lhs_element;
         typedef ::rhs_element rhs_element;
 
         unsigned long num_features() const
         {
-            return 4;
+            return 3;
         }
 
         void get_features (
@@ -45,7 +45,7 @@ namespace
             feature_vector_type& feats
         ) const
         {
-            feats = join_cols(squared(left - right), ones_matrix<double>(1,1));
+            feats = squared(left - right);
         }
 
     };
@@ -64,7 +64,7 @@ namespace
 
         unsigned long num_features() const
         {
-            return 4;
+            return 3;
         }
 
         void get_features (
@@ -77,7 +77,6 @@ namespace
             feats.push_back(make_pair(0,squared(left-right)(0)));
             feats.push_back(make_pair(1,squared(left-right)(1)));
             feats.push_back(make_pair(2,squared(left-right)(2)));
-            feats.push_back(make_pair(3,1.0));
         }
 
     };