From 71d4306edfd3e170c7cf2013ce0b0427662a25db Mon Sep 17 00:00:00 2001 From: Davis King Date: Thu, 20 Feb 2014 21:13:41 -0500 Subject: [PATCH] Added a bias term to the assignment_function's model so the user doesn't need to remember, or even understand, that they should add it themselves. However, this change breaks backwards compatibility with the previous serialization format for assignment_function objects. --- dlib/svm/assignment_function.h | 26 +++++++++-- dlib/svm/assignment_function_abstract.h | 29 +++++++++--- dlib/svm/structural_assignment_trainer.h | 8 +++- dlib/svm/structural_svm_assignment_problem.h | 44 ++++++++++++++++--- ...ructural_svm_assignment_problem_abstract.h | 10 ++--- dlib/test/assignment_learning.cpp | 9 ++-- 6 files changed, 97 insertions(+), 29 deletions(-) diff --git a/dlib/svm/assignment_function.h b/dlib/svm/assignment_function.h index 4b6a53ccb..b435c0ad0 100644 --- a/dlib/svm/assignment_function.h +++ b/dlib/svm/assignment_function.h @@ -33,13 +33,16 @@ namespace dlib { weights.set_size(fe.num_features()); weights = 0; + bias = 0; force_assignment = false; } explicit assignment_function( - const matrix& weights_ + const matrix& weights_, + double bias_ ) : weights(weights_), + bias(bias_), force_assignment(false) { // make sure requires clause is not broken @@ -55,10 +58,12 @@ namespace dlib assignment_function( const matrix& weights_, + double bias_, const feature_extractor& fe_ ) : fe(fe_), weights(weights_), + bias(bias_), force_assignment(false) { // make sure requires clause is not broken @@ -73,11 +78,13 @@ namespace dlib assignment_function( const matrix& weights_, + double bias_, const feature_extractor& fe_, bool force_assignment_ ) : fe(fe_), weights(weights_), + bias(bias_), force_assignment(force_assignment_) { // make sure requires clause is not broken @@ -96,6 +103,9 @@ namespace dlib const matrix& get_weights ( ) const { return weights; } + double get_bias ( + ) const { return bias; } + bool forces_assignment ( ) const { return force_assignment; } @@ -130,7 +140,7 @@ namespace dlib if (r < (long)lhs.size() && c < (long)rhs.size()) { fe.get_features(lhs[r], rhs[c], feats); - cost(r,c) = dot(weights, feats); + cost(r,c) = dot(weights, feats) + bias; } else { @@ -188,6 +198,7 @@ namespace dlib feature_extractor fe; matrix weights; + double bias; bool force_assignment; }; @@ -201,8 +212,11 @@ namespace dlib std::ostream& out ) { + int version = 2; + serialize(version, out); serialize(item.get_feature_extractor(), out); serialize(item.get_weights(), out); + serialize(item.get_bias(), out); serialize(item.forces_assignment(), out); } @@ -218,13 +232,19 @@ namespace dlib { feature_extractor fe; matrix weights; + double bias; bool force_assignment; + int version = 0; + deserialize(version, in); + if (version != 2) + throw serialization_error("Unexpected version found while deserializing dlib::assignment_function."); deserialize(fe, in); deserialize(weights, in); + deserialize(bias, in); deserialize(force_assignment, in); - item = assignment_function(weights, fe, force_assignment); + item = assignment_function(weights, bias, fe, force_assignment); } // ---------------------------------------------------------------------------------------- diff --git a/dlib/svm/assignment_function_abstract.h b/dlib/svm/assignment_function_abstract.h index e54cb975a..210f79920 100644 --- a/dlib/svm/assignment_function_abstract.h +++ b/dlib/svm/assignment_function_abstract.h @@ -29,9 +29,9 @@ namespace dlib case it is excluded from the sum. Finally, match_score() is defined as: - match_score(l,r) == dot(w, PSI(l,r)) - where l is an element of LHS, r is an element of RHS, and - w is a parameter vector. + match_score(l,r) == dot(w, PSI(l,r)) + bias + where l is an element of LHS, r is an element of RHS, w is a parameter + vector and bias is a scalar valued parameter. Therefore, a feature extractor defines how the PSI() feature vector is calculated. In particular, PSI() is defined by the get_features() @@ -140,9 +140,10 @@ namespace dlib case it is excluded from the sum. Finally, this object supports match_score() functions of the form: - match_score(l,r) == dot(w, PSI(l,r)) - where l is an element of LHS, r is an element of RHS, w is a parameter - vector, and PSI() is defined by the feature_extractor template argument. + match_score(l,r) == dot(w, PSI(l,r)) + bias + where l is an element of LHS, r is an element of RHS, w is a parameter + vector, bias is a scalar valued parameter, and PSI() is defined by the + feature_extractor template argument. THREAD SAFETY It is always safe to use distinct instances of this object in different @@ -170,11 +171,13 @@ namespace dlib (i.e. it will have its default value) - #get_weights().size() == #get_feature_extractor().num_features() - #get_weights() == 0 + - #get_bias() == 0 - #forces_assignment() == false !*/ explicit assignment_function( - const matrix& weights + const matrix& weights, + double bias ); /*! requires @@ -183,11 +186,13 @@ namespace dlib - #get_feature_extractor() == feature_extractor() (i.e. it will have its default value) - #get_weights() == weights + - #get_bias() == bias - #forces_assignment() == false !*/ assignment_function( const matrix& weights, + double bias, const feature_extractor& fe ); /*! @@ -196,11 +201,13 @@ namespace dlib ensures - #get_feature_extractor() == fe - #get_weights() == weights + - #get_bias() == bias - #forces_assignment() == false !*/ assignment_function( const matrix& weights, + double bias, const feature_extractor& fe, bool force_assignment ); @@ -210,6 +217,7 @@ namespace dlib ensures - #get_feature_extractor() == fe - #get_weights() == weights + - #get_bias() == bias - #forces_assignment() == force_assignment !*/ @@ -228,6 +236,13 @@ namespace dlib The length of the vector is get_feature_extractor().num_features(). !*/ + double get_bias ( + ) const; + /*! + ensures + - returns the bias parameter associated with this assignment function. + !*/ + bool forces_assignment ( ) const; /*! diff --git a/dlib/svm/structural_assignment_trainer.h b/dlib/svm/structural_assignment_trainer.h index f4a67990e..a229235fc 100644 --- a/dlib/svm/structural_assignment_trainer.h +++ b/dlib/svm/structural_assignment_trainer.h @@ -184,9 +184,13 @@ namespace dlib matrix weights; - solver(prob, weights, num_nonnegative_weights(fe)); + // Take the min here because we want to prevent the user from accidentally + // forcing the bias term to be non-negative. + const unsigned long num_nonneg = std::min(fe.num_features(),num_nonnegative_weights(fe)); + solver(prob, weights, num_nonneg); - return assignment_function(weights,fe,force_assignment); + const double bias = weights(weights.size()-1); + return assignment_function(colm(weights,0,weights.size()-1), bias,fe,force_assignment); } diff --git a/dlib/svm/structural_svm_assignment_problem.h b/dlib/svm/structural_svm_assignment_problem.h index 6974bc777..7c2f2ad61 100644 --- a/dlib/svm/structural_svm_assignment_problem.h +++ b/dlib/svm/structural_svm_assignment_problem.h @@ -14,16 +14,41 @@ namespace dlib { + template + struct column_matrix_static_resize + { + typedef T type; + }; + + template + struct column_matrix_static_resize > + { + typedef matrix type; + }; + + template + struct column_matrix_static_resize > + { + typedef matrix type; + }; + + template + struct add_one_to_static_feat_size + { + typedef typename column_matrix_static_resize<1,typename T::feature_vector_type>::type type; + }; + +// ---------------------------------------------------------------------------------------- template < typename feature_extractor > class structural_svm_assignment_problem : noncopyable, - public structural_svm_problem_threaded, typename feature_extractor::feature_vector_type > + public structural_svm_problem_threaded, typename add_one_to_static_feat_size::type > { public: typedef matrix matrix_type; - typedef typename feature_extractor::feature_vector_type feature_vector_type; + typedef typename add_one_to_static_feat_size::type feature_vector_type; typedef typename feature_extractor::lhs_element lhs_element; typedef typename feature_extractor::rhs_element rhs_element; @@ -77,7 +102,7 @@ namespace dlib virtual long get_num_dimensions ( ) const { - return fe.num_features(); + return fe.num_features()+1; // +1 for the bias term } virtual long get_num_samples ( @@ -94,14 +119,15 @@ namespace dlib ) const { typename feature_extractor::feature_vector_type feats; - psi.set_size(fe.num_features()); + psi.set_size(get_num_dimensions()); psi = 0; for (unsigned long i = 0; i < sample.first.size(); ++i) { if (label[i] != -1) { fe.get_features(sample.first[i], sample.second[label[i]], feats); - psi += feats; + set_rowm(psi,range(0,feats.size()-1)) += feats; + psi(get_num_dimensions()-1) += 1; } } } @@ -123,15 +149,18 @@ namespace dlib ) const { psi.clear(); - typename feature_extractor::feature_vector_type feats; + feature_vector_type feats; + int num_assignments = 0; for (unsigned long i = 0; i < sample.first.size(); ++i) { if (label[i] != -1) { fe.get_features(sample.first[i], sample.second[label[i]], feats); append_to_sparse_vect(psi, feats); + ++num_assignments; } } + psi.push_back(std::make_pair(get_num_dimensions()-1,num_assignments)); } virtual void get_truth_joint_feature_vector ( @@ -176,7 +205,8 @@ namespace dlib if (c < (long)samples[idx].second.size()) { fe.get_features(samples[idx].first[r], samples[idx].second[c], feats); - cost(r,c) = dot(current_solution, feats); + const double bias = current_solution(current_solution.size()-1); + cost(r,c) = dot(colm(current_solution,0,current_solution.size()-1), feats) + bias; // add in the loss since this corresponds to an incorrect prediction. if (c != labels[idx][r]) diff --git a/dlib/svm/structural_svm_assignment_problem_abstract.h b/dlib/svm/structural_svm_assignment_problem_abstract.h index 0c11c722e..4b129d3ea 100644 --- a/dlib/svm/structural_svm_assignment_problem_abstract.h +++ b/dlib/svm/structural_svm_assignment_problem_abstract.h @@ -27,9 +27,9 @@ namespace dlib the example_feature_extractor defined in dlib/svm/assignment_function_abstract.h. WHAT THIS OBJECT REPRESENTS - This object is a tool for learning the weight vector needed to use - an assignment_function object. It learns the parameter vector by - formulating the problem as a structural SVM problem. + This object is a tool for learning the parameters needed to use an + assignment_function object. It learns the parameters by formulating the + problem as a structural SVM problem. !*/ public: @@ -56,8 +56,8 @@ namespace dlib - This object attempts to learn a mapping from the given samples to the given labels. In particular, it attempts to learn to predict labels[i] based on samples[i]. Or in other words, this object can be used to learn - a parameter vector, w, such that an assignment_function declared as: - assignment_function assigner(w,fe,force_assignment) + a parameter vector and bias, w and b, such that an assignment_function declared as: + assignment_function assigner(w,b,fe,force_assignment) results in an assigner object which attempts to compute the following mapping: labels[i] == labeler(samples[i]) - This object will use num_threads threads during the optimization diff --git a/dlib/test/assignment_learning.cpp b/dlib/test/assignment_learning.cpp index 5e3efb920..bba47db3d 100644 --- a/dlib/test/assignment_learning.cpp +++ b/dlib/test/assignment_learning.cpp @@ -29,14 +29,14 @@ namespace struct feature_extractor_dense { - typedef matrix feature_vector_type; + typedef matrix feature_vector_type; typedef ::lhs_element lhs_element; typedef ::rhs_element rhs_element; unsigned long num_features() const { - return 4; + return 3; } void get_features ( @@ -45,7 +45,7 @@ namespace feature_vector_type& feats ) const { - feats = join_cols(squared(left - right), ones_matrix(1,1)); + feats = squared(left - right); } }; @@ -64,7 +64,7 @@ namespace unsigned long num_features() const { - return 4; + return 3; } void get_features ( @@ -77,7 +77,6 @@ namespace feats.push_back(make_pair(0,squared(left-right)(0))); feats.push_back(make_pair(1,squared(left-right)(1))); feats.push_back(make_pair(2,squared(left-right)(2))); - feats.push_back(make_pair(3,1.0)); } };