Added a bias term to the assignment_function's model so the user doesn't need
to remember, or even understand, that they should add it themselves.  However,
this change breaks backwards compatibility with the previous serialization
format for assignment_function objects.
This commit is contained in:
Davis King 2014-02-20 21:13:41 -05:00
parent 5563194476
commit 71d4306edf
6 changed files with 97 additions and 29 deletions

View File

@ -33,13 +33,16 @@ namespace dlib
{
weights.set_size(fe.num_features());
weights = 0;
bias = 0;
force_assignment = false;
}
explicit assignment_function(
const matrix<double,0,1>& weights_
const matrix<double,0,1>& weights_,
double bias_
) :
weights(weights_),
bias(bias_),
force_assignment(false)
{
// make sure requires clause is not broken
@ -55,10 +58,12 @@ namespace dlib
assignment_function(
const matrix<double,0,1>& weights_,
double bias_,
const feature_extractor& fe_
) :
fe(fe_),
weights(weights_),
bias(bias_),
force_assignment(false)
{
// make sure requires clause is not broken
@ -73,11 +78,13 @@ namespace dlib
assignment_function(
const matrix<double,0,1>& weights_,
double bias_,
const feature_extractor& fe_,
bool force_assignment_
) :
fe(fe_),
weights(weights_),
bias(bias_),
force_assignment(force_assignment_)
{
// make sure requires clause is not broken
@ -96,6 +103,9 @@ namespace dlib
// Read-only access to the weight vector held by this object.
const matrix<double,0,1>& get_weights () const
{
    return weights;
}
// Returns the scalar bias term held by this object.
double get_bias () const
{
    return bias;
}
// Reports the value of the force_assignment flag held by this object.
bool forces_assignment () const
{
    return force_assignment;
}
@ -130,7 +140,7 @@ namespace dlib
if (r < (long)lhs.size() && c < (long)rhs.size())
{
fe.get_features(lhs[r], rhs[c], feats);
cost(r,c) = dot(weights, feats);
cost(r,c) = dot(weights, feats) + bias;
}
else
{
@ -188,6 +198,7 @@ namespace dlib
feature_extractor fe;
matrix<double,0,1> weights;
double bias;
bool force_assignment;
};
@ -201,8 +212,11 @@ namespace dlib
std::ostream& out
)
{
int version = 2;
serialize(version, out);
serialize(item.get_feature_extractor(), out);
serialize(item.get_weights(), out);
serialize(item.get_bias(), out);
serialize(item.forces_assignment(), out);
}
@ -218,13 +232,19 @@ namespace dlib
{
feature_extractor fe;
matrix<double,0,1> weights;
double bias;
bool force_assignment;
int version = 0;
deserialize(version, in);
if (version != 2)
throw serialization_error("Unexpected version found while deserializing dlib::assignment_function.");
deserialize(fe, in);
deserialize(weights, in);
deserialize(bias, in);
deserialize(force_assignment, in);
item = assignment_function<feature_extractor>(weights, fe, force_assignment);
item = assignment_function<feature_extractor>(weights, bias, fe, force_assignment);
}
// ----------------------------------------------------------------------------------------

View File

@ -29,9 +29,9 @@ namespace dlib
case it is excluded from the sum.
Finally, match_score() is defined as:
match_score(l,r) == dot(w, PSI(l,r))
where l is an element of LHS, r is an element of RHS, and
w is a parameter vector.
match_score(l,r) == dot(w, PSI(l,r)) + bias
where l is an element of LHS, r is an element of RHS, w is a parameter
vector and bias is a scalar valued parameter.
Therefore, a feature extractor defines how the PSI() feature vector
is calculated. In particular, PSI() is defined by the get_features()
@ -140,9 +140,10 @@ namespace dlib
case it is excluded from the sum.
Finally, this object supports match_score() functions of the form:
match_score(l,r) == dot(w, PSI(l,r))
where l is an element of LHS, r is an element of RHS, w is a parameter
vector, and PSI() is defined by the feature_extractor template argument.
match_score(l,r) == dot(w, PSI(l,r)) + bias
where l is an element of LHS, r is an element of RHS, w is a parameter
vector, bias is a scalar valued parameter, and PSI() is defined by the
feature_extractor template argument.
THREAD SAFETY
It is always safe to use distinct instances of this object in different
@ -170,11 +171,13 @@ namespace dlib
(i.e. it will have its default value)
- #get_weights().size() == #get_feature_extractor().num_features()
- #get_weights() == 0
- #get_bias() == 0
- #forces_assignment() == false
!*/
explicit assignment_function(
const matrix<double,0,1>& weights
const matrix<double,0,1>& weights,
double bias
);
/*!
requires
@ -183,11 +186,13 @@ namespace dlib
- #get_feature_extractor() == feature_extractor()
(i.e. it will have its default value)
- #get_weights() == weights
- #get_bias() == bias
- #forces_assignment() == false
!*/
assignment_function(
const matrix<double,0,1>& weights,
double bias,
const feature_extractor& fe
);
/*!
@ -196,11 +201,13 @@ namespace dlib
ensures
- #get_feature_extractor() == fe
- #get_weights() == weights
- #get_bias() == bias
- #forces_assignment() == false
!*/
assignment_function(
const matrix<double,0,1>& weights,
double bias,
const feature_extractor& fe,
bool force_assignment
);
@ -210,6 +217,7 @@ namespace dlib
ensures
- #get_feature_extractor() == fe
- #get_weights() == weights
- #get_bias() == bias
- #forces_assignment() == force_assignment
!*/
@ -228,6 +236,13 @@ namespace dlib
The length of the vector is get_feature_extractor().num_features().
!*/
double get_bias (
) const;
/*!
ensures
- returns the bias parameter associated with this assignment function.
!*/
bool forces_assignment (
) const;
/*!

View File

@ -184,9 +184,13 @@ namespace dlib
matrix<double,0,1> weights;
solver(prob, weights, num_nonnegative_weights(fe));
// Take the min here because we want to prevent the user from accidentally
// forcing the bias term to be non-negative.
const unsigned long num_nonneg = std::min(fe.num_features(),num_nonnegative_weights(fe));
solver(prob, weights, num_nonneg);
return assignment_function<feature_extractor>(weights,fe,force_assignment);
const double bias = weights(weights.size()-1);
return assignment_function<feature_extractor>(colm(weights,0,weights.size()-1), bias,fe,force_assignment);
}

View File

@ -14,16 +14,41 @@
namespace dlib
{
// Fallback case: the supplied type is handed back unchanged, regardless of
// the requested size increment.
template <long extra_rows, typename vector_type>
struct column_matrix_static_resize
{
    typedef vector_type type;
};
// Matrix case: yields the same matrix type with its first size parameter
// (the row count) increased by extra_rows; all other parameters are kept.
template <long extra_rows, typename scalar, long num_rows, long num_cols, typename mem_manager, typename layout>
struct column_matrix_static_resize<extra_rows, matrix<scalar,num_rows,num_cols,mem_manager,layout> >
{
    typedef matrix<scalar,num_rows+extra_rows,num_cols,mem_manager,layout> type;
};
// Matrix case with a row parameter of 0: the type is returned as-is, so the
// 0 is never turned into a non-zero compile-time size.
// NOTE(review): 0 presumably marks a runtime-sized dimension in dlib's matrix -- confirm.
template <long extra_rows, typename scalar, long num_cols, typename mem_manager, typename layout>
struct column_matrix_static_resize<extra_rows, matrix<scalar,0,num_cols,mem_manager,layout> >
{
    typedef matrix<scalar,0,num_cols,mem_manager,layout> type;
};
// Maps a feature extractor type to its feature_vector_type after passing it
// through column_matrix_static_resize with an increment of 1.
template <typename extractor_type>
struct add_one_to_static_feat_size
{
    typedef typename column_matrix_static_resize<
        1,
        typename extractor_type::feature_vector_type
    >::type type;
};
// ----------------------------------------------------------------------------------------
template <
typename feature_extractor
>
class structural_svm_assignment_problem : noncopyable,
public structural_svm_problem_threaded<matrix<double,0,1>, typename feature_extractor::feature_vector_type >
public structural_svm_problem_threaded<matrix<double,0,1>, typename add_one_to_static_feat_size<feature_extractor>::type >
{
public:
typedef matrix<double,0,1> matrix_type;
typedef typename feature_extractor::feature_vector_type feature_vector_type;
typedef typename add_one_to_static_feat_size<feature_extractor>::type feature_vector_type;
typedef typename feature_extractor::lhs_element lhs_element;
typedef typename feature_extractor::rhs_element rhs_element;
@ -77,7 +102,7 @@ namespace dlib
virtual long get_num_dimensions (
) const
{
return fe.num_features();
return fe.num_features()+1; // +1 for the bias term
}
virtual long get_num_samples (
@ -94,14 +119,15 @@ namespace dlib
) const
{
typename feature_extractor::feature_vector_type feats;
psi.set_size(fe.num_features());
psi.set_size(get_num_dimensions());
psi = 0;
for (unsigned long i = 0; i < sample.first.size(); ++i)
{
if (label[i] != -1)
{
fe.get_features(sample.first[i], sample.second[label[i]], feats);
psi += feats;
set_rowm(psi,range(0,feats.size()-1)) += feats;
psi(get_num_dimensions()-1) += 1;
}
}
}
@ -123,15 +149,18 @@ namespace dlib
) const
{
psi.clear();
typename feature_extractor::feature_vector_type feats;
feature_vector_type feats;
int num_assignments = 0;
for (unsigned long i = 0; i < sample.first.size(); ++i)
{
if (label[i] != -1)
{
fe.get_features(sample.first[i], sample.second[label[i]], feats);
append_to_sparse_vect(psi, feats);
++num_assignments;
}
}
psi.push_back(std::make_pair(get_num_dimensions()-1,num_assignments));
}
virtual void get_truth_joint_feature_vector (
@ -176,7 +205,8 @@ namespace dlib
if (c < (long)samples[idx].second.size())
{
fe.get_features(samples[idx].first[r], samples[idx].second[c], feats);
cost(r,c) = dot(current_solution, feats);
const double bias = current_solution(current_solution.size()-1);
cost(r,c) = dot(colm(current_solution,0,current_solution.size()-1), feats) + bias;
// add in the loss since this corresponds to an incorrect prediction.
if (c != labels[idx][r])

View File

@ -27,9 +27,9 @@ namespace dlib
the example_feature_extractor defined in dlib/svm/assignment_function_abstract.h.
WHAT THIS OBJECT REPRESENTS
This object is a tool for learning the weight vector needed to use
an assignment_function object. It learns the parameter vector by
formulating the problem as a structural SVM problem.
This object is a tool for learning the parameters needed to use an
assignment_function object. It learns the parameters by formulating the
problem as a structural SVM problem.
!*/
public:
@ -56,8 +56,8 @@ namespace dlib
- This object attempts to learn a mapping from the given samples to the
given labels. In particular, it attempts to learn to predict labels[i]
based on samples[i]. Or in other words, this object can be used to learn
a parameter vector, w, such that an assignment_function declared as:
assignment_function<feature_extractor> assigner(w,fe,force_assignment)
a parameter vector and bias, w and b, such that an assignment_function declared as:
assignment_function<feature_extractor> assigner(w,b,fe,force_assignment)
results in an assigner object which attempts to compute the following mapping:
labels[i] == labeler(samples[i])
- This object will use num_threads threads during the optimization

View File

@ -29,14 +29,14 @@ namespace
struct feature_extractor_dense
{
typedef matrix<double,4,1> feature_vector_type;
typedef matrix<double,3,1> feature_vector_type;
typedef ::lhs_element lhs_element;
typedef ::rhs_element rhs_element;
unsigned long num_features() const
{
return 4;
return 3;
}
void get_features (
@ -45,7 +45,7 @@ namespace
feature_vector_type& feats
) const
{
feats = join_cols(squared(left - right), ones_matrix<double>(1,1));
feats = squared(left - right);
}
};
@ -64,7 +64,7 @@ namespace
unsigned long num_features() const
{
return 4;
return 3;
}
void get_features (
@ -77,7 +77,6 @@ namespace
feats.push_back(make_pair(0,squared(left-right)(0)));
feats.push_back(make_pair(1,squared(left-right)(1)));
feats.push_back(make_pair(2,squared(left-right)(2)));
feats.push_back(make_pair(3,1.0));
}
};