mirror of https://github.com/davisking/dlib.git
Moved the new OCA implementation into dlib proper.
--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403487
This commit is contained in:
parent
e8baa7d1a4
commit
b934a51286
|
@ -6,6 +6,7 @@
|
|||
#include "optimization/optimization.h"
|
||||
#include "optimization/optimization_bobyqa.h"
|
||||
#include "optimization/optimization_solve_qp_using_smo.h"
|
||||
#include "optimization/optimization_oca.h"
|
||||
|
||||
#endif // DLIB_OPTIMIZATIOn_HEADER
|
||||
|
||||
|
|
|
@ -0,0 +1,294 @@
|
|||
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_OPTIMIZATIoN_OCA_H__
|
||||
#define DLIB_OPTIMIZATIoN_OCA_H__
|
||||
|
||||
#include "optimization_oca_abstract.h"
|
||||
|
||||
#include "../matrix.h"
|
||||
#include "optimization_solve_qp_using_smo.h"
|
||||
#include <list>
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
template <typename matrix_type>
|
||||
class oca_problem
|
||||
{
|
||||
public:
|
||||
typedef typename matrix_type::type scalar_type;
|
||||
|
||||
virtual ~oca_problem() {}
|
||||
|
||||
virtual void optimization_status (
|
||||
scalar_type ,
|
||||
scalar_type ,
|
||||
unsigned long
|
||||
) const {}
|
||||
|
||||
virtual bool R_has_lower_bound (
|
||||
scalar_type&
|
||||
) const { return false; }
|
||||
|
||||
virtual scalar_type get_C (
|
||||
) const = 0;
|
||||
|
||||
virtual long num_dimensions (
|
||||
) const = 0;
|
||||
|
||||
virtual void get_risk (
|
||||
matrix_type& current_solution,
|
||||
scalar_type& risk_value,
|
||||
matrix_type& risk_subgradient
|
||||
) const = 0;
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class oca
|
||||
{
|
||||
public:
|
||||
|
||||
oca ()
|
||||
{
|
||||
eps = 0.001;
|
||||
max_iter = 1000000;
|
||||
|
||||
sub_eps = 1e-5;
|
||||
sub_max_iter = 20000;
|
||||
|
||||
inactive_thresh = 15;
|
||||
}
|
||||
|
||||
void set_epsilon (
|
||||
double eps_
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(eps_ > 0,
|
||||
"\t void oca::set_epsilon"
|
||||
<< "\n\t epsilon must be greater than 0"
|
||||
<< "\n\t eps_: " << eps_
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
eps = eps_;
|
||||
}
|
||||
|
||||
double get_epsilon (
|
||||
) const { return eps; }
|
||||
|
||||
void set_max_iterations (
|
||||
unsigned long max_iter_
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(max_iter_ > 0,
|
||||
"\t void oca::set_max_iterations"
|
||||
<< "\n\t max iterations must be greater than 0"
|
||||
<< "\n\t max_iter_: " << max_iter_
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
max_iter = max_iter_;
|
||||
}
|
||||
|
||||
unsigned long get_max_iterations (
|
||||
) const { return max_iter; }
|
||||
|
||||
void set_subproblem_epsilon (
|
||||
double eps_
|
||||
) { sub_eps = eps_; }
|
||||
|
||||
double get_subproblem_epsilon (
|
||||
) const { return sub_eps; }
|
||||
|
||||
void set_subproblem_max_iterations (
|
||||
unsigned long sub_max_iter_
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(sub_max_iter_ > 0,
|
||||
"\t void oca::set_subproblem_max_iterations"
|
||||
<< "\n\t max iterations must be greater than 0"
|
||||
<< "\n\t sub_max_iter_: " << sub_max_iter_
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
sub_max_iter = sub_max_iter_;
|
||||
}
|
||||
|
||||
unsigned long get_subproblem_max_iterations (
|
||||
) const { return sub_max_iter; }
|
||||
|
||||
void set_inactive_plane_threshold (
|
||||
unsigned long inactive_thresh_
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(inactive_thresh_ > 0,
|
||||
"\t void oca::set_inactive_plane_threshold"
|
||||
<< "\n\t inactive threshold must be greater than 0"
|
||||
<< "\n\t inactive_thresh_: " << inactive_thresh_
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
inactive_thresh = inactive_thresh_;
|
||||
}
|
||||
|
||||
unsigned long get_inactive_plane_threshold (
|
||||
) const { return inactive_thresh; }
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
void operator() (
|
||||
const oca_problem<matrix_type>& problem,
|
||||
matrix_type& w
|
||||
) const
|
||||
{
|
||||
typedef typename matrix_type::type scalar_type;
|
||||
typedef typename matrix_type::layout_type layout_type;
|
||||
typedef typename matrix_type::mem_manager_type mem_manager_type;
|
||||
typedef matrix<scalar_type,0,1,mem_manager_type, layout_type> vect_type;
|
||||
|
||||
const scalar_type C = problem.get_C();
|
||||
|
||||
std::list<vect_type> planes;
|
||||
std::vector<scalar_type> bs, miss_count;
|
||||
|
||||
vect_type temp, alpha, w_cur;
|
||||
|
||||
w.set_size(problem.num_dimensions(), 1);
|
||||
w = 0;
|
||||
w_cur = w;
|
||||
|
||||
// The best objective value seen so far. Note also
|
||||
// that w always contains the best solution seen so far.
|
||||
scalar_type best_obj = std::numeric_limits<scalar_type>::max();
|
||||
|
||||
// This will hold the cutting plane objective value. This value is
|
||||
// a lower bound on the true optimal objective value.
|
||||
scalar_type cp_obj = 0;
|
||||
|
||||
scalar_type R_lower_bound;
|
||||
if (problem.R_has_lower_bound(R_lower_bound))
|
||||
{
|
||||
// The flat lower bounding plane is always good to have if we know
|
||||
// what it is.
|
||||
bs.push_back(R_lower_bound);
|
||||
planes.push_back(zeros_matrix<scalar_type>(w.size(),1));
|
||||
miss_count.push_back(0);
|
||||
}
|
||||
|
||||
matrix<scalar_type,0,0,mem_manager_type, layout_type> K;
|
||||
|
||||
for (unsigned long iter = 0; iter < max_iter; ++iter)
|
||||
{
|
||||
|
||||
// add the next cutting plane
|
||||
scalar_type cur_risk;
|
||||
planes.resize(planes.size()+1);
|
||||
problem.get_risk(w_cur, cur_risk, planes.back());
|
||||
bs.push_back(cur_risk - dot(w_cur,planes.back()));
|
||||
miss_count.push_back(0);
|
||||
|
||||
// Check the objective value at w_cur and see if it is better than
|
||||
// the best seen so far.
|
||||
const scalar_type cur_obj = 0.5*trans(w_cur)*w_cur + C*cur_risk;
|
||||
if (cur_obj < best_obj)
|
||||
{
|
||||
best_obj = cur_obj;
|
||||
w = w_cur;
|
||||
}
|
||||
|
||||
// check stopping condition and stop if we can
|
||||
if (best_obj - cp_obj <= eps)
|
||||
break;
|
||||
|
||||
|
||||
// compute kernel matrix for all the planes
|
||||
K.set_size(planes.size(), planes.size());
|
||||
long rr = 0;
|
||||
for (typename std::list<vect_type>::iterator r = planes.begin(); r != planes.end(); ++r)
|
||||
{
|
||||
long cc = rr;
|
||||
for (typename std::list<vect_type>::iterator c = r; c != planes.end(); ++c)
|
||||
{
|
||||
K(rr,cc) = dot(*r, *c);
|
||||
K(cc,rr) = K(rr,cc);
|
||||
++cc;
|
||||
}
|
||||
++rr;
|
||||
}
|
||||
|
||||
alpha = uniform_matrix<scalar_type>(planes.size(),1, C/planes.size());
|
||||
|
||||
// solve the cutting plane subproblem for the next w_cur
|
||||
solve_qp_using_smo(K, vector_to_matrix(bs), alpha, static_cast<scalar_type>(sub_eps), sub_max_iter);
|
||||
|
||||
// construct the w_cur that minimized the subproblem.
|
||||
w_cur = 0;
|
||||
rr = 0;
|
||||
for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i)
|
||||
{
|
||||
if (alpha(rr) != 0)
|
||||
{
|
||||
w_cur -= alpha(rr)*(*i);
|
||||
miss_count[rr] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
miss_count[rr] += 1;
|
||||
}
|
||||
++rr;
|
||||
}
|
||||
|
||||
// Compute the lower bound on the true objective given to us by the cutting
|
||||
// plane subproblem.
|
||||
cp_obj = -0.5*trans(w_cur)*w_cur + trans(alpha)*vector_to_matrix(bs);
|
||||
|
||||
// check stopping condition and stop if we can
|
||||
if (best_obj - cp_obj <= eps)
|
||||
break;
|
||||
|
||||
// report current status
|
||||
problem.optimization_status(best_obj, best_obj - cp_obj, planes.size());
|
||||
|
||||
// If it has been a while since a cutting plane was an active constraint then
|
||||
// we should throw it away.
|
||||
while (max(vector_to_matrix(miss_count)) >= inactive_thresh)
|
||||
{
|
||||
long idx = index_of_max(vector_to_matrix(miss_count));
|
||||
typename std::list<vect_type>::iterator i0 = planes.begin();
|
||||
advance(i0, idx);
|
||||
planes.erase(i0);
|
||||
bs.erase(bs.begin()+idx);
|
||||
miss_count.erase(miss_count.begin()+idx);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// report current status
|
||||
problem.optimization_status(best_obj, best_obj - cp_obj, planes.size());
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
double eps;
|
||||
double sub_eps;
|
||||
|
||||
unsigned long max_iter;
|
||||
unsigned long sub_max_iter;
|
||||
|
||||
unsigned long inactive_thresh;
|
||||
};
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
#endif // DLIB_OPTIMIZATIoN_OCA_H__
|
||||
|
|
@ -0,0 +1,240 @@
|
|||
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_OPTIMIZATION_OCA_ABsTRACT_H__
|
||||
#ifdef DLIB_OPTIMIZATION_OCA_ABsTRACT_H__
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
template <typename matrix_type>
|
||||
class oca_problem
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON matrix_type
|
||||
- matrix_type == a dlib::matrix capable of storing column vectors
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is the interface used to define the optimization
|
||||
problems solved by the oca optimizer defined later in this file.
|
||||
|
||||
OCA solves optimization problems with the following form:
|
||||
Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
|
||||
|
||||
Where R(w) is a convex function and C > 0
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
typedef typename matrix_type::type scalar_type;
|
||||
|
||||
virtual ~oca_problem() {}
|
||||
|
||||
virtual void optimization_status (
|
||||
scalar_type current_objective_value,
|
||||
scalar_type current_error_gap,
|
||||
unsigned long num_cutting_planes
|
||||
) const {}
|
||||
/*!
|
||||
This function is called by the OCA optimizer each iteration. It
|
||||
exists to allow the user to monitor the progress of the optimization.
|
||||
!*/
|
||||
|
||||
virtual bool R_has_lower_bound (
|
||||
scalar_type& lower_bound
|
||||
) const { return false; }
|
||||
/*!
|
||||
ensures
|
||||
- if (R(w) >= a constant for all values of w) then
|
||||
- returns true
|
||||
- #lower_bound == the constant that lower bounds R(w)
|
||||
- else
|
||||
- returns false
|
||||
!*/
|
||||
|
||||
virtual scalar_type get_C (
|
||||
) const = 0;
|
||||
/*!
|
||||
ensures
|
||||
- returns the C parameter
|
||||
!*/
|
||||
|
||||
virtual long num_dimensions (
|
||||
) const = 0;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of free variables in this optimization problem
|
||||
!*/
|
||||
|
||||
virtual void get_risk (
|
||||
matrix_type& current_solution,
|
||||
scalar_type& risk_value,
|
||||
matrix_type& risk_subgradient
|
||||
) const = 0;
|
||||
/*!
|
||||
requires
|
||||
- is_col_vector(current_solution) == true
|
||||
- current_solution.size() == num_dimensions()
|
||||
ensures
|
||||
- #current_solution will be set to one of the following:
|
||||
- current_solution (i.e. it won't be modified at all)
|
||||
- The result of a line search passing through current_solution.
|
||||
- #risk_value == R(#current_solution)
|
||||
- #risk_subgradient == an element of the subgradient of R() at the
|
||||
point #current_solution
|
||||
- Note that risk_value and risk_subgradient are NOT multiplied by get_C()
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class oca
|
||||
{
|
||||
/*!
|
||||
INITIAL VALUE
|
||||
- get_epsilon() == 0.001
|
||||
- get_max_iterations() == 1000000
|
||||
- get_subproblem_epsilon() == 1e-5
|
||||
- get_subproblem_max_iterations() == 20000
|
||||
- get_inactive_plane_threshold() == 15
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for solving the optimization problem defined above
|
||||
by the oca_problem abstract class.
|
||||
|
||||
For reference, OCA solves optimization problems with the following form:
|
||||
Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
|
||||
|
||||
Where R(w) is a convex function and C > 0
|
||||
|
||||
|
||||
For a detailed discussion you should consult the following papers:
|
||||
Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization
|
||||
Vojtěch Franc, Sören Sonnenburg; 10(Oct):2157--2192, 2009.
|
||||
|
||||
Bundle Methods for Regularized Risk Minimization
|
||||
Choon Hui Teo, S.V.N. Vishwanthan, Alex J. Smola, Quoc V. Le; 11(Jan):311−365, 2010.
|
||||
!*/
|
||||
public:
|
||||
|
||||
oca (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
void operator() (
|
||||
const oca_problem<matrix_type>& problem,
|
||||
matrix_type& w
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- solves the given oca problem and stores the solution in #w
|
||||
!*/
|
||||
|
||||
void set_epsilon (
|
||||
double eps_
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- eps > 0
|
||||
ensures
|
||||
- #get_epsilon() == eps
|
||||
!*/
|
||||
|
||||
double get_epsilon (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the error epsilon that determines when training should stop.
|
||||
Smaller values may result in a more accurate solution but may cause
|
||||
the algorithm to take longer to execute.
|
||||
!*/
|
||||
|
||||
void set_max_iterations (
|
||||
unsigned long max_iter
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- max_iter > 0
|
||||
ensures
|
||||
- #get_max_iterations() == max_iter
|
||||
!*/
|
||||
|
||||
unsigned long get_max_iterations (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the maximum number of iterations this object will perform
|
||||
while attempting to solve an oca_problem.
|
||||
!*/
|
||||
|
||||
void set_subproblem_epsilon (
|
||||
double eps
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- eps > 0
|
||||
ensures
|
||||
- #get_subproblem_epsilon() == eps
|
||||
!*/
|
||||
|
||||
double get_subproblem_epsilon (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the accuracy used in solving the quadratic programming
|
||||
subproblem that is part of the overall OCA algorithm.
|
||||
!*/
|
||||
|
||||
void set_subproblem_max_iterations (
|
||||
unsigned long sub_max_iter
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- sub_max_iter > 0
|
||||
ensures
|
||||
- #get_subproblem_max_iterations() == sub_max_iter
|
||||
!*/
|
||||
|
||||
unsigned long get_subproblem_max_iterations (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the maximum number of iterations this object will perform
|
||||
while attempting to solve each quadratic programming subproblem.
|
||||
!*/
|
||||
|
||||
void set_inactive_plane_threshold (
|
||||
unsigned long inactive_thresh
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- inactive_thresh > 0
|
||||
ensures
|
||||
- #get_inactive_plane_threshold() == inactive_thresh
|
||||
!*/
|
||||
|
||||
unsigned long get_inactive_plane_threshold (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- As OCA runs it builds up a set of cutting planes. Typically
|
||||
cutting planes become inactive after a certain point and can then
|
||||
be removed. This function returns the number of iterations of
|
||||
inactivity required before a cutting plane is removed.
|
||||
!*/
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
#endif // DLIB_OPTIMIZATION_OCA_ABsTRACT_H__
|
||||
|
||||
|
Loading…
Reference in New Issue