Moved the new OCA implementation into dlib proper.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403487
This commit is contained in:
Davis King 2010-02-25 03:43:17 +00:00
parent e8baa7d1a4
commit b934a51286
3 changed files with 535 additions and 0 deletions

View File

@ -6,6 +6,7 @@
#include "optimization/optimization.h"
#include "optimization/optimization_bobyqa.h"
#include "optimization/optimization_solve_qp_using_smo.h"
#include "optimization/optimization_oca.h"
#endif // DLIB_OPTIMIZATIOn_HEADER

View File

@ -0,0 +1,294 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_OPTIMIZATIoN_OCA_H__
#define DLIB_OPTIMIZATIoN_OCA_H__
#include "optimization_oca_abstract.h"
#include "../matrix.h"
#include "optimization_solve_qp_using_smo.h"
#include <list>
// ----------------------------------------------------------------------------------------
namespace dlib
{
template <typename matrix_type>
class oca_problem
{
public:
typedef typename matrix_type::type scalar_type;
virtual ~oca_problem() {}
virtual void optimization_status (
scalar_type ,
scalar_type ,
unsigned long
) const {}
virtual bool R_has_lower_bound (
scalar_type&
) const { return false; }
virtual scalar_type get_C (
) const = 0;
virtual long num_dimensions (
) const = 0;
virtual void get_risk (
matrix_type& current_solution,
scalar_type& risk_value,
matrix_type& risk_subgradient
) const = 0;
};
// ----------------------------------------------------------------------------------------
class oca
{
public:
oca ()
{
eps = 0.001;
max_iter = 1000000;
sub_eps = 1e-5;
sub_max_iter = 20000;
inactive_thresh = 15;
}
void set_epsilon (
double eps_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(eps_ > 0,
"\t void oca::set_epsilon"
<< "\n\t epsilon must be greater than 0"
<< "\n\t eps_: " << eps_
<< "\n\t this: " << this
);
eps = eps_;
}
double get_epsilon (
) const { return eps; }
void set_max_iterations (
unsigned long max_iter_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(max_iter_ > 0,
"\t void oca::set_max_iterations"
<< "\n\t max iterations must be greater than 0"
<< "\n\t max_iter_: " << max_iter_
<< "\n\t this: " << this
);
max_iter = max_iter_;
}
unsigned long get_max_iterations (
) const { return max_iter; }
void set_subproblem_epsilon (
double eps_
) { sub_eps = eps_; }
double get_subproblem_epsilon (
) const { return sub_eps; }
void set_subproblem_max_iterations (
unsigned long sub_max_iter_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(sub_max_iter_ > 0,
"\t void oca::set_subproblem_max_iterations"
<< "\n\t max iterations must be greater than 0"
<< "\n\t sub_max_iter_: " << sub_max_iter_
<< "\n\t this: " << this
);
sub_max_iter = sub_max_iter_;
}
unsigned long get_subproblem_max_iterations (
) const { return sub_max_iter; }
void set_inactive_plane_threshold (
unsigned long inactive_thresh_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(inactive_thresh_ > 0,
"\t void oca::set_inactive_plane_threshold"
<< "\n\t inactive threshold must be greater than 0"
<< "\n\t inactive_thresh_: " << inactive_thresh_
<< "\n\t this: " << this
);
inactive_thresh = inactive_thresh_;
}
unsigned long get_inactive_plane_threshold (
) const { return inactive_thresh; }
template <
typename matrix_type
>
void operator() (
const oca_problem<matrix_type>& problem,
matrix_type& w
) const
{
typedef typename matrix_type::type scalar_type;
typedef typename matrix_type::layout_type layout_type;
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef matrix<scalar_type,0,1,mem_manager_type, layout_type> vect_type;
const scalar_type C = problem.get_C();
std::list<vect_type> planes;
std::vector<scalar_type> bs, miss_count;
vect_type temp, alpha, w_cur;
w.set_size(problem.num_dimensions(), 1);
w = 0;
w_cur = w;
// The best objective value seen so far. Note also
// that w always contains the best solution seen so far.
scalar_type best_obj = std::numeric_limits<scalar_type>::max();
// This will hold the cutting plane objective value. This value is
// a lower bound on the true optimal objective value.
scalar_type cp_obj = 0;
scalar_type R_lower_bound;
if (problem.R_has_lower_bound(R_lower_bound))
{
// The flat lower bounding plane is always good to have if we know
// what it is.
bs.push_back(R_lower_bound);
planes.push_back(zeros_matrix<scalar_type>(w.size(),1));
miss_count.push_back(0);
}
matrix<scalar_type,0,0,mem_manager_type, layout_type> K;
for (unsigned long iter = 0; iter < max_iter; ++iter)
{
// add the next cutting plane
scalar_type cur_risk;
planes.resize(planes.size()+1);
problem.get_risk(w_cur, cur_risk, planes.back());
bs.push_back(cur_risk - dot(w_cur,planes.back()));
miss_count.push_back(0);
// Check the objective value at w_cur and see if it is better than
// the best seen so far.
const scalar_type cur_obj = 0.5*trans(w_cur)*w_cur + C*cur_risk;
if (cur_obj < best_obj)
{
best_obj = cur_obj;
w = w_cur;
}
// check stopping condition and stop if we can
if (best_obj - cp_obj <= eps)
break;
// compute kernel matrix for all the planes
K.set_size(planes.size(), planes.size());
long rr = 0;
for (typename std::list<vect_type>::iterator r = planes.begin(); r != planes.end(); ++r)
{
long cc = rr;
for (typename std::list<vect_type>::iterator c = r; c != planes.end(); ++c)
{
K(rr,cc) = dot(*r, *c);
K(cc,rr) = K(rr,cc);
++cc;
}
++rr;
}
alpha = uniform_matrix<scalar_type>(planes.size(),1, C/planes.size());
// solve the cutting plane subproblem for the next w_cur
solve_qp_using_smo(K, vector_to_matrix(bs), alpha, static_cast<scalar_type>(sub_eps), sub_max_iter);
// construct the w_cur that minimized the subproblem.
w_cur = 0;
rr = 0;
for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i)
{
if (alpha(rr) != 0)
{
w_cur -= alpha(rr)*(*i);
miss_count[rr] = 0;
}
else
{
miss_count[rr] += 1;
}
++rr;
}
// Compute the lower bound on the true objective given to us by the cutting
// plane subproblem.
cp_obj = -0.5*trans(w_cur)*w_cur + trans(alpha)*vector_to_matrix(bs);
// check stopping condition and stop if we can
if (best_obj - cp_obj <= eps)
break;
// report current status
problem.optimization_status(best_obj, best_obj - cp_obj, planes.size());
// If it has been a while since a cutting plane was an active constraint then
// we should throw it away.
while (max(vector_to_matrix(miss_count)) >= inactive_thresh)
{
long idx = index_of_max(vector_to_matrix(miss_count));
typename std::list<vect_type>::iterator i0 = planes.begin();
advance(i0, idx);
planes.erase(i0);
bs.erase(bs.begin()+idx);
miss_count.erase(miss_count.begin()+idx);
}
}
// report current status
problem.optimization_status(best_obj, best_obj - cp_obj, planes.size());
}
private:
double eps;
double sub_eps;
unsigned long max_iter;
unsigned long sub_max_iter;
unsigned long inactive_thresh;
};
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_OPTIMIZATIoN_OCA_H__

View File

@ -0,0 +1,240 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_OPTIMIZATION_OCA_ABsTRACT_H__
#ifdef DLIB_OPTIMIZATION_OCA_ABsTRACT_H__
// ----------------------------------------------------------------------------------------
namespace dlib
{
template <typename matrix_type>
class oca_problem
{
/*!
REQUIREMENTS ON matrix_type
- matrix_type == a dlib::matrix capable of storing column vectors
WHAT THIS OBJECT REPRESENTS
This object is the interface used to define the optimization
problems solved by the oca optimizer defined later in this file.
OCA solves optimization problems with the following form:
Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
Where R(w) is a convex function and C > 0
!*/
public:
typedef typename matrix_type::type scalar_type;
virtual ~oca_problem() {}
virtual void optimization_status (
scalar_type current_objective_value,
scalar_type current_error_gap,
unsigned long num_cutting_planes
) const {}
/*!
This function is called by the OCA optimizer each iteration. It
exists to allow the user to monitor the progress of the optimization.
!*/
virtual bool R_has_lower_bound (
scalar_type& lower_bound
) const { return false; }
/*!
ensures
- if (R(w) >= a constant for all values of w) then
- returns true
- #lower_bound == the constant that lower bounds R(w)
- else
- returns false
!*/
virtual scalar_type get_C (
) const = 0;
/*!
ensures
- returns the C parameter
!*/
virtual long num_dimensions (
) const = 0;
/*!
ensures
- returns the number of free variables in this optimization problem
!*/
virtual void get_risk (
matrix_type& current_solution,
scalar_type& risk_value,
matrix_type& risk_subgradient
) const = 0;
/*!
requires
- is_col_vector(current_solution) == true
- current_solution.size() == num_dimensions()
ensures
- #current_solution will be set to one of the following:
- current_solution (i.e. it won't be modified at all)
- The result of a line search passing through current_solution.
- #risk_value == R(#current_solution)
- #risk_subgradient == an element of the subgradient of R() at the
point #current_solution
- Note that risk_value and risk_subgradient are NOT multiplied by get_C()
!*/
};
// ----------------------------------------------------------------------------------------
class oca
{
/*!
INITIAL VALUE
- get_epsilon() == 0.001
- get_max_iterations() == 1000000
- get_subproblem_epsilon() == 1e-5
- get_subproblem_max_iterations() == 20000
- get_inactive_plane_threshold() == 15
WHAT THIS OBJECT REPRESENTS
This object is a tool for solving the optimization problem defined above
by the oca_problem abstract class.
For reference, OCA solves optimization problems with the following form:
Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
Where R(w) is a convex function and C > 0
For a detailed discussion you should consult the following papers:
Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization
Vojtěch Franc, Sören Sonnenburg; 10(Oct):2157--2192, 2009.
Bundle Methods for Regularized Risk Minimization
Choon Hui Teo, S.V.N. Vishwanthan, Alex J. Smola, Quoc V. Le; 11(Jan):311365, 2010.
!*/
public:
oca (
);
/*!
ensures
- this object is properly initialized
!*/
template <
typename matrix_type
>
void operator() (
const oca_problem<matrix_type>& problem,
matrix_type& w
) const;
/*!
ensures
- solves the given oca problem and stores the solution in #w
!*/
void set_epsilon (
double eps_
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
double get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but may cause
the algorithm to take longer to execute.
!*/
void set_max_iterations (
unsigned long max_iter
);
/*!
requires
- max_iter > 0
ensures
- #get_max_iterations() == max_iter
!*/
unsigned long get_max_iterations (
) const;
/*!
ensures
- returns the maximum number of iterations this object will perform
while attempting to solve an oca_problem.
!*/
void set_subproblem_epsilon (
double eps
);
/*!
requires
- eps > 0
ensures
- #get_subproblem_epsilon() == eps
!*/
double get_subproblem_epsilon (
) const;
/*!
ensures
- returns the accuracy used in solving the quadratic programming
subproblem that is part of the overall OCA algorithm.
!*/
void set_subproblem_max_iterations (
unsigned long sub_max_iter
);
/*!
requires
- sub_max_iter > 0
ensures
- #get_subproblem_max_iterations() == sub_max_iter
!*/
unsigned long get_subproblem_max_iterations (
) const;
/*!
ensures
- returns the maximum number of iterations this object will perform
while attempting to solve each quadratic programming subproblem.
!*/
void set_inactive_plane_threshold (
unsigned long inactive_thresh
);
/*!
requires
- inactive_thresh > 0
ensures
- #get_inactive_plane_threshold() == inactive_thresh
!*/
unsigned long get_inactive_plane_threshold (
) const;
/*!
ensures
- As OCA runs it builds up a set of cutting planes. Typically
cutting planes become inactive after a certain point and can then
be removed. This function returns the number of iterations of
inactivity required before a cutting plane is removed.
!*/
};
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_OPTIMIZATION_OCA_ABsTRACT_H__