diff --git a/dlib/optimization.h b/dlib/optimization.h index 4f5023ad9..ff454eef0 100644 --- a/dlib/optimization.h +++ b/dlib/optimization.h @@ -6,6 +6,7 @@ #include "optimization/optimization.h" #include "optimization/optimization_bobyqa.h" #include "optimization/optimization_solve_qp_using_smo.h" +#include "optimization/optimization_oca.h" #endif // DLIB_OPTIMIZATIOn_HEADER diff --git a/dlib/optimization/optimization_oca.h b/dlib/optimization/optimization_oca.h new file mode 100644 index 000000000..cffc5fb3b --- /dev/null +++ b/dlib/optimization/optimization_oca.h @@ -0,0 +1,294 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_OPTIMIZATIoN_OCA_H__ +#define DLIB_OPTIMIZATIoN_OCA_H__ + +#include "optimization_oca_abstract.h" + +#include "../matrix.h" +#include "optimization_solve_qp_using_smo.h" +#include + +// ---------------------------------------------------------------------------------------- + +namespace dlib +{ + template + class oca_problem + { + public: + typedef typename matrix_type::type scalar_type; + + virtual ~oca_problem() {} + + virtual void optimization_status ( + scalar_type , + scalar_type , + unsigned long + ) const {} + + virtual bool R_has_lower_bound ( + scalar_type& + ) const { return false; } + + virtual scalar_type get_C ( + ) const = 0; + + virtual long num_dimensions ( + ) const = 0; + + virtual void get_risk ( + matrix_type& current_solution, + scalar_type& risk_value, + matrix_type& risk_subgradient + ) const = 0; + + }; + +// ---------------------------------------------------------------------------------------- + + class oca + { + public: + + oca () + { + eps = 0.001; + max_iter = 1000000; + + sub_eps = 1e-5; + sub_max_iter = 20000; + + inactive_thresh = 15; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void oca::set_epsilon" + << "\n\t epsilon must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + double get_epsilon ( + ) const { return eps; } + + void set_max_iterations ( + unsigned long max_iter_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(max_iter_ > 0, + "\t void oca::set_max_iterations" + << "\n\t max iterations must be greater than 0" + << "\n\t max_iter_: " << max_iter_ + << "\n\t this: " << this + ); + + max_iter = max_iter_; + } + + unsigned long get_max_iterations ( + ) const { return max_iter; } + + void set_subproblem_epsilon ( + double eps_ + ) { sub_eps = eps_; } + + double get_subproblem_epsilon ( + ) const { return sub_eps; } + + void set_subproblem_max_iterations ( + unsigned long sub_max_iter_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(sub_max_iter_ > 0, + "\t void oca::set_subproblem_max_iterations" + << "\n\t max iterations must be greater than 0" + << "\n\t sub_max_iter_: " << sub_max_iter_ + << "\n\t this: " << this + ); + + sub_max_iter = sub_max_iter_; + } + + unsigned long get_subproblem_max_iterations ( + ) const { return sub_max_iter; } + + void set_inactive_plane_threshold ( + unsigned long inactive_thresh_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(inactive_thresh_ > 0, + "\t void oca::set_inactive_plane_threshold" + << "\n\t inactive threshold must be greater than 0" + << "\n\t inactive_thresh_: " << inactive_thresh_ + << "\n\t this: " << this + ); + + inactive_thresh = inactive_thresh_; + } + + unsigned long get_inactive_plane_threshold ( + ) const { return inactive_thresh; } + + template < + typename matrix_type + > + void operator() ( + const oca_problem& problem, + matrix_type& w + ) const + { + typedef typename matrix_type::type scalar_type; + typedef typename matrix_type::layout_type layout_type; + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef matrix vect_type; + + const scalar_type C = problem.get_C(); + + std::list planes; + std::vector bs, miss_count; + + vect_type temp, alpha, w_cur; + + w.set_size(problem.num_dimensions(), 1); + w = 0; + w_cur = w; + + // The best objective value seen so far. Note also + // that w always contains the best solution seen so far. + scalar_type best_obj = std::numeric_limits::max(); + + // This will hold the cutting plane objective value. This value is + // a lower bound on the true optimal objective value. + scalar_type cp_obj = 0; + + scalar_type R_lower_bound; + if (problem.R_has_lower_bound(R_lower_bound)) + { + // The flat lower bounding plane is always good to have if we know + // what it is. + bs.push_back(R_lower_bound); + planes.push_back(zeros_matrix(w.size(),1)); + miss_count.push_back(0); + } + + matrix K; + + for (unsigned long iter = 0; iter < max_iter; ++iter) + { + + // add the next cutting plane + scalar_type cur_risk; + planes.resize(planes.size()+1); + problem.get_risk(w_cur, cur_risk, planes.back()); + bs.push_back(cur_risk - dot(w_cur,planes.back())); + miss_count.push_back(0); + + // Check the objective value at w_cur and see if it is better than + // the best seen so far. + const scalar_type cur_obj = 0.5*trans(w_cur)*w_cur + C*cur_risk; + if (cur_obj < best_obj) + { + best_obj = cur_obj; + w = w_cur; + } + + // check stopping condition and stop if we can + if (best_obj - cp_obj <= eps) + break; + + + // compute kernel matrix for all the planes + K.set_size(planes.size(), planes.size()); + long rr = 0; + for (typename std::list::iterator r = planes.begin(); r != planes.end(); ++r) + { + long cc = rr; + for (typename std::list::iterator c = r; c != planes.end(); ++c) + { + K(rr,cc) = dot(*r, *c); + K(cc,rr) = K(rr,cc); + ++cc; + } + ++rr; + } + + alpha = uniform_matrix(planes.size(),1, C/planes.size()); + + // solve the cutting plane subproblem for the next w_cur + solve_qp_using_smo(K, vector_to_matrix(bs), alpha, static_cast(sub_eps), sub_max_iter); + + // construct the w_cur that minimized the subproblem. + w_cur = 0; + rr = 0; + for (typename std::list::iterator i = planes.begin(); i != planes.end(); ++i) + { + if (alpha(rr) != 0) + { + w_cur -= alpha(rr)*(*i); + miss_count[rr] = 0; + } + else + { + miss_count[rr] += 1; + } + ++rr; + } + + // Compute the lower bound on the true objective given to us by the cutting + // plane subproblem. + cp_obj = -0.5*trans(w_cur)*w_cur + trans(alpha)*vector_to_matrix(bs); + + // check stopping condition and stop if we can + if (best_obj - cp_obj <= eps) + break; + + // report current status + problem.optimization_status(best_obj, best_obj - cp_obj, planes.size()); + + // If it has been a while since a cutting plane was an active constraint then + // we should throw it away. + while (max(vector_to_matrix(miss_count)) >= inactive_thresh) + { + long idx = index_of_max(vector_to_matrix(miss_count)); + typename std::list::iterator i0 = planes.begin(); + advance(i0, idx); + planes.erase(i0); + bs.erase(bs.begin()+idx); + miss_count.erase(miss_count.begin()+idx); + } + + } + + // report current status + problem.optimization_status(best_obj, best_obj - cp_obj, planes.size()); + + } + + private: + + double eps; + double sub_eps; + + unsigned long max_iter; + unsigned long sub_max_iter; + + unsigned long inactive_thresh; + }; +} + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_OPTIMIZATIoN_OCA_H__ + diff --git a/dlib/optimization/optimization_oca_abstract.h b/dlib/optimization/optimization_oca_abstract.h new file mode 100644 index 000000000..02c04fe32 --- /dev/null +++ b/dlib/optimization/optimization_oca_abstract.h @@ -0,0 +1,240 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_OPTIMIZATION_OCA_ABsTRACT_H__ +#ifdef DLIB_OPTIMIZATION_OCA_ABsTRACT_H__ + +// ---------------------------------------------------------------------------------------- + +namespace dlib +{ + template + class oca_problem + { + /*! + REQUIREMENTS ON matrix_type + - matrix_type == a dlib::matrix capable of storing column vectors + + WHAT THIS OBJECT REPRESENTS + This object is the interface used to define the optimization + problems solved by the oca optimizer defined later in this file. + + OCA solves optimization problems with the following form: + Minimize: f(w) == 0.5*dot(w,w) + C*R(w) + + Where R(w) is a convex function and C > 0 + !*/ + + public: + + typedef typename matrix_type::type scalar_type; + + virtual ~oca_problem() {} + + virtual void optimization_status ( + scalar_type current_objective_value, + scalar_type current_error_gap, + unsigned long num_cutting_planes + ) const {} + /*! + This function is called by the OCA optimizer each iteration. It + exists to allow the user to monitor the progress of the optimization. + !*/ + + virtual bool R_has_lower_bound ( + scalar_type& lower_bound + ) const { return false; } + /*! + ensures + - if (R(w) >= a constant for all values of w) then + - returns true + - #lower_bound == the constant that lower bounds R(w) + - else + - returns false + !*/ + + virtual scalar_type get_C ( + ) const = 0; + /*! + ensures + - returns the C parameter + !*/ + + virtual long num_dimensions ( + ) const = 0; + /*! + ensures + - returns the number of free variables in this optimization problem + !*/ + + virtual void get_risk ( + matrix_type& current_solution, + scalar_type& risk_value, + matrix_type& risk_subgradient + ) const = 0; + /*! + requires + - is_col_vector(current_solution) == true + - current_solution.size() == num_dimensions() + ensures + - #current_solution will be set to one of the following: + - current_solution (i.e. it won't be modified at all) + - The result of a line search passing through current_solution. + - #risk_value == R(#current_solution) + - #risk_subgradient == an element of the subgradient of R() at the + point #current_solution + - Note that risk_value and risk_subgradient are NOT multiplied by get_C() + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + class oca + { + /*! + INITIAL VALUE + - get_epsilon() == 0.001 + - get_max_iterations() == 1000000 + - get_subproblem_epsilon() == 1e-5 + - get_subproblem_max_iterations() == 20000 + - get_inactive_plane_threshold() == 15 + + WHAT THIS OBJECT REPRESENTS + This object is a tool for solving the optimization problem defined above + by the oca_problem abstract class. + + For reference, OCA solves optimization problems with the following form: + Minimize: f(w) == 0.5*dot(w,w) + C*R(w) + + Where R(w) is a convex function and C > 0 + + + For a detailed discussion you should consult the following papers: + Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization + Vojtěch Franc, Sören Sonnenburg; 10(Oct):2157--2192, 2009. + + Bundle Methods for Regularized Risk Minimization + Choon Hui Teo, S.V.N. Vishwanthan, Alex J. Smola, Quoc V. Le; 11(Jan):311−365, 2010. + !*/ + public: + + oca ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + template < + typename matrix_type + > + void operator() ( + const oca_problem& problem, + matrix_type& w + ) const; + /*! + ensures + - solves the given oca problem and stores the solution in #w + !*/ + + void set_epsilon ( + double eps_ + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but may cause + the algorithm to take longer to execute. + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + requires + - max_iter > 0 + ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ) const; + /*! + ensures + - returns the maximum number of iterations this object will perform + while attempting to solve an oca_problem. + !*/ + + void set_subproblem_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_subproblem_epsilon() == eps + !*/ + + double get_subproblem_epsilon ( + ) const; + /*! + ensures + - returns the accuracy used in solving the quadratic programming + subproblem that is part of the overall OCA algorithm. + !*/ + + void set_subproblem_max_iterations ( + unsigned long sub_max_iter + ); + /*! + requires + - sub_max_iter > 0 + ensures + - #get_subproblem_max_iterations() == sub_max_iter + !*/ + + unsigned long get_subproblem_max_iterations ( + ) const; + /*! + ensures + - returns the maximum number of iterations this object will perform + while attempting to solve each quadratic programming subproblem. + !*/ + + void set_inactive_plane_threshold ( + unsigned long inactive_thresh + ); + /*! + requires + - inactive_thresh > 0 + ensures + - #get_inactive_plane_threshold() == inactive_thresh + !*/ + + unsigned long get_inactive_plane_threshold ( + ) const; + /*! + ensures + - As OCA runs it builds up a set of cutting planes. Typically + cutting planes become inactive after a certain point and can then + be removed. This function returns the number of iterations of + inactivity required before a cutting plane is removed. + !*/ + + }; +} + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_OPTIMIZATION_OCA_ABsTRACT_H__ + +