Moved the new OCA implementation into dlib proper.

--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403487
2010-02-25 03:43:17 +00:00 · 2010-02-25 03:43:17 +00:00 · b934a51286
parent e8baa7d1a4
commit b934a51286
3 changed files with 535 additions and 0 deletions
--- a/dlib/optimization.h
+++ b/dlib/optimization.h
@ -6,6 +6,7 @@
 #include "optimization/optimization.h"
 #include "optimization/optimization_bobyqa.h"
 #include "optimization/optimization_solve_qp_using_smo.h"
+#include "optimization/optimization_oca.h"

 #endif // DLIB_OPTIMIZATIOn_HEADER

--- a/dlib/optimization/optimization_oca.h
+++ b/dlib/optimization/optimization_oca.h
@ -0,0 +1,294 @@
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_OPTIMIZATIoN_OCA_H__
+#define DLIB_OPTIMIZATIoN_OCA_H__
+
+#include "optimization_oca_abstract.h"
+
+#include "../matrix.h"
+#include "optimization_solve_qp_using_smo.h"
+#include <list>
+
+// ----------------------------------------------------------------------------------------
+
+namespace dlib
+{
+    template <typename matrix_type>
+    class oca_problem
+    {
+    public:
+        typedef typename matrix_type::type scalar_type;
+
+        virtual ~oca_problem() {}
+
+        virtual void optimization_status (
+            scalar_type ,
+            scalar_type ,
+            unsigned long 
+        ) const {}
+
+        virtual bool R_has_lower_bound (
+            scalar_type& 
+        ) const { return false; }
+
+        virtual scalar_type get_C (
+        ) const = 0;
+
+        virtual long num_dimensions (
+        ) const = 0;
+
+        virtual void get_risk (
+            matrix_type& current_solution,
+            scalar_type& risk_value,
+            matrix_type& risk_subgradient
+        ) const = 0;
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class oca
+    {
+    public:
+
+        oca () 
+        {
+            eps = 0.001;
+            max_iter = 1000000;
+
+            sub_eps = 1e-5;
+            sub_max_iter = 20000;
+
+            inactive_thresh = 15;
+        }
+
+        void set_epsilon (
+            double eps_
+        ) 
+        { 
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\t void oca::set_epsilon"
+                << "\n\t epsilon must be greater than 0"
+                << "\n\t eps_: " << eps_ 
+                << "\n\t this: " << this
+                );
+
+            eps = eps_; 
+        }
+
+        double get_epsilon (
+        ) const { return eps; }
+
+        void set_max_iterations (
+            unsigned long max_iter_
+        ) 
+        { 
+            // make sure requires clause is not broken
+            DLIB_ASSERT(max_iter_ > 0,
+                "\t void oca::set_max_iterations"
+                << "\n\t max iterations must be greater than 0"
+                << "\n\t max_iter_: " << max_iter_
+                << "\n\t this: " << this
+                );
+
+            max_iter = max_iter_; 
+        }
+
+        unsigned long get_max_iterations (
+        ) const { return max_iter; }
+
+        void set_subproblem_epsilon (
+            double eps_
+        ) { sub_eps = eps_; }
+
+        double get_subproblem_epsilon (
+        ) const { return sub_eps; }
+
+        void set_subproblem_max_iterations (
+            unsigned long sub_max_iter_
+        ) 
+        { 
+            // make sure requires clause is not broken
+            DLIB_ASSERT(sub_max_iter_ > 0,
+                "\t void oca::set_subproblem_max_iterations"
+                << "\n\t max iterations must be greater than 0"
+                << "\n\t sub_max_iter_: " << sub_max_iter_
+                << "\n\t this: " << this
+                );
+
+            sub_max_iter = sub_max_iter_; 
+        }
+
+        unsigned long get_subproblem_max_iterations (
+        ) const { return sub_max_iter; }
+
+        void set_inactive_plane_threshold (
+            unsigned long inactive_thresh_
+        ) 
+        { 
+            // make sure requires clause is not broken
+            DLIB_ASSERT(inactive_thresh_ > 0,
+                "\t void oca::set_inactive_plane_threshold"
+                << "\n\t inactive threshold must be greater than 0"
+                << "\n\t inactive_thresh_: " << inactive_thresh_
+                << "\n\t this: " << this
+                );
+
+            inactive_thresh = inactive_thresh_; 
+        }
+
+        unsigned long get_inactive_plane_threshold (
+        ) const { return inactive_thresh; }
+
+        template <
+            typename matrix_type
+            >
+        void operator() (
+            const oca_problem<matrix_type>& problem,
+            matrix_type& w
+        ) const
+        {
+            typedef typename matrix_type::type scalar_type;
+            typedef typename matrix_type::layout_type layout_type;
+            typedef typename matrix_type::mem_manager_type mem_manager_type;
+            typedef matrix<scalar_type,0,1,mem_manager_type, layout_type> vect_type;
+
+            const scalar_type C = problem.get_C();
+
+            std::list<vect_type> planes;
+            std::vector<scalar_type> bs, miss_count;
+
+            vect_type temp, alpha, w_cur;
+
+            w.set_size(problem.num_dimensions(), 1);
+            w = 0;
+            w_cur = w;
+
+            // The best objective value seen so far.   Note also
+            // that w always contains the best solution seen so far.
+            scalar_type best_obj = std::numeric_limits<scalar_type>::max();
+
+            // This will hold the cutting plane objective value.  This value is
+            // a lower bound on the true optimal objective value.
+            scalar_type cp_obj = 0;
+
+            scalar_type R_lower_bound;
+            if (problem.R_has_lower_bound(R_lower_bound))
+            {
+                // The flat lower bounding plane is always good to have if we know
+                // what it is.
+                bs.push_back(R_lower_bound);
+                planes.push_back(zeros_matrix<scalar_type>(w.size(),1));
+                miss_count.push_back(0);
+            }
+
+            matrix<scalar_type,0,0,mem_manager_type, layout_type> K;
+
+            for (unsigned long iter = 0; iter < max_iter; ++iter)
+            {
+
+                // add the next cutting plane
+                scalar_type cur_risk;
+                planes.resize(planes.size()+1);
+                problem.get_risk(w_cur, cur_risk, planes.back());
+                bs.push_back(cur_risk - dot(w_cur,planes.back()));
+                miss_count.push_back(0);
+
+                // Check the objective value at w_cur and see if it is better than
+                // the best seen so far.
+                const scalar_type cur_obj = 0.5*trans(w_cur)*w_cur + C*cur_risk;
+                if (cur_obj < best_obj)
+                {
+                    best_obj = cur_obj;
+                    w = w_cur;
+                }
+
+                // check stopping condition and stop if we can
+                if (best_obj - cp_obj <= eps)
+                    break;
+
+
+                // compute kernel matrix for all the planes
+                K.set_size(planes.size(), planes.size());
+                long rr = 0;
+                for (typename std::list<vect_type>::iterator r = planes.begin(); r != planes.end(); ++r)
+                {
+                    long cc = rr;
+                    for (typename std::list<vect_type>::iterator c = r; c != planes.end(); ++c)
+                    {
+                        K(rr,cc) = dot(*r, *c);
+                        K(cc,rr) = K(rr,cc);
+                        ++cc;
+                    }
+                    ++rr;
+                }
+
+                alpha = uniform_matrix<scalar_type>(planes.size(),1, C/planes.size());
+
+                // solve the cutting plane subproblem for the next w_cur
+                solve_qp_using_smo(K, vector_to_matrix(bs), alpha, static_cast<scalar_type>(sub_eps), sub_max_iter); 
+
+                // construct the w_cur that minimized the subproblem.
+                w_cur = 0;
+                rr = 0;
+                for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i)
+                {
+                    if (alpha(rr) != 0)
+                    {
+                        w_cur -= alpha(rr)*(*i);
+                        miss_count[rr] = 0;
+                    }
+                    else
+                    {
+                        miss_count[rr] += 1;
+                    }
+                    ++rr;
+                }
+
+                // Compute the lower bound on the true objective given to us by the cutting 
+                // plane subproblem.
+                cp_obj = -0.5*trans(w_cur)*w_cur + trans(alpha)*vector_to_matrix(bs);
+
+                // check stopping condition and stop if we can
+                if (best_obj - cp_obj <= eps)
+                    break;
+
+                // report current status
+                problem.optimization_status(best_obj, best_obj - cp_obj, planes.size());
+
+                // If it has been a while since a cutting plane was an active constraint then
+                // we should throw it away.
+                while (max(vector_to_matrix(miss_count)) >= inactive_thresh)
+                {
+                    long idx = index_of_max(vector_to_matrix(miss_count));
+                    typename std::list<vect_type>::iterator i0 = planes.begin();
+                    advance(i0, idx);
+                    planes.erase(i0);
+                    bs.erase(bs.begin()+idx);
+                    miss_count.erase(miss_count.begin()+idx);
+                }
+
+            }
+
+            // report current status
+            problem.optimization_status(best_obj, best_obj - cp_obj, planes.size());
+
+        }
+
+    private:
+
+        double eps;
+        double sub_eps;
+
+        unsigned long max_iter;
+        unsigned long sub_max_iter;
+
+        unsigned long inactive_thresh;
+    };
+}
+
+// ----------------------------------------------------------------------------------------
+
+#endif // DLIB_OPTIMIZATIoN_OCA_H__
+
--- a/dlib/optimization/optimization_oca_abstract.h
+++ b/dlib/optimization/optimization_oca_abstract.h
@ -0,0 +1,240 @@
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_OPTIMIZATION_OCA_ABsTRACT_H__
+#ifdef DLIB_OPTIMIZATION_OCA_ABsTRACT_H__
+
+// ----------------------------------------------------------------------------------------
+
+namespace dlib
+{
+    template <typename matrix_type>
+    class oca_problem
+    {
+        /*!
+            REQUIREMENTS ON matrix_type
+                - matrix_type == a dlib::matrix capable of storing column vectors
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is the interface used to define the optimization 
+                problems solved by the oca optimizer defined later in this file.
+
+                OCA solves optimization problems with the following form:
+                    Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
+
+                    Where R(w) is a convex function and C > 0
+        !*/
+
+    public:
+
+        typedef typename matrix_type::type scalar_type;
+
+        virtual ~oca_problem() {}
+
+        virtual void optimization_status (
+            scalar_type current_objective_value,
+            scalar_type current_error_gap,
+            unsigned long num_cutting_planes
+        ) const {}
+        /*!
+            This function is called by the OCA optimizer each iteration.  It
+            exists to allow the user to monitor the progress of the optimization.
+        !*/
+
+        virtual bool R_has_lower_bound (
+            scalar_type& lower_bound
+        ) const { return false; }
+        /*!
+            ensures
+                - if (R(w) >= a constant for all values of w) then
+                    - returns true
+                    - #lower_bound == the constant that lower bounds R(w)
+                - else
+                    - returns false
+        !*/
+
+        virtual scalar_type get_C (
+        ) const = 0;
+        /*!
+            ensures
+                - returns the C parameter
+        !*/
+
+        virtual long num_dimensions (
+        ) const = 0;
+        /*!
+            ensures
+                - returns the number of free variables in this optimization problem
+        !*/
+
+        virtual void get_risk (
+            matrix_type& current_solution,
+            scalar_type& risk_value,
+            matrix_type& risk_subgradient
+        ) const = 0;
+        /*!
+            requires
+                - is_col_vector(current_solution) == true
+                - current_solution.size() == num_dimensions()
+            ensures
+                - #current_solution will be set to one of the following:
+                    - current_solution (i.e. it won't be modified at all)
+                    - The result of a line search passing through current_solution.  
+                - #risk_value == R(#current_solution) 
+                - #risk_subgradient == an element of the subgradient of R() at the 
+                  point #current_solution
+                - Note that risk_value and risk_subgradient are NOT multiplied by get_C()
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    class oca
+    {
+        /*!
+            INITIAL VALUE
+                - get_epsilon() == 0.001
+                - get_max_iterations() == 1000000
+                - get_subproblem_epsilon() == 1e-5
+                - get_subproblem_max_iterations() == 20000
+                - get_inactive_plane_threshold() == 15
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for solving the optimization problem defined above
+                by the oca_problem abstract class.  
+
+                For reference, OCA solves optimization problems with the following form:
+                    Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
+
+                    Where R(w) is a convex function and C > 0
+
+
+                For a detailed discussion you should consult the following papers:
+                    Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization
+                        Vojtěch Franc, Sören Sonnenburg; 10(Oct):2157--2192, 2009. 
+
+                    Bundle Methods for Regularized Risk Minimization
+                        Choon Hui Teo, S.V.N. Vishwanthan, Alex J. Smola, Quoc V. Le; 11(Jan):311−365, 2010. 
+        !*/
+    public:
+
+        oca (
+        ); 
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        template <
+            typename matrix_type
+            >
+        void operator() (
+            const oca_problem<matrix_type>& problem,
+            matrix_type& w
+        ) const;
+        /*!
+            ensures
+                - solves the given oca problem and stores the solution in #w
+        !*/
+
+        void set_epsilon (
+            double eps_
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_epsilon() == eps 
+        !*/
+
+        double get_epsilon (
+        ) const; 
+        /*!
+            ensures
+                - returns the error epsilon that determines when training should stop.
+                  Smaller values may result in a more accurate solution but may cause
+                  the algorithm to take longer to execute.
+        !*/
+
+        void set_max_iterations (
+            unsigned long max_iter
+        ); 
+        /*!
+            requires
+                - max_iter > 0
+            ensures
+                - #get_max_iterations() == max_iter
+        !*/
+
+        unsigned long get_max_iterations (
+        ) const;
+        /*!
+            ensures
+                - returns the maximum number of iterations this object will perform
+                  while attempting to solve an oca_problem.
+        !*/
+
+        void set_subproblem_epsilon (
+            double eps
+        ); 
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_subproblem_epsilon() == eps 
+        !*/
+
+        double get_subproblem_epsilon (
+        ) const; 
+        /*!
+            ensures
+                - returns the accuracy used in solving the quadratic programming
+                  subproblem that is part of the overall OCA algorithm.
+        !*/
+
+        void set_subproblem_max_iterations (
+            unsigned long sub_max_iter
+        ); 
+        /*!
+            requires
+                - sub_max_iter > 0
+            ensures
+                - #get_subproblem_max_iterations() == sub_max_iter
+        !*/
+
+        unsigned long get_subproblem_max_iterations (
+        ) const; 
+        /*!
+            ensures
+                - returns the maximum number of iterations this object will perform
+                  while attempting to solve each quadratic programming subproblem.
+        !*/
+
+        void set_inactive_plane_threshold (
+            unsigned long inactive_thresh
+        ); 
+        /*!
+            requires
+                - inactive_thresh > 0
+            ensures
+                - #get_inactive_plane_threshold() == inactive_thresh
+        !*/
+
+        unsigned long get_inactive_plane_threshold (
+        ) const; 
+        /*!
+            ensures
+                - As OCA runs it builds up a set of cutting planes.  Typically
+                  cutting planes become inactive after a certain point and can then
+                  be removed.  This function returns the number of iterations of
+                  inactivity required before a cutting plane is removed.
+        !*/
+
+    };
+}
+
+// ----------------------------------------------------------------------------------------
+
+#endif // DLIB_OPTIMIZATION_OCA_ABsTRACT_H__
+
+