From 61b6c1ff78a8ca1a0880b350294f9c259de73057 Mon Sep 17 00:00:00 2001 From: Davis King Date: Sun, 12 Nov 2017 15:16:25 -0500 Subject: [PATCH] Added solve_trust_region_subproblem_bounded() --- dlib/optimization/optimization_trust_region.h | 162 ++++++++++++++++++ .../optimization_trust_region_abstract.h | 51 ++++++ dlib/test/optimization.cpp | 23 +++ 3 files changed, 236 insertions(+) diff --git a/dlib/optimization/optimization_trust_region.h b/dlib/optimization/optimization_trust_region.h index 8638622db..5f0ad897f 100644 --- a/dlib/optimization/optimization_trust_region.h +++ b/dlib/optimization/optimization_trust_region.h @@ -225,6 +225,168 @@ namespace dlib return max_iter+1; } +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename EXP1, + typename EXP2, + typename EXP3 + > + bool bounds_violated ( + const matrix_exp& v, + const matrix_exp& l, + const matrix_exp& u + ) + { + DLIB_ASSERT(v.nr() == l.nr() && v.nr() == u.nr()); + DLIB_ASSERT(v.nc() == l.nc() && v.nc() == u.nc()); + for (long r = 0; r < v.nr(); ++r) + { + for (long c = 0; c < v.nc(); c++) + { + if (!(l(r,c) <= v(r,c) && v(r,c) <= u(r,c))) + return true; + } + } + return false; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename EXP1, + typename EXP2, + typename T, long NR, long NC, typename MM, typename L, + typename EXP3 + > + void solve_trust_region_subproblem_bounded ( + const matrix_exp& B_, + const matrix_exp& g_, + const typename EXP1::type radius_, + matrix& p_, + double eps, + unsigned long max_iter, + const matrix_exp& lower_, + const matrix_exp& upper_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(B_.nr() == B_.nc() && is_col_vector(g_) && g_.size() == B_.nr(), + "\t unsigned long solve_trust_region_subproblem_bounded()" + << "\n\t invalid arguments were given to this function" + << "\n\t B_.nr(): " << B_.nr() + << "\n\t B_.nc(): " << B_.nc() + << "\n\t is_col_vector(g_): " << is_col_vector(g_) + << "\n\t g_.size(): " << g_.size() + ); + DLIB_ASSERT(radius_ > 0 && eps > 0 && max_iter > 0, + "\t unsigned long solve_trust_region_subproblem_bounded()" + << "\n\t invalid arguments were given to this function" + << "\n\t radius_: " << radius_ + << "\n\t eps: " << eps + << "\n\t max_iter: " << max_iter + ); + DLIB_ASSERT(is_col_vector(lower_) && lower_.size() == g_.size()); + DLIB_ASSERT(is_col_vector(upper_) && upper_.size() == g_.size()); + DLIB_ASSERT(max(upper_-lower_) >= 0); + // make sure the problem is feasible. That is, there should be a point inside the + // lower and upper bounds that has a norm <= radius_ + DLIB_ASSERT(length(clamp(zeros_matrix(lower_),lower_,upper_)) <= radius_, + "The lower and upper bounds are incompatible with the radius since there is no point within the bounds with a norm less than the radius."); + + // We are going to solve this by greedily finding the most violated bound constraint, + // locking that variable to its constrained value, removing it from the problem, + // and then resolving. We do that until no more constraint violations are present. + + solve_trust_region_subproblem(B_,g_,radius_,p_,eps,max_iter); + + + // just stop here if all the bounds are satisfied. + if (!impl::bounds_violated(p_, lower_, upper_)) + return; + + matrix B = matrix_cast(B_); + matrix g = matrix_cast(g_); + double radius = radius_; + matrix p = matrix_cast(p_); + matrix lower = matrix_cast(lower_); + matrix upper = matrix_cast(upper_); + + // keep a table that tells us how to map any reduced QP back to the original QP + std::vector idxs(g.size()); + for (size_t i = 0; i < idxs.size(); ++i) + idxs[i] = i; + + + // while we haven't found a p that satisfies the bounds constraints + while(impl::bounds_violated(p, lower, upper) ) + { + // Find the most violated variable and fix its value to a constant (the bound + // value). + long most_violated_idx = 0; + double max_violation = 0; + double bounded_value = 0; + for (long i = 0; i < lower.size(); ++i) + { + if (!(lower(i) <= p(i) && p(i) <= upper(i))) + { + if (lower(i)-p(i) > max_violation) + { + max_violation = lower(i)-p(i); + most_violated_idx = i; + bounded_value = lower(i); + } + else if (p(i)-upper(i) > max_violation) + { + max_violation = p(i)-upper(i); + most_violated_idx = i; + bounded_value = upper(i); + } + } + } + + // assign this variable to its final value. + p_(idxs[most_violated_idx]) = bounded_value; + + + // now reduce the QP by removing the variable p_(idxs[most_violated_idx]). + idxs.erase(idxs.begin()+most_violated_idx); + // we are out of variables to remove since everything is at bounds. + if (idxs.size() == 0) + break; + + lower = remove_row(lower,most_violated_idx); + upper = remove_row(upper,most_violated_idx); + g += colm(B,most_violated_idx)*bounded_value; + g = remove_row(g,most_violated_idx); + p = remove_row(p,most_violated_idx); + B = removerc(B,most_violated_idx, most_violated_idx); + + // Removing a variable changes the radius, so we have to subtract the bounded + // value from the radius so as to not change the effective radius for the whole + // problem. + double squared_radius = radius*radius - bounded_value*bounded_value; + if (squared_radius <= 0) + { + p = 0; + break; + } + radius = std::sqrt(squared_radius); + + + solve_trust_region_subproblem(B,g,radius,p,eps,max_iter); + } + + // assign the non-bound-constrained variables to their final values + for (size_t i = 0; i < idxs.size(); ++i) + p_(idxs[i]) = p(i); + + } + +// ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < diff --git a/dlib/optimization/optimization_trust_region_abstract.h b/dlib/optimization/optimization_trust_region_abstract.h index ea96d15c8..e705fcd90 100644 --- a/dlib/optimization/optimization_trust_region_abstract.h +++ b/dlib/optimization/optimization_trust_region_abstract.h @@ -47,6 +47,57 @@ namespace dlib the radius constraint is active and std::abs(length(#p)-radius)/radius <= eps. !*/ +// ---------------------------------------------------------------------------------------- + + template < + typename EXP1, + typename EXP2, + typename T, long NR, long NC, typename MM, typename L, + typename EXP3 + > + void solve_trust_region_subproblem_bounded ( + const matrix_exp& B, + const matrix_exp& g, + const typename EXP1::type radius, + matrix& p, + double eps, + unsigned long max_iter, + const matrix_exp& lower, + const matrix_exp& upper + ); + /*! + requires + - B == trans(B) + (i.e. B should be a symmetric matrix) + - B.nr() == B.nc() + - is_col_vector(g) == true + - is_col_vector(lower) == true + - is_col_vector(upper) == true + - g.size() == B.nr() + - lower.size() == B.nr() + - upper.size() == B.nr() + - p is capable of containing a column vector the size of g + (i.e. p = g; should be a legal expression) + - radius > 0 + - eps > 0 + - max_iter > 0 + - min(upper-lower) >= 0 + - length(clamp(zeros_matrix(lower),lower,upper)) <= radius + (i.e. the lower and upper bounds can't exclude all points with the desired radius.) + ensures + - This function solves the following optimization problem: + Minimize: f(p) == 0.5*trans(p)*B*p + trans(g)*p + subject to the following constraint: + - length(p) <= radius + - lower(i) <= p(i) <= upper(i), for all i + - Solves the problem to eps accuracy. We do this by greedily finding the most + violated bound constraint, locking that variable to its constrained value, removing + it from the problem, and then resolving. We do that until no more constraint + violations are present. Each time we just call solve_trust_region_subproblem() + to get the solution and pass eps and max_iter directly to these calls to + solve_trust_region_subproblem(). + !*/ + // ---------------------------------------------------------------------------------------- class function_model diff --git a/dlib/test/optimization.cpp b/dlib/test/optimization.cpp index 2acc3485b..b47449abe 100644 --- a/dlib/test/optimization.cpp +++ b/dlib/test/optimization.cpp @@ -1182,6 +1182,28 @@ namespace off = 1.0; DLIB_TEST(std::abs( poly_min_extrap(off*off, -2*off, (1-off)*(1-off)) - off) < 1e-13); } + void test_solve_trust_region_subproblem_bounded() + { + print_spinner(); + matrix H(2,2); + H = 1, 0, + 0, 1; + matrix g, lower, upper, p, true_p; + g = {0, 0}; + + double radius = 0.5; + lower = {0.5, 0}; + upper = {10, 10}; + + + solve_trust_region_subproblem_bounded(H,g, radius, p, 0.001, 500, lower, upper); + true_p = { 0.5, 0}; + DLIB_TEST_MSG(length(p-true_p) < 1e-12, p); + + } + +// ---------------------------------------------------------------------------------------- + class optimization_tester : public tester { public: @@ -1200,6 +1222,7 @@ namespace test_box_constrained_optimizers(lbfgs_search_strategy(5)); test_poly_min_extract_2nd(); optimization_test(); + test_solve_trust_region_subproblem_bounded(); } } a;