This commit is contained in:
Davis King 2016-03-08 15:19:43 -05:00
commit ad6a56e55c
7 changed files with 250 additions and 7 deletions

2
CMakeLists.txt Normal file
View File

@ -0,0 +1,2 @@
cmake_minimum_required(VERSION 2.8.4)
add_subdirectory(dlib)

View File

@ -188,6 +188,13 @@ if (NOT TARGET dlib)
stack_trace.cpp
)
set(dlib_needed_libraries)
if(UNIX)
set(CMAKE_THREAD_PREFER_PTHREAD ON)
find_package(Threads REQUIRED)
set(dlib_needed_libraries ${dlib_needed_libraries} ${CMAKE_THREAD_LIBS_INIT})
endif()
# we want to link to the right stuff depending on our platform.
if (WIN32 AND NOT CYGWIN) ###############################################################################
if (DLIB_NO_GUI_SUPPORT)
@ -196,9 +203,6 @@ if (NOT TARGET dlib)
set (dlib_needed_libraries ws2_32 winmm comctl32 gdi32 imm32)
endif()
elseif(APPLE) ############################################################################
find_library(pthreadlib pthread)
set (dlib_needed_libraries ${pthreadlib})
if (NOT DLIB_NO_GUI_SUPPORT)
find_package(X11 QUIET)
if (X11_FOUND)
@ -235,9 +239,6 @@ if (NOT TARGET dlib)
mark_as_advanced(pthreadlib xlib xlib_path x11_path)
else () ##################################################################################
find_library(pthreadlib pthread)
set (dlib_needed_libraries ${pthreadlib})
# link to the nsl library if it exists. this is something you need sometimes
find_library(nsllib nsl)
if (nsllib)
@ -575,7 +576,7 @@ if (NOT TARGET dlib)
ARCHIVE DESTINATION lib)
endif()
install(DIRECTORY ${CMAKE_SOURCE_DIR}/ DESTINATION include/dlib
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ DESTINATION include/dlib
FILES_MATCHING PATTERN "*.h"
REGEX "${CMAKE_CURRENT_BINARY_DIR}" EXCLUDE)

View File

@ -404,6 +404,153 @@ namespace dlib
return iter+1;
}
// ----------------------------------------------------------------------------------------
template <
typename EXP1,
typename EXP2,
typename T, long NR, long NC, typename MM, typename L
>
unsigned long solve_qp_box_constrained (
const matrix_exp<EXP1>& _Q,
const matrix_exp<EXP2>& _b,
matrix<T,NR,NC,MM,L>& alpha,
matrix<T,NR,NC,MM,L>& lower,
matrix<T,NR,NC,MM,L>& upper,
T eps,
unsigned long max_iter
)
{
const_temp_matrix<EXP1> Q(_Q);
const_temp_matrix<EXP2> b(_b);
// make sure requires clause is not broken
DLIB_ASSERT(Q.nr() == Q.nc() &&
alpha.size() == lower.size() &&
alpha.size() == upper.size() &&
is_col_vector(b) &&
is_col_vector(alpha) &&
is_col_vector(lower) &&
is_col_vector(upper) &&
b.size() == alpha.size() &&
b.size() == Q.nr() &&
alpha.size() > 0 &&
0 <= min(alpha-lower) &&
0 <= max(upper-alpha) &&
eps > 0 &&
max_iter > 0,
"\t unsigned long solve_qp_box_constrained()"
<< "\n\t Invalid arguments were given to this function"
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t Q.nc(): " << Q.nc()
<< "\n\t is_col_vector(b): " << is_col_vector(b)
<< "\n\t is_col_vector(alpha): " << is_col_vector(alpha)
<< "\n\t is_col_vector(lower): " << is_col_vector(lower)
<< "\n\t is_col_vector(upper): " << is_col_vector(upper)
<< "\n\t b.size(): " << b.size()
<< "\n\t alpha.size(): " << alpha.size()
<< "\n\t lower.size(): " << lower.size()
<< "\n\t upper.size(): " << upper.size()
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t min(alpha-lower): " << min(alpha-lower)
<< "\n\t max(upper-alpha): " << max(upper-alpha)
<< "\n\t eps: " << eps
<< "\n\t max_iter: " << max_iter
);
// Compute f'(alpha) (i.e. the gradient of f(alpha)) for the current alpha.
matrix<T,NR,NC,MM,L> df = Q*alpha + b;
matrix<T,NR,NC,MM,L> QQ = reciprocal_max(diag(Q));
// First we use a coordinate descent method to initialize alpha.
double max_df = 0;
for (long iter = 0; iter < alpha.size()*2; ++iter)
{
max_df = 0;
long best_r =0;
// find the best alpha to optimize.
for (long r = 0; r < Q.nr(); ++r)
{
if (alpha(r) <= lower(r) && df(r) > 0)
;//alpha(r) = lower(r);
else if (alpha(r) >= upper(r) && df(r) < 0)
;//alpha(r) = upper(r);
else if (std::abs(df(r)) > max_df)
{
best_r = r;
max_df = std::abs(df(r));
}
}
// now optimize alpha(best_r)
const long r = best_r;
const T old_alpha = alpha(r);
alpha(r) = -(df(r)-Q(r,r)*alpha(r))*QQ(r);
if (alpha(r) < lower(r))
alpha(r) = lower(r);
else if (alpha(r) > upper(r))
alpha(r) = upper(r);
const T delta = old_alpha-alpha(r);
// Now update the gradient. We will perform the equivalent of: df = Q*alpha + b;
for(long k = 0; k < df.nr(); ++k)
df(k) -= Q(r,k)*delta;
}
//cout << "max_df: " << max_df << endl;
//cout << "objective value: " << 0.5*trans(alpha)*Q*alpha + trans(b)*alpha << endl;
// Now do the main iteration block of this solver. The coordinate descent method
// we used above can improve the objective rapidly in the beginning. However,
// Nesterov's method has more rapid convergence once it gets going so this is what
// we use for the main iteration.
matrix<T,NR,NC,MM,L> v, v_old;
v = alpha;
// We need to get an upper bound on the Lipschitz constant for this QP. Since that
// is just the max eigenvalue of Q we can do it using Gershgorin disks.
const T lipschitz_bound = max(diag(Q) + (sum_cols(abs(Q)) - abs(diag(Q))));
double lambda = 0;
unsigned long iter;
for (iter = 0; iter < max_iter; ++iter)
{
const double next_lambda = (1 + std::sqrt(1+4*lambda*lambda))/2;
const double gamma = (1-lambda)/next_lambda;
lambda = next_lambda;
v_old = v;
df = Q*alpha + b;
// now take a projected gradient step using Nesterov's method.
v = clamp(alpha - 1.0/lipschitz_bound * df, lower, upper);
alpha = clamp((1-gamma)*v + gamma*v_old, lower, upper);
// check for convergence every 10 iterations
if (iter%10 == 0)
{
max_df = 0;
for (long r = 0; r < Q.nr(); ++r)
{
if (alpha(r) <= lower(r) && df(r) > 0)
;//alpha(r) = lower(r);
else if (alpha(r) >= upper(r) && df(r) < 0)
;//alpha(r) = upper(r);
else if (std::abs(df(r)) > max_df)
max_df = std::abs(df(r));
}
if (max_df < eps)
break;
}
}
//cout << "max_df: " << max_df << endl;
//cout << "objective value: " << 0.5*trans(alpha)*Q*alpha + trans(b)*alpha << endl;
return iter+1;
}
// ----------------------------------------------------------------------------------------
}

View File

@ -113,6 +113,55 @@ namespace dlib
converge to eps accuracy then the number returned will be max_iter+1.
!*/
// ----------------------------------------------------------------------------------------
template <
typename EXP1,
typename EXP2,
typename T, long NR, long NC, typename MM, typename L
>
unsigned long solve_qp_box_constrained (
const matrix_exp<EXP1>& Q,
const matrix_exp<EXP2>& b,
matrix<T,NR,NC,MM,L>& alpha,
matrix<T,NR,NC,MM,L>& lower,
matrix<T,NR,NC,MM,L>& upper,
T eps,
unsigned long max_iter
);
/*!
requires
- Q.nr() == Q.nc()
- alpha.size() == lower.size() == upper.size()
- is_col_vector(b) == true
- is_col_vector(alpha) == true
- is_col_vector(lower) == true
- is_col_vector(upper) == true
- b.size() == alpha.size() == Q.nr()
- alpha.size() > 0
- 0 <= min(alpha-lower)
- 0 <= max(upper-alpha)
- eps > 0
- max_iter > 0
ensures
- This function solves the following quadratic program:
Minimize: f(alpha) == 0.5*trans(alpha)*Q*alpha + trans(b)*alpha
subject to the following box constraints on alpha:
- 0 <= min(alpha-lower)
- 0 <= max(upper-alpha)
Where f is convex. This means that Q should be positive-semidefinite.
- The solution to the above QP will be stored in #alpha.
- This function uses a combination of a SMO algorithm along with Nesterov's
method as the main iteration of the solver. It starts the algorithm with the
given alpha and it works on the problem until the derivative of f(alpha) is
smaller than eps for each element of alpha or the alpha value is at a box
constraint. So eps controls how accurate the solution is and smaller values
result in better solutions.
- At most max_iter iterations of optimization will be performed.
- returns the number of iterations performed. If this method fails to
converge to eps accuracy then the number returned will be max_iter+1.
!*/
// ----------------------------------------------------------------------------------------
}

View File

@ -6,6 +6,7 @@
#include <sstream>
#include <dlib/control.h>
#include <dlib/optimization.h>
#include "tester.h"
namespace
@ -314,6 +315,27 @@ namespace
initial_state = A*initial_state + B*control + C;
//cout << control(0) << "\t" << trans(initial_state);
}
{
// also just generally test our QP solver.
matrix<double,20,20> Q = gaussian_randm(20,20,5);
Q = Q*trans(Q);
matrix<double,20,1> b = randm(20,1)-0.5;
matrix<double,20,1> alpha, lower, upper, alpha2;
alpha = 0;
alpha2 = 0;
lower = -4;
upper = 3;
solve_qp_box_using_smo(Q,b,alpha,lower, upper, 0.000000001, 500000);
solve_qp_box_constrained(Q,b,alpha2,lower, upper, 0.000000001, 50000);
dlog << LINFO << trans(alpha);
dlog << LINFO << trans(alpha2);
dlog << LINFO << "objective value: " << 0.5*trans(alpha)*Q*alpha + trans(b)*alpha;
dlog << LINFO << "objective value2: " << 0.5*trans(alpha2)*Q*alpha + trans(b)*alpha2;
DLIB_TEST_MSG(max(abs(alpha-alpha2)) < 1e-7, max(abs(alpha-alpha2)));
}
}
} a;

View File

@ -41,6 +41,7 @@
<section>
<name>Special Purpose Optimizers</name>
<item>solve_qp_box_constrained</item>
<item>solve_qp_using_smo</item>
<item>solve_qp2_using_smo</item>
<item>solve_qp3_using_smo</item>
@ -429,6 +430,26 @@ subject to the following constraint:
</component>
<!-- ************************************************************************* -->
<component>
<name>solve_qp_box_constrained</name>
<file>dlib/optimization.h</file>
<spec_file link="true">dlib/optimization/optimization_solve_qp_using_smo_abstract.h</spec_file>
<description>
This function solves the following quadratic program:
<pre>
Minimize: f(alpha) == 0.5*trans(alpha)*Q*alpha + trans(b)*alpha
subject to the following box constraints on alpha:
0 &lt;= min(alpha-lower)
0 &lt;= max(upper-alpha)
Where f is convex. This means that Q should be positive-semidefinite.
</pre>
</description>
</component>
<!-- ************************************************************************* -->
<component>

View File

@ -153,6 +153,7 @@
<term file="optimization.html" name="find_min_single_variable" include="dlib/optimization.h"/>
<term file="optimization.html" name="find_min_using_approximate_derivatives" include="dlib/optimization.h"/>
<term file="optimization.html" name="find_min_bobyqa" include="dlib/optimization.h"/>
<term file="optimization.html" name="solve_qp_box_constrained" include="dlib/optimization.h"/>
<term file="optimization.html" name="solve_qp_using_smo" include="dlib/optimization.h"/>
<term file="optimization.html" name="solve_qp2_using_smo" include="dlib/optimization.h"/>
<term file="optimization.html" name="solve_qp3_using_smo" include="dlib/optimization.h"/>