diff --git a/dlib/matrix/matrix_la.h b/dlib/matrix/matrix_la.h index 2a59e11ea..50e5ec276 100644 --- a/dlib/matrix/matrix_la.h +++ b/dlib/matrix/matrix_la.h @@ -815,6 +815,224 @@ convergence: #endif } +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template + void find_matrix_range ( + const matrix& A, + unsigned long l, + matrix& Q, + unsigned long q = 0 + ) + /*! + requires + - A.nr() >= l + ensures + - #Q.nr() == A.nr() + - #Q.nc() == l + - #Q == an orthonormal matrix whose range approximates the range of the + matrix A. + - This function implements the randomized subspace iteration defined + in the algorithm 4.4 box of the paper: + Finding Structure with Randomness: Probabilistic Algorithms for + Constructing Approximate Matrix Decompositions by Halko et al. + - q defines the number of extra subspace iterations this algorithm will + perform. Often q == 0 is fine, but performing more iterations can lead to a + more accurate approximation of the range of A if A has slowly decaying + singular values. In these cases, using a q of 1 or 2 is good. + !*/ + { + DLIB_ASSERT(A.nr() >= (long)l, "Invalid inputs were given to this function."); + matrix G = matrix_cast(gaussian_randm(A.nc(), l)); + matrix Y = A*G; + + Q = qr_decomposition >(Y).get_q(); + + // Do some extra iterations of the power method to make sure we get Q into the + // span of the most important singular vectors of A. + if (q != 0) + { + matrix Z; + for (unsigned long itr = 0; itr < q; ++itr) + { + Z = trans(A)*Q; + Z = qr_decomposition >(Z).get_q(); + + Y = A*Z; + Q = qr_decomposition >(Y).get_q(); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template + void svd_fast ( + const matrix& A, + matrix& u, + matrix& w, + matrix& v, + unsigned long l, + unsigned long q = 0 + ) + { + const unsigned long k = std::min(l, std::min(A.nr(),A.nc())); + + DLIB_ASSERT(l > 0 && A.size() > 0, + "\t void svd_fast()" + << "\n\t Invalid inputs were given to this function." + << "\n\t l: " << l + << "\n\t A.size(): " << A.size() + ); + + matrix Q; + find_matrix_range(A, k, Q, q); + + // Compute trans(B) = trans(Q)*A. The reason we store B transposed + // is so that when we take its SVD later using svd3() it doesn't consume + // a whole lot of RAM. That is, we make sure the square matrix coming out + // of svd3() has size lxl rather than the potentially much larger nxn. + matrix B = trans(A)*Q; + svd3(B, v,w,u); + u = Q*u; + } + +// ---------------------------------------------------------------------------------------- + + template + void find_matrix_range ( + const std::vector& A, + unsigned long l, + matrix& Q, + unsigned long q = 0 + ) + /*! + requires + - A.size() >= l + ensures + - #Q.nr() == A.size() + - #Q.nc() == l + - #Q == an orthonormal matrix whose range approximates the range of the + matrix A. In this case, we interpret A as a matrix of A.size() rows, + where each row is defined by a sparse vector. + - This function implements the randomized subspace iteration defined + in the algorithm 4.4 box of the paper: + Finding Structure with Randomness: Probabilistic Algorithms for + Constructing Approximate Matrix Decompositions by Halko et al. + - q defines the number of extra subspace iterations this algorithm will + perform. Often q == 0 is fine, but performing more iterations can lead to a + more accurate approximation of the range of A if A has slowly decaying + singular values. In these cases, using a q of 1 or 2 is good. + !*/ + { + DLIB_ASSERT(A.size() >= l, "Invalid inputs were given to this function."); + matrix Y(A.size(), l); + + // Compute Y = A*gaussian_randm() + for (long r = 0; r < Y.nr(); ++r) + { + for (long c = 0; c < Y.nc(); ++c) + { + Y(r,c) = dot(A[r], gaussian_randm(std::numeric_limits::max(), 1, c)); + } + } + + Q = qr_decomposition >(Y).get_q(); + + // Do some extra iterations of the power method to make sure we get Q into the + // span of the most important singular vectors of A. + if (q != 0) + { + const unsigned long n = max_index_plus_one(A); + matrix Z(n, l); + for (unsigned long itr = 0; itr < q; ++itr) + { + // Compute Z = trans(A)*Q + Z = 0; + for (unsigned long m = 0; m < A.size(); ++m) + { + for (unsigned long r = 0; r < l; ++r) + { + typename sparse_vector_type::const_iterator i; + for (i = A[m].begin(); i != A[m].end(); ++i) + { + const unsigned long c = i->first; + const T val = i->second; + + Z(c,r) += Q(m,r)*val; + } + } + } + + Z = qr_decomposition >(Z).get_q(); + + // Compute Y = A*Z + for (long r = 0; r < Y.nr(); ++r) + { + for (long c = 0; c < Y.nc(); ++c) + { + Y(r,c) = dot(A[r], colm(Z,c)); + } + } + + Q = qr_decomposition >(Y).get_q(); + } + } + } + +// ---------------------------------------------------------------------------------------- + + template + void svd_fast ( + const std::vector& A, + matrix& u, + matrix& w, + matrix& v, + unsigned long l, + unsigned long q = 0 + ) + { + const long n = max_index_plus_one(A); + const unsigned long k = std::min(l, std::min(A.size(),n)); + + DLIB_ASSERT(l > 0 && A.size() > 0 && n > 0, + "\t void svd_fast()" + << "\n\t Invalid inputs were given to this function." + << "\n\t l: " << l + << "\n\t n (i.e. max_index_plus_one(A)): " << n + << "\n\t A.size(): " << A.size() + ); + + matrix Q; + find_matrix_range(A, k, Q, q); + + // Compute trans(B) = trans(Q)*A. The reason we store B transposed + // is so that when we take its SVD later using svd3() it doesn't consume + // a whole lot of RAM. That is, we make sure the square matrix coming out + // of svd3() has size lxl rather than the potentially much larger nxn. + matrix B(n,k); + B = 0; + for (unsigned long m = 0; m < A.size(); ++m) + { + for (unsigned long r = 0; r < k; ++r) + { + typename sparse_vector_type::const_iterator i; + for (i = A[m].begin(); i != A[m].end(); ++i) + { + const unsigned long c = i->first; + const T val = i->second; + + B(c,r) += Q(m,r)*val; + } + } + } + + svd3(B, v,w,u); + u = Q*u; + } + +// ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < diff --git a/dlib/matrix/matrix_la_abstract.h b/dlib/matrix/matrix_la_abstract.h index 4c6d51b4c..73624c66d 100644 --- a/dlib/matrix/matrix_la_abstract.h +++ b/dlib/matrix/matrix_la_abstract.h @@ -133,6 +133,108 @@ namespace dlib from LAPACK is used to compute the SVD. !*/ +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + void svd_fast ( + const matrix& A, + matrix& u, + matrix& w, + matrix& v, + unsigned long l, + unsigned long q = 0 + ); + /*! + requires + - l > 0 + - A.size() > 0 + (i.e. A can't be an empty matrix) + ensures + - computes the singular value decomposition of A. + - Lets define some constants we use to document the behavior of svd_fast(): + - Let m = A.nr() + - Let n = A.nc() + - Let k = min(l, min(m,n)) + - Therefore, A represents an m by n matrix and svd_fast() is designed + to find a rank-k representation of it. + - if (the rank of A is <= k) then + - A == #u*diagm(#w)*trans(#v) + - else + - A is approximated by #u*diagm(#w)*trans(#v) + (i.e. In this case A can't be represented with a rank-k matrix, so the + matrix you get by trying to reconstruct A from the output of the SVD is + not exactly the same.) + - trans(#u)*#u == identity matrix + - trans(#v)*#v == identity matrix + - #w == the top k singular values of the matrix A (in no particular order). + - #u.nr() == m + - #u.nc() == k + - #w.nr() == k + - #w.nc() == 1 + - #v.nr() == n + - #v.nc() == k + - This function implements the randomized subspace iteration defined in the + algorithm 4.4 and 5.1 boxes of the paper: + Finding Structure with Randomness: Probabilistic Algorithms for + Constructing Approximate Matrix Decompositions by Halko et al. + Therefore, it is very fast and suitable for use with very large matrices. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sparse_vector_type, + typename T + > + void svd_fast ( + const std::vector& A, + matrix& u, + matrix& w, + matrix& v, + unsigned long l, + unsigned long q = 0 + ); + /*! + requires + - A contains a set of sparse vectors. See dlib/svm/sparse_vector_abstract.h + for a definition of what constitutes a sparse vector. + - l > 0 + - max_index_plus_one(A) > 0 + (i.e. A can't be an empty matrix) + ensures + - computes the singular value decomposition of A. In this case, we interpret A + as a matrix of A.size() rows, where each row is defined by a sparse vector. + - Lets define some constants we use to document the behavior of svd_fast(): + - Let m = A.size() + - Let n = max_index_plus_one(A) + - Let k = min(l, min(m,n)) + - Therefore, A represents an m by n matrix and svd_fast() is designed + to find a rank-k representation of it. + - if (the rank of A is <= k) then + - A == #u*diagm(#w)*trans(#v) + - else + - A is approximated by #u*diagm(#w)*trans(#v) + (i.e. In this case A can't be represented with a rank-k matrix, so the + matrix you get by trying to reconstruct A from the output of the SVD is + not exactly the same.) + - trans(#u)*#u == identity matrix + - trans(#v)*#v == identity matrix + - #w == the top k singular values of the matrix A (in no particular order). + - #u.nr() == m + - #u.nc() == k + - #w.nr() == k + - #w.nc() == 1 + - #v.nr() == n + - #v.nc() == k + - This function implements the randomized subspace iteration defined in the + algorithm 4.4 and 5.1 boxes of the paper: + Finding Structure with Randomness: Probabilistic Algorithms for + Constructing Approximate Matrix Decompositions by Halko et al. + Therefore, it is very fast and suitable for use with very large matrices. + !*/ + // ---------------------------------------------------------------------------------------- const matrix real_eigenvalues (