Made the running covariance objects work with sparse vectors.

This commit is contained in:
Davis King 2013-06-15 15:20:03 -04:00
parent ca6788677c
commit 1d86feaab1
3 changed files with 331 additions and 26 deletions

View File

@ -8,6 +8,7 @@
#include <cmath>
#include "../algs.h"
#include "../matrix.h"
#include "../sparse_vector.h"
namespace dlib
{
@ -646,9 +647,55 @@ namespace dlib
return static_cast<long>(total_count);
}
template <typename EXP>
void add (
const matrix_exp<EXP>& val
// Resets this object (via clear()) and then fixes the input vector
// dimensionality to `size`, leaving the accumulated sum vector and the
// accumulated outer-product matrix sized accordingly and filled with zeros.
// Requires size > 0.
void set_dimension (
    long size
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT( size > 0,
        "\t void running_covariance::set_dimension()"
        << "\n\t Invalid inputs were given to this function"
        << "\n\t size: " << size
        << "\n\t this: " << this
        );

    clear();
    vect_size = size;

    // size-and-zero each accumulator in turn
    total_sum.set_size(size);
    total_sum = 0;

    total_cov.set_size(size,size);
    total_cov = 0;
}
// Sparse-vector overload of add() (selected whenever T is not a dlib matrix).
// val is iterated as a sequence of (index, value) pairs.  Each pair is added
// into total_sum, and the product of every ordered pair of entries is added
// into total_cov, i.e. the outer product of val with itself is accumulated.
// The dimensionality must already be set (in_vector_size() > 0) and every
// index in val must be smaller than it.
template <typename T>
typename disable_if<is_matrix<T> >::type add (
    const T& val
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(((long)max_index_plus_one(val) <= in_vector_size() && in_vector_size() > 0),
        "\t void running_covariance::add()"
        << "\n\t Invalid inputs were given to this function"
        << "\n\t max_index_plus_one(val): " << max_index_plus_one(val)
        << "\n\t in_vector_size(): " << in_vector_size()
        << "\n\t this: " << this
        );

    typedef typename T::const_iterator sparse_iter;
    const sparse_iter last = val.end();

    for (sparse_iter row = val.begin(); row != last; ++row)
    {
        total_sum(row->first) += row->second;

        // accumulate this row of the outer product val*trans(val)
        for (sparse_iter col = val.begin(); col != last; ++col)
            total_cov(row->first, col->first) += row->second*col->second;
    }

    ++total_count;
}
template <typename T>
typename enable_if<is_matrix<T> >::type add (
const T& val
)
{
// make sure requires clause is not broken
@ -810,16 +857,149 @@ namespace dlib
return y_vect_size;
}
// Resets this object (via clear()) and then fixes the dimensionality of the
// x and y inputs.  Afterwards sum_x has x_size elements, sum_y has y_size
// elements, total_cov is x_size by y_size, and all three are filled with
// zeros.  Requires x_size > 0 and y_size > 0.
void set_dimensions (
    long x_size,
    long y_size
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT( x_size > 0 && y_size > 0,
        "\t void running_cross_covariance::set_dimensions()"
        << "\n\t Invalid inputs were given to this function"
        << "\n\t x_size: " << x_size
        << "\n\t y_size: " << y_size
        << "\n\t this: " << this
        );

    clear();

    // x side
    x_vect_size = x_size;
    sum_x.set_size(x_size);
    sum_x = 0;

    // y side
    y_vect_size = y_size;
    sum_y.set_size(y_size);
    sum_y = 0;

    // cross term accumulator
    total_cov.set_size(x_size,y_size);
    total_cov = 0;
}
// Returns total_count converted to long.
long current_n (
) const
{
    const long n = static_cast<long>(total_count);
    return n;
}
template <typename EXP>
void add (
const matrix_exp<EXP>& x,
const matrix_exp<EXP>& y
// add() overload used when neither x nor y is a dlib matrix, i.e. both are
// sparse vectors iterated as (index, value) pairs.  Accumulates x into sum_x,
// y into sum_y, and the outer product of x and y into total_cov.  Both
// dimensions must already be set (> 0) and every index must be in range.
template <typename T, typename U>
typename enable_if_c<!is_matrix<T>::value && !is_matrix<U>::value>::type add (
    const T& x,
    const U& y
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) &&
                 ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) ,
        "\t void running_cross_covariance::add()"
        << "\n\t Invalid inputs were given to this function"
        << "\n\t max_index_plus_one(x): " << max_index_plus_one(x)
        << "\n\t max_index_plus_one(y): " << max_index_plus_one(y)
        << "\n\t x_vector_size(): " << x_vector_size()
        << "\n\t y_vector_size(): " << y_vector_size()
        << "\n\t this: " << this
        );

    typedef typename T::const_iterator x_iter;
    typedef typename U::const_iterator y_iter;
    const x_iter x_last = x.end();
    const y_iter y_last = y.end();

    for (x_iter xi = x.begin(); xi != x_last; ++xi)
    {
        sum_x(xi->first) += xi->second;

        // accumulate this row of the outer product x*trans(y)
        for (y_iter yi = y.begin(); yi != y_last; ++yi)
            total_cov(xi->first, yi->first) += xi->second*yi->second;
    }

    // do sum_y += y
    for (y_iter yi = y.begin(); yi != y_last; ++yi)
        sum_y(yi->first) += yi->second;

    ++total_count;
}
// add() overload used when x is a dlib matrix (dense column vector) and y is
// a sparse vector iterated as (index, value) pairs.  Accumulates x into
// sum_x, y into sum_y, and the outer product of x and y into total_cov.
// Both dimensions must already be set (> 0); x must match x_vector_size()
// exactly and every index in y must be smaller than y_vector_size().
template <typename T, typename U>
typename enable_if_c<is_matrix<T>::value && !is_matrix<U>::value>::type add (
    const T& x,
    const U& y
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT( (is_col_vector(x) && x.size() == x_vector_size() && x_vector_size() > 0) &&
                 ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) ,
        "\t void running_cross_covariance::add()"
        << "\n\t Invalid inputs were given to this function"
        << "\n\t is_col_vector(x): " << is_col_vector(x)
        << "\n\t x.size(): " << x.size()
        << "\n\t max_index_plus_one(y): " << max_index_plus_one(y)
        << "\n\t x_vector_size(): " << x_vector_size()
        << "\n\t y_vector_size(): " << y_vector_size()
        << "\n\t this: " << this
        );

    typedef typename U::const_iterator y_iter;
    const y_iter y_last = y.end();

    sum_x += x;

    // accumulate the outer product x*trans(y), one dense row at a time
    for (long r = 0; r < x.size(); ++r)
    {
        for (y_iter yi = y.begin(); yi != y_last; ++yi)
            total_cov(r, yi->first) += x(r)*yi->second;
    }

    // do sum_y += y
    for (y_iter yi = y.begin(); yi != y_last; ++yi)
        sum_y(yi->first) += yi->second;

    ++total_count;
}
// add() overload used when x is a sparse vector iterated as (index, value)
// pairs and y is a dlib matrix (dense column vector).  Accumulates x into
// sum_x, y into sum_y, and the outer product of x and y into total_cov.
// Both dimensions must already be set (> 0); every index in x must be
// smaller than x_vector_size() and y must match y_vector_size() exactly.
template <typename T, typename U>
typename enable_if_c<!is_matrix<T>::value && is_matrix<U>::value>::type add (
    const T& x,
    const U& y
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) &&
                 (is_col_vector(y) && y.size() == (long)y_vector_size() && y_vector_size() > 0) ,
        "\t void running_cross_covariance::add()"
        << "\n\t Invalid inputs were given to this function"
        << "\n\t max_index_plus_one(x): " << max_index_plus_one(x)
        << "\n\t is_col_vector(y): " << is_col_vector(y)
        << "\n\t y.size(): " << y.size()
        << "\n\t x_vector_size(): " << x_vector_size()
        << "\n\t y_vector_size(): " << y_vector_size()
        << "\n\t this: " << this
        );

    typedef typename T::const_iterator x_iter;
    const x_iter x_last = x.end();

    for (x_iter xi = x.begin(); xi != x_last; ++xi)
    {
        sum_x(xi->first) += xi->second;

        // accumulate this row of the outer product x*trans(y)
        for (long c = 0; c < y.size(); ++c)
            total_cov(xi->first, c) += xi->second*y(c);
    }

    sum_y += y;

    ++total_count;
}
template <typename T, typename U>
typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value>::type add (
const T& x,
const U& y
)
{
// make sure requires clause is not broken

View File

@ -6,6 +6,7 @@
#include <limits>
#include <cmath>
#include "../matrix/matrix_abstract.h"
#include "../svm/sparse_vector_abstract.h"
namespace dlib
{
@ -472,18 +473,41 @@ namespace dlib
- returns 0
!*/
void add (
const matrix_exp& val
void set_dimension (
long size
);
/*!
requires
- is_col_vector(val) == true
- if (in_vector_size() != 0) then
- val.size() == in_vector_size()
- size > 0
ensures
- #in_vector_size() == size
- #current_n() == 0
!*/
template <typename T>
void add (
const T& val
);
/*!
requires
- val must represent a column vector. It can either be a dlib::matrix
object or some kind of unsorted sparse vector type. See the top of
dlib/svm/sparse_vector_abstract.h for a definition of unsorted sparse vector.
- val must have a number of dimensions which is compatible with the current
setting of in_vector_size(). In particular, this means that the
following must hold:
- if (val is a dlib::matrix) then
- in_vector_size() == 0 || val.size() == in_vector_size()
- else
- max_index_plus_one(val) <= in_vector_size()
- in_vector_size() > 0
(i.e. you must call set_dimension() prior to calling add() if
you want to use sparse vectors.)
ensures
- updates the mean and covariance stored in this object so that
the new value is factored into them.
- #in_vector_size() == val.size()
- if (val is a dlib::matrix) then
- #in_vector_size() == val.size()
!*/
const column_matrix mean (
@ -586,6 +610,20 @@ namespace dlib
- returns 0
!*/
void set_dimensions (
long x_size,
long y_size
);
/*!
requires
- x_size > 0
- y_size > 0
ensures
- #x_vector_size() == x_size
- #y_vector_size() == y_size
- #current_n() == 0
!*/
long current_n (
) const;
/*!
@ -593,26 +631,38 @@ namespace dlib
- returns the number of samples that have been presented to this object.
!*/
template <typename EXP>
template <typename T, typename U>
void add (
const matrix_exp<EXP>& x,
const matrix_exp<EXP>& y
const T& x,
const U& y
);
/*!
requires
- is_col_vector(x) == true
- is_col_vector(y) == true
- x.size() != 0
- y.size() != 0
- if (x_vector_size() != 0) then
- x.size() == x_vector_size()
- if (y_vector_size() != 0) then
- y.size() == y_vector_size()
- x and y must represent column vectors. They can either be dlib::matrix
objects or some kind of unsorted sparse vector type. See the top of
dlib/svm/sparse_vector_abstract.h for a definition of unsorted sparse vector.
- x and y must have a number of dimensions which is compatible with the
current setting of x_vector_size() and y_vector_size(). In particular,
this means that the following must hold:
- if (x or y is a sparse vector type) then
- x_vector_size() > 0 && y_vector_size() > 0
(i.e. you must call set_dimensions() prior to calling add() if
you want to use sparse vectors.)
- if (x is a dlib::matrix) then
- x_vector_size() == 0 || x.size() == x_vector_size()
- else
- max_index_plus_one(x) <= x_vector_size()
- if (y is a dlib::matrix) then
- y_vector_size() == 0 || y.size() == y_vector_size()
- else
- max_index_plus_one(y) <= y_vector_size()
ensures
- updates the mean and cross-covariance matrices stored in this object so
that the new (x,y) vector pair is factored into them.
- #x_vector_size() == x.size()
- #y_vector_size() == y.size()
- if (x is a dlib::matrix) then
- #x_vector_size() == x.size()
- if (y is a dlib::matrix) then
- #y_vector_size() == y.size()
!*/
const column_matrix mean_x (

View File

@ -171,6 +171,80 @@ namespace
DLIB_TEST(max(abs(rcc.mean_y()-ym)) < 1e-14);
}
// Test helper: copies every element of the dense column vector x into a
// std::map keyed by element index, so the same data can be fed to the
// sparse-vector overloads.  All elements are stored, including zeros.
std::map<unsigned long,double> dense_to_sparse (
    const matrix<double,0,1>& x
)
{
    std::map<unsigned long,double> sparse;

    const long dims = x.size();
    long idx = 0;
    while (idx < dims)
    {
        sparse[idx] = x(idx);
        ++idx;
    }

    return sparse;
}
// Checks that running_covariance and running_cross_covariance give the same
// answers when samples are supplied as sparse vectors, dense vectors, or a
// mix of the two.
//
// 40 random (x,y) pairs (x has 4 dims, y has 5) are split across two
// accumulators of each kind using different dense/sparse combinations.  The
// two accumulators are then merged with operator+ and the merged means and
// covariances are compared against values computed directly from a replay of
// the same random sequence.
void test_running_cross_covariance_sparse()
{
    running_cross_covariance<matrix<double> > rcc1, rcc2;
    running_covariance<matrix<double> > rc1, rc2;
    matrix<double,0,1> xm, ym;
    const int num = 40;

    // Sparse inputs require the dimensions to be fixed up front.
    rc1.set_dimension(4);
    rc2.set_dimension(4);
    rcc1.set_dimensions(4,5);
    rcc2.set_dimensions(4,5);

    dlib::rand rnd;
    for (int i = 0; i < num; ++i)
    {
        matrix<double,0,1> x = randm(4,1,rnd);
        matrix<double,0,1> y = randm(5,1,rnd);

        // accumulate the reference means
        xm += x/num;
        ym += y/num;

        if (i < 15)
        {
            // dense x, sparse y
            rcc1.add(x,dense_to_sparse(y));
            rc1.add(x);
        }
        else if (i < 30)
        {
            // sparse x, dense y
            rcc2.add(dense_to_sparse(x),y);
            rc2.add(dense_to_sparse(x));
        }
        else
        {
            // sparse x, sparse y
            rcc2.add(dense_to_sparse(x),dense_to_sparse(y));
            rc2.add(x);
        }
    }

    // Restart the generator so the second pass replays the same samples.
    rnd.clear();
    matrix<double> cov, cov2;
    for (int i = 0; i < num; ++i)
    {
        matrix<double,0,1> x = randm(4,1,rnd);
        matrix<double,0,1> y = randm(5,1,rnd);
        cov += (x-xm)*trans(y-ym);
        cov2 += (x-xm)*trans(x-xm);
    }
    cov /= num-1;
    cov2 /= num-1;

    // No unconditional printing here: DLIB_TEST_MSG already reports the
    // discrepancy when the check fails.
    running_cross_covariance<matrix<double> > rcc = rcc1 + rcc2;
    DLIB_TEST_MSG(max(abs(rcc.covariance_xy()-cov)) < 1e-14, max(abs(rcc.covariance_xy()-cov)));
    DLIB_TEST(max(abs(rcc.mean_x()-xm)) < 1e-14);
    DLIB_TEST(max(abs(rcc.mean_y()-ym)) < 1e-14);

    running_covariance<matrix<double> > rc = rc1 + rc2;
    DLIB_TEST(max(abs(rc.covariance()-cov2)) < 1e-14);
    DLIB_TEST(max(abs(rc.mean()-xm)) < 1e-14);
}
void test_running_covariance (
)
{
@ -498,6 +572,7 @@ namespace
test_random_subset_selector2();
test_running_covariance();
test_running_cross_covariance();
test_running_cross_covariance_sparse();
test_running_stats();
test_skewness_and_kurtosis_1();
test_skewness_and_kurtosis_2();