Added a bunch of unit tests for the various forms of structured svm.

Davis King 2011-05-20 20:40:59 -04:00
parent 8c03fbd6da
commit 8904db50cc
3 changed files with 619 additions and 0 deletions


@@ -101,6 +101,7 @@ set (tests
svm_c_linear.cpp
svm.cpp
svm_multiclass_linear.cpp
svm_struct.cpp
symmetric_matrix_cache.cpp
thread_pool.cpp
threads.cpp


@@ -111,6 +111,7 @@ SRC += string.cpp
SRC += svm_c_linear.cpp
SRC += svm.cpp
SRC += svm_multiclass_linear.cpp
SRC += svm_struct.cpp
SRC += symmetric_matrix_cache.cpp
SRC += thread_pool.cpp
SRC += threads.cpp

dlib/test/svm_struct.cpp (new file, 617 lines)

@@ -0,0 +1,617 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <dlib/svm_threaded.h>
#include "tester.h"
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
logger dlog("test.svm_struct");
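// ----------------------------------------------------------------------------------------
// This class hand-codes a 10-class multiclass SVM as a structural SVM problem.  The joint
// feature vector places a sample (plus a -1 bias element) into the block of dimensions
// belonging to its label, and the separation oracle exhaustively checks all 10 labels for
// the one maximizing LOSS(idx,y) + F(x,y) under a 0-1 loss.  It derives from
// structural_svm_problem_threaded and runs with 2 threads.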
template <
typename matrix_type,
typename sample_type,
typename label_type
>
class test_multiclass_svm_problem : public structural_svm_problem_threaded<matrix_type,
std::vector<std::pair<unsigned long,typename matrix_type::type> > >
{
public:
typedef typename matrix_type::type scalar_type;
typedef std::vector<std::pair<unsigned long,scalar_type> > feature_vector_type;
test_multiclass_svm_problem (
const std::vector<sample_type>& samples_,
const std::vector<label_type>& labels_
) :
structural_svm_problem_threaded<matrix_type,
std::vector<std::pair<unsigned long,typename matrix_type::type> > >(2),
samples(samples_),
labels(labels_),
dims(10+1) // +1 for the bias
{
for (int i = 0; i < 10; ++i)
{
distinct_labels.push_back(i);
}
}
virtual long get_num_dimensions (
) const
{
return dims*10;
}
virtual long get_num_samples (
) const
{
return static_cast<long>(samples.size());
}
virtual void get_truth_joint_feature_vector (
long idx,
feature_vector_type& psi
) const
{
sparse_vector::assign(psi, samples[idx]);
// Add a constant -1 to account for the bias term.
psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1)));
// Find which distinct label goes with this psi.
const long label_idx = index_of_max(vector_to_matrix(distinct_labels) == labels[idx]);
offset_feature_vector(psi, dims*label_idx);
}
virtual void separation_oracle (
const long idx,
const matrix_type& current_solution,
scalar_type& loss,
feature_vector_type& psi
) const
{
scalar_type best_val = -std::numeric_limits<scalar_type>::infinity();
unsigned long best_idx = 0;
// Figure out which label is the best. That is, what label maximizes
// LOSS(idx,y) + F(x,y). Note that y in this case is given by distinct_labels[i].
for (unsigned long i = 0; i < distinct_labels.size(); ++i)
{
using dlib::sparse_vector::dot;
using dlib::dot;
// Compute the F(x,y) part:
// perform: temp == dot(relevant part of current solution, samples[idx]) - current_bias
scalar_type temp = dot(rowm(current_solution, range(i*dims, (i+1)*dims-2)), samples[idx]) - current_solution((i+1)*dims-1);
// Add the LOSS(idx,y) part:
if (labels[idx] != distinct_labels[i])
temp += 1;
// Now temp == LOSS(idx,y) + F(x,y). Check if it is the biggest we have seen.
if (temp > best_val)
{
best_val = temp;
best_idx = i;
}
}
sparse_vector::assign(psi, samples[idx]);
// add a constant -1 to account for the bias term
psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1)));
offset_feature_vector(psi, dims*best_idx);
if (distinct_labels[best_idx] == labels[idx])
loss = 0;
else
loss = 1;
}
private:
void offset_feature_vector (
feature_vector_type& sample,
const unsigned long val
) const
{
if (val != 0)
{
for (typename feature_vector_type::iterator i = sample.begin(); i != sample.end(); ++i)
{
i->first += val;
}
}
}
const std::vector<sample_type>& samples;
const std::vector<label_type>& labels;
std::vector<label_type> distinct_labels;
const long dims;
};
// ----------------------------------------------------------------------------------------
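// This trainer solves the 10-class problem with the distributed structural SVM tools.  It
// splits the data in half, wraps each half in a test_multiclass_svm_problem, exposes the
// halves through two svm_struct_processing_node objects on ports 12345 and 12346 (one with
// a cutting plane cache of size 3, one with no cache), and drives both from a local
// svm_struct_controller_node.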
template <
typename K,
typename label_type_ = typename K::scalar_type
>
class test_svm_multiclass_linear_trainer2
{
public:
typedef label_type_ label_type;
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
test_svm_multiclass_linear_trainer2 (
) :
C(10),
eps(1e-4),
verbose(false)
{
}
trained_function_type train (
const std::vector<sample_type>& all_samples,
const std::vector<label_type>& all_labels
) const
{
scalar_type svm_objective = 0;
return train(all_samples, all_labels, svm_objective);
}
trained_function_type train (
const std::vector<sample_type>& all_samples,
const std::vector<label_type>& all_labels,
scalar_type& svm_objective
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
"\t trained_function_type test_svm_multiclass_linear_trainer2::train(all_samples,all_labels)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t all_samples.size(): " << all_samples.size()
<< "\n\t all_labels.size(): " << all_labels.size()
);
typedef matrix<scalar_type,0,1> w_type;
w_type weights;
std::vector<sample_type> samples1(all_samples.begin(), all_samples.begin()+all_samples.size()/2);
std::vector<sample_type> samples2(all_samples.begin()+all_samples.size()/2, all_samples.end());
std::vector<label_type> labels1(all_labels.begin(), all_labels.begin()+all_labels.size()/2);
std::vector<label_type> labels2(all_labels.begin()+all_labels.size()/2, all_labels.end());
test_multiclass_svm_problem<w_type, sample_type, label_type> problem1(samples1, labels1);
test_multiclass_svm_problem<w_type, sample_type, label_type> problem2(samples2, labels2);
problem1.set_max_cache_size(3);
problem2.set_max_cache_size(0);
svm_struct_processing_node node1(problem1, 12345, 3);
svm_struct_processing_node node2(problem2, 12346, 0);
solver.set_inactive_plane_threshold(50);
solver.set_subproblem_epsilon(1e-4);
svm_struct_controller_node controller;
controller.set_c(C);
controller.set_epsilon(eps);
if (verbose)
controller.be_verbose();
controller.add_processing_node("127.0.0.1", 12345);
controller.add_processing_node("127.0.0.1", 12346);
svm_objective = controller(solver, weights);
trained_function_type df;
const long dims = sparse_vector::max_index_plus_one(all_samples);
df.labels = select_all_distinct_labels(all_labels);
df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
df.b = colm(reshape(weights, df.labels.size(), dims+1), dims);
return df;
}
private:
scalar_type C;
scalar_type eps;
bool verbose;
mutable oca solver;
};
// ----------------------------------------------------------------------------------------
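// This trainer solves the whole problem in a single test_multiclass_svm_problem with the
// cutting plane cache disabled (max_cache_size == 0) and optimizes it directly with OCA.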
template <
typename K,
typename label_type_ = typename K::scalar_type
>
class test_svm_multiclass_linear_trainer3
{
public:
typedef label_type_ label_type;
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
test_svm_multiclass_linear_trainer3 (
) :
C(10),
eps(1e-4),
verbose(false)
{
}
trained_function_type train (
const std::vector<sample_type>& all_samples,
const std::vector<label_type>& all_labels
) const
{
scalar_type svm_objective = 0;
return train(all_samples, all_labels, svm_objective);
}
trained_function_type train (
const std::vector<sample_type>& all_samples,
const std::vector<label_type>& all_labels,
scalar_type& svm_objective
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
"\t trained_function_type test_svm_multiclass_linear_trainer3::train(all_samples,all_labels)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t all_samples.size(): " << all_samples.size()
<< "\n\t all_labels.size(): " << all_labels.size()
);
typedef matrix<scalar_type,0,1> w_type;
w_type weights;
test_multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels);
problem.set_max_cache_size(0);
problem.set_c(C);
problem.set_epsilon(eps);
if (verbose)
problem.be_verbose();
solver.set_inactive_plane_threshold(50);
solver.set_subproblem_epsilon(1e-4);
svm_objective = solver(problem, weights);
trained_function_type df;
const long dims = sparse_vector::max_index_plus_one(all_samples);
df.labels = select_all_distinct_labels(all_labels);
df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
df.b = colm(reshape(weights, df.labels.size(), dims+1), dims);
return df;
}
private:
scalar_type C;
scalar_type eps;
bool verbose;
mutable oca solver;
};
// ----------------------------------------------------------------------------------------
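// Same as test_svm_multiclass_linear_trainer3 except the cutting plane cache is enabled
// (max_cache_size == 3).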
template <
typename K,
typename label_type_ = typename K::scalar_type
>
class test_svm_multiclass_linear_trainer4
{
public:
typedef label_type_ label_type;
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
test_svm_multiclass_linear_trainer4 (
) :
C(10),
eps(1e-4),
verbose(false)
{
}
trained_function_type train (
const std::vector<sample_type>& all_samples,
const std::vector<label_type>& all_labels
) const
{
scalar_type svm_objective = 0;
return train(all_samples, all_labels, svm_objective);
}
trained_function_type train (
const std::vector<sample_type>& all_samples,
const std::vector<label_type>& all_labels,
scalar_type& svm_objective
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
"\t trained_function_type test_svm_multiclass_linear_trainer4::train(all_samples,all_labels)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t all_samples.size(): " << all_samples.size()
<< "\n\t all_labels.size(): " << all_labels.size()
);
typedef matrix<scalar_type,0,1> w_type;
w_type weights;
test_multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels);
problem.set_max_cache_size(3);
problem.set_c(C);
problem.set_epsilon(eps);
if (verbose)
problem.be_verbose();
solver.set_inactive_plane_threshold(50);
solver.set_subproblem_epsilon(1e-4);
svm_objective = solver(problem, weights);
trained_function_type df;
const long dims = sparse_vector::max_index_plus_one(all_samples);
df.labels = select_all_distinct_labels(all_labels);
df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
df.b = colm(reshape(weights, df.labels.size(), dims+1), dims);
return df;
}
private:
scalar_type C;
scalar_type eps;
bool verbose;
mutable oca solver;
};
// ----------------------------------------------------------------------------------------
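// This trainer uses dlib's own multiclass_svm_problem (the problem object behind
// svm_multiclass_linear_trainer) with a cutting plane cache of size 3 and optimizes it
// with OCA.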
template <
typename K,
typename label_type_ = typename K::scalar_type
>
class test_svm_multiclass_linear_trainer5
{
public:
typedef label_type_ label_type;
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type;
test_svm_multiclass_linear_trainer5 (
) :
C(10),
eps(1e-4),
verbose(false)
{
}
trained_function_type train (
const std::vector<sample_type>& all_samples,
const std::vector<label_type>& all_labels
) const
{
scalar_type svm_objective = 0;
return train(all_samples, all_labels, svm_objective);
}
trained_function_type train (
const std::vector<sample_type>& all_samples,
const std::vector<label_type>& all_labels,
scalar_type& svm_objective
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
"\t trained_function_type test_svm_multiclass_linear_trainer5::train(all_samples,all_labels)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t all_samples.size(): " << all_samples.size()
<< "\n\t all_labels.size(): " << all_labels.size()
);
typedef matrix<scalar_type,0,1> w_type;
w_type weights;
multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels);
problem.set_max_cache_size(3);
problem.set_c(C);
problem.set_epsilon(eps);
if (verbose)
problem.be_verbose();
solver.set_inactive_plane_threshold(50);
solver.set_subproblem_epsilon(1e-4);
svm_objective = solver(problem, weights);
trained_function_type df;
const long dims = sparse_vector::max_index_plus_one(all_samples);
df.labels = select_all_distinct_labels(all_labels);
df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1));
df.b = colm(reshape(weights, df.labels.size(), dims+1), dims);
return df;
}
private:
scalar_type C;
scalar_type eps;
bool verbose;
mutable oca solver;
};
// ----------------------------------------------------------------------------------------
typedef matrix<double,10,1> sample_type;
typedef double scalar_type;
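// Builds a trivially separable 10-class dataset: 100 samples per class, where each sample
// is all zeros except for a random value in [1,11) in the dimension matching its label.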
void make_dataset (
std::vector<sample_type>& samples,
std::vector<scalar_type>& labels
)
{
samples.clear();
labels.clear();
dlib::rand rnd;
for (int i = 0; i < 10; ++i)
{
for (int j = 0; j < 100; ++j)
{
sample_type samp;
samp = 0;
samp(i) = 10*rnd.get_random_double()+1;
samples.push_back(samp);
labels.push_back(i);
}
}
}
// ----------------------------------------------------------------------------------------
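// Trains the same multiclass dataset with svm_multiclass_linear_trainer and the four test
// trainers above, then checks that all five solutions agree (objective values, weights,
// and bias terms within 1e-2) and that each classifies all 1000 training samples correctly.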
class test_svm_struct : public tester
{
public:
test_svm_struct (
) :
tester ("test_svm_struct",
"Runs tests on the structural svm components.")
{}
void perform_test (
)
{
typedef linear_kernel<sample_type> kernel_type;
svm_multiclass_linear_trainer<kernel_type> trainer1;
test_svm_multiclass_linear_trainer2<kernel_type> trainer2;
test_svm_multiclass_linear_trainer3<kernel_type> trainer3;
test_svm_multiclass_linear_trainer4<kernel_type> trainer4;
test_svm_multiclass_linear_trainer5<kernel_type> trainer5;
trainer1.set_epsilon(1e-4);
trainer1.set_c(10);
std::vector<sample_type> samples;
std::vector<scalar_type> labels;
make_dataset(samples, labels);
multiclass_linear_decision_function<kernel_type,double> df1, df2, df3, df4, df5;
double obj1, obj2, obj3, obj4, obj5;
// Solve a multiclass SVM a whole bunch of different ways and make sure
// they all give the same answer.
print_spinner();
df1 = trainer1.train(samples, labels, obj1);
print_spinner();
df2 = trainer2.train(samples, labels, obj2);
print_spinner();
df3 = trainer3.train(samples, labels, obj3);
print_spinner();
df4 = trainer4.train(samples, labels, obj4);
print_spinner();
df5 = trainer5.train(samples, labels, obj5);
print_spinner();
dlog << LINFO << "obj1: "<< obj1;
dlog << LINFO << "obj2: "<< obj2;
dlog << LINFO << "obj3: "<< obj3;
dlog << LINFO << "obj4: "<< obj4;
dlog << LINFO << "obj5: "<< obj5;
DLIB_TEST(std::abs(obj1 - obj2) < 1e-2);
DLIB_TEST(std::abs(obj1 - obj3) < 1e-2);
DLIB_TEST(std::abs(obj1 - obj4) < 1e-2);
DLIB_TEST(std::abs(obj1 - obj5) < 1e-2);
dlog << LINFO << "weight error: "<< max(abs(df1.weights - df2.weights));
dlog << LINFO << "weight error: "<< max(abs(df1.weights - df3.weights));
dlog << LINFO << "weight error: "<< max(abs(df1.weights - df4.weights));
dlog << LINFO << "weight error: "<< max(abs(df1.weights - df5.weights));
DLIB_TEST(max(abs(df1.weights - df2.weights)) < 1e-2);
DLIB_TEST(max(abs(df1.weights - df3.weights)) < 1e-2);
DLIB_TEST(max(abs(df1.weights - df4.weights)) < 1e-2);
DLIB_TEST(max(abs(df1.weights - df5.weights)) < 1e-2);
dlog << LINFO << "b error: "<< max(abs(df1.b - df2.b));
dlog << LINFO << "b error: "<< max(abs(df1.b - df3.b));
dlog << LINFO << "b error: "<< max(abs(df1.b - df4.b));
dlog << LINFO << "b error: "<< max(abs(df1.b - df5.b));
DLIB_TEST(max(abs(df1.b - df2.b)) < 1e-2);
DLIB_TEST(max(abs(df1.b - df3.b)) < 1e-2);
DLIB_TEST(max(abs(df1.b - df4.b)) < 1e-2);
DLIB_TEST(max(abs(df1.b - df5.b)) < 1e-2);
matrix<double> res = test_multiclass_decision_function(df1, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
res = test_multiclass_decision_function(df2, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
res = test_multiclass_decision_function(df3, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
res = test_multiclass_decision_function(df4, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
res = test_multiclass_decision_function(df5, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
}
} a;
}