From 54fded51bf93cbb5018c15f58172eb2ef1e0ff8f Mon Sep 17 00:00:00 2001
From: Davis King
Date: Fri, 3 Jun 2011 22:15:14 -0400
Subject: [PATCH] Added an example showing how to create custom kernels.

--HG--
rename : examples/krr_classification_ex.cpp => examples/using_custom_kernels_ex.cpp
---
 examples/CMakeLists.txt              |   1 +
 examples/using_custom_kernels_ex.cpp | 207 +++++++++++++++++++++++++++
 2 files changed, 208 insertions(+)
 create mode 100644 examples/using_custom_kernels_ex.cpp

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 77cfeb32c..07a13d2ca 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -91,4 +91,5 @@ add_example(thread_function_ex)
 add_example(thread_pool_ex)
 add_example(threads_ex)
 add_example(timer_ex)
+add_example(using_custom_kernels_ex)
 add_example(xml_parser_ex)
diff --git a/examples/using_custom_kernels_ex.cpp b/examples/using_custom_kernels_ex.cpp
new file mode 100644
index 000000000..2e1b97ec9
--- /dev/null
+++ b/examples/using_custom_kernels_ex.cpp
@@ -0,0 +1,207 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example showing how to define custom kernel functions for use with
+    the machine learning tools in the dlib C++ Library.
+
+    This example assumes you are somewhat familiar with the machine learning
+    tools in dlib.  In particular, you should be familiar with the krr_trainer
+    and the matrix object.  So you may want to read the krr_classification_ex.cpp
+    and matrix_ex.cpp example programs if you haven't already.
+*/
+
+
+#include <iostream>
+#include <dlib/svm.h>
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+/*
+    Here we define our new kernel.  It is the UKF kernel from
+        Facilitating the applications of support vector machine by using a new kernel
+        by Rui Zhang and Wenjian Wang.
+
+    In the context of the dlib library a kernel function object is an object with
+    an interface that has the following properties:
+        - a public typedef named sample_type
+        - a public typedef named scalar_type which should be a float, double, or
+          long double type.
+        - an overloaded operator() that operates on two items of sample_type
+          and returns a scalar_type.
+        - a public typedef named mem_manager_type that is an implementation of
+          dlib/memory_manager/memory_manager_kernel_abstract.h or
+          dlib/memory_manager_global/memory_manager_global_kernel_abstract.h or
+          dlib/memory_manager_stateless/memory_manager_stateless_kernel_abstract.h
+        - an overloaded == operator that tells you if two kernels are
+          identical or not.
+
+    Below we define such a beast for the UKF kernel.  In this case we are expecting the
+    sample type (i.e. the T type) to be a dlib::matrix.  However, note that you can design
+    kernels which operate on any type you like so long as you meet the above requirements.
+*/
+
+template < typename T >
+struct ukf_kernel
+{
+    typedef typename T::type scalar_type;
+    typedef T sample_type;
+    // If your sample type, the T, doesn't have a memory manager then
+    // you can use dlib::default_memory_manager here.
+    typedef typename T::mem_manager_type mem_manager_type;
+
+    ukf_kernel(const scalar_type g) : sigma(g) {}
+    ukf_kernel() : sigma(0.1) {}
+
+    scalar_type sigma;
+
+    scalar_type operator() (
+        const sample_type& a,
+        const sample_type& b
+    ) const
+    {
+        // This is the formula for the UKF kernel from the above referenced paper.
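+        // In symbols: k(a,b) = 1/(||a-b||^2 + sigma), where ||a-b|| is the Euclidean
+        // distance between the two sample vectors.  So sigma controls how quickly the
+        // kernel value falls off as a and b move apart.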
+        return 1/(length_squared(a-b) + sigma);
+    }
+
+    bool operator== (
+        const ukf_kernel& k
+    ) const
+    {
+        return sigma == k.sigma;
+    }
+};
+
+// ----------------------------------------------------------------------------------------
+
+/*
+    Here we define serialize() and deserialize() functions for our new kernel.  Defining
+    these functions is optional.  However, if you don't define them you won't be able
+    to save your learned decision_function objects to disk.
+*/
+
+template < typename T >
+void serialize ( const ukf_kernel<T>& item, std::ostream& out)
+{
+    // save the state of the kernel to the output stream
+    serialize(item.sigma, out);
+}
+
+template < typename T >
+void deserialize ( ukf_kernel<T>& item, std::istream& in )
+{
+    deserialize(item.sigma, in);
+}
+
+// ----------------------------------------------------------------------------------------
+
+/*
+    This next thing, the kernel_derivative specialization, is OPTIONAL.  You only need
+    to define it if you want to use the dlib::reduced2() or dlib::approximate_distance_function()
+    routines.  If so, then you need to supply code for computing the derivative of your kernel as
+    shown below.  Note also that you can only do this if your kernel operates on dlib::matrix
+    objects which represent column vectors.
+*/
+
+namespace dlib
+{
+    template < typename T >
+    struct kernel_derivative<ukf_kernel<T> >
+    {
+        typedef typename T::type scalar_type;
+        typedef T sample_type;
+        typedef typename T::mem_manager_type mem_manager_type;
+
+        kernel_derivative(const ukf_kernel<T>& k_) : k(k_) {}
+
+        sample_type operator() (const sample_type& x, const sample_type& y) const
+        {
+            // return the derivative of the ukf kernel with respect to the second argument (i.e. y)
+            return 2*(x-y)*std::pow(k(x,y),2);
+        }
+
+        const ukf_kernel<T>& k;
+    };
+}
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    // We are going to be working with 2 dimensional samples and trying to perform
+    // binary classification on them using our new ukf_kernel.
+    typedef matrix<double, 2, 1> sample_type;
+
+    typedef ukf_kernel<sample_type> kernel_type;
+
+
+    // Now let's generate some training data
+    std::vector<sample_type> samples;
+    std::vector<double> labels;
+    for (double r = -20; r <= 20; r += 0.9)
+    {
+        for (double c = -20; c <= 20; c += 0.9)
+        {
+            sample_type samp;
+            samp(0) = r;
+            samp(1) = c;
+            samples.push_back(samp);
+
+            // label this point +1 if it is within 13 units of the origin and -1 otherwise
+            if (sqrt(r*r + c*c) <= 13)
+                labels.push_back(+1);
+            else
+                labels.push_back(-1);
+
+        }
+    }
+    cout << "samples generated: " << samples.size() << endl;
+    cout << "  number of +1 samples: " << sum(vector_to_matrix(labels) > 0) << endl;
+    cout << "  number of -1 samples: " << sum(vector_to_matrix(labels) < 0) << endl;
+
+
+    // A valid kernel must always give rise to kernel matrices which are symmetric
+    // and positive semidefinite (i.e. have nonnegative eigenvalues).  This next
+    // bit of code makes a kernel matrix and checks if this is true.
+    const matrix<double> K = kernel_matrix(kernel_type(0.1), randomly_subsample(samples, 500));
+    cout << "\nIs it symmetric? (this value should be 0): " << max(abs(K - trans(K))) << endl;
+    cout << "Smallest eigenvalue (should be >= 0): " << min(real_eigenvalues(K)) << endl;
+
+
+    // Here we make an instance of the krr_trainer object that uses our new kernel.
+    krr_trainer<kernel_type> trainer;
+    trainer.use_classification_loss_for_loo_cv();
+
+
+    // Finally, let's test how good our new kernel is by doing some leave-one-out cross-validation.
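+    // Leave-one-out cross-validation trains on all of the samples except one and then
+    // tests on the sample that was held out, repeating this once for each sample.  Since
+    // we called use_classification_loss_for_loo_cv() above, the loo_error reported below
+    // is the fraction of samples misclassified during this procedure.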
+ cout << "\ndoing leave-one-out cross-validation" << endl; + for (double sigma = 0.01; sigma <= 100; sigma *= 3) + { + // tell the trainer the parameters we want to use + trainer.set_kernel(kernel_type(sigma)); + + double loo_error; + trainer.train(samples, labels, loo_error); + + // Print sigma and the fraction of samples misclassified during LOO cross-validation. + cout << "sigma: " << sigma << " LOO error: " << loo_error << endl; + } + + + + + const kernel_type kern(10); + // Since it is very easy to make a mistake while coding a derivative it is a good idea + // to compare your derivative function against a numerical approximation and see if + // the results are similar. If they are very different then you probably made a + // mistake. So here we compare the results at a test point. + cout << "\nThese vectors should match, if they don't then we coded the kernel_derivative wrong!" << endl; + cout << "approximate derivative: \n" << derivative(kern)(samples[0],samples[100]) << endl; + cout << "exact derivative: \n" << kernel_derivative(kern)(samples[0],samples[100]) << endl; + +} +