Added an example showing how to create custom kernels.

--HG-- rename : examples/krr_classification_ex.cpp => examples/using_custom_kernels_ex.cpp
2011-06-03 22:15:14 -04:00 · 2011-06-03 22:15:14 -04:00 · 54fded51bf
parent d068afb165
commit 54fded51bf
2 changed files with 208 additions and 0 deletions
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -91,4 +91,5 @@ add_example(thread_function_ex)
 add_example(thread_pool_ex)
 add_example(threads_ex)
 add_example(timer_ex)
+add_example(using_custom_kernels_ex)
 add_example(xml_parser_ex)
--- a/examples/using_custom_kernels_ex.cpp
+++ b/examples/using_custom_kernels_ex.cpp
@ -0,0 +1,207 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example showing how to defined custom kernel functions for use with 
+    the machine learning tools in the dlib C++ Library.
+
+    This example assumes you are somewhat familiar with the machine learning
+    tools in dlib.  In particular, you should be familiar with the krr_trainer
+    and the matrix object.  So you may want to read the krr_classification_ex.cpp
+    and matrix_ex.cpp example programs if you haven't already.
+*/
+
+
+#include <iostream>
+#include "dlib/svm.h"
+
+using namespace std;
+using namespace dlib;
+
+// ----------------------------------------------------------------------------------------
+
+/*
+    Here we define our new kernel.  It is the UKF kernel from 
+        Facilitating the applications of support vector machine by using a new kernel
+        by Rui Zhang and Wenjian Wang.
+
+
+    
+    In the context of the dlib library a kernel function object is an object with 
+    an interface with the following properties:
+        - a public typedef named sample_type
+        - a public typedef named scalar_type which should be a float, double, or 
+          long double type.
+        - an overloaded operator() that operates on two items of sample_type 
+          and returns a scalar_type.  
+        - a public typedef named mem_manager_type that is an implementation of 
+          dlib/memory_manager/memory_manager_kernel_abstract.h or
+          dlib/memory_manager_global/memory_manager_global_kernel_abstract.h or
+          dlib/memory_manager_stateless/memory_manager_stateless_kernel_abstract.h 
+        - an overloaded == operator that tells you if two kernels are
+          identical or not.
+
+    Below we define such a beast for the UKF kernel.  In this case we are expecting the 
+    sample type (i.e. the T type) to be a dlib::matrix.  However, note that you can design 
+    kernels which operate on any type you like so long as you meet the above requirements.
+*/
+
+template < typename T >
+struct ukf_kernel
+{
+    typedef typename T::type             scalar_type;
+    typedef          T                   sample_type;
+    // If your sample type, the T, doesn't have a memory manager then
+    // you can use dlib::default_memory_manager here.
+    typedef typename T::mem_manager_type mem_manager_type;
+
+    ukf_kernel(const scalar_type g) : sigma(g) {}
+    ukf_kernel() : sigma(0.1) {}
+
+    scalar_type sigma;
+
+    scalar_type operator() (
+        const sample_type& a,
+        const sample_type& b
+    ) const
+    { 
+        // This is the formula for the UKF kernel from the above referenced paper.
+        return 1/(length_squared(a-b) + sigma);
+    }
+
+    bool operator== (
+        const ukf_kernel& k
+    ) const
+    {
+        return sigma == k.sigma;
+    }
+};
+
+// ----------------------------------------------------------------------------------------
+
+/*
+    Here we define serialize() and deserialize() functions for our new kernel.  Defining
+    these function is optional.  However, if you don't define them you won't be able
+    to save your learned decision_function objects to disk. 
+*/
+
+template < typename T >
+void serialize ( const ukf_kernel<T>& item, std::ostream& out)
+{
+    // save the state of the kernel to the output stream
+    serialize(item.sigma, out);
+}
+
+template < typename T >
+void deserialize ( ukf_kernel<T>& item, std::istream& in )
+{
+    deserialize(item.sigma, in);
+}
+
+// ----------------------------------------------------------------------------------------
+
+/*
+    This next thing, the kernel_derivative specialization is OPTIONAL.  You only need
+    to define it if you want to use the dlib::reduced2() or dlib::approximate_distance_function() 
+    routines.  If so, then you need to supply code for computing the derivative of your kernel as 
+    shown below.  Note also that you can only do this if your kernel operates on dlib::matrix
+    objects which represent column vectors.
+*/
+
+namespace dlib
+{
+    template < typename T >
+    struct kernel_derivative<ukf_kernel<T> >
+    {
+        typedef typename T::type             scalar_type;
+        typedef          T                   sample_type;
+        typedef typename T::mem_manager_type mem_manager_type;
+
+        kernel_derivative(const ukf_kernel<T>& k_) : k(k_){}
+
+        sample_type operator() (const sample_type& x, const sample_type& y) const
+        {
+            // return the derivative of the ukf kernel with respect to the second argument (i.e. y)
+            return 2*(x-y)*std::pow(k(x,y),2);
+        }
+
+        const ukf_kernel<T>& k;
+    };
+}
+
+// ----------------------------------------------------------------------------------------
+
+int main()
+{
+    // We are going to be working with 2 dimensional samples and trying to perform
+    // binary classification on them using our new ukf_kernel.
+    typedef matrix<double, 2, 1> sample_type;
+
+    typedef ukf_kernel<sample_type> kernel_type;
+
+
+    // Now lets generate some training data
+    std::vector<sample_type> samples;
+    std::vector<double> labels;
+    for (double r = -20; r <= 20; r += 0.9)
+    {
+        for (double c = -20; c <= 20; c += 0.9)
+        {
+            sample_type samp;
+            samp(0) = r;
+            samp(1) = c;
+            samples.push_back(samp);
+
+            // if this point is less than 13 from the origin
+            if (sqrt((double)r*r + c*c) <= 13)
+                labels.push_back(+1);
+            else
+                labels.push_back(-1);
+
+        }
+    }
+    cout << "samples generated: " << samples.size() << endl;
+    cout << "  number of +1 samples: " << sum(vector_to_matrix(labels) > 0) << endl;
+    cout << "  number of -1 samples: " << sum(vector_to_matrix(labels) < 0) << endl;
+
+
+    // A valid kernel must always give rise to kernel matrices which are symmetric 
+    // and positive semidefinite (i.e. have nonnegative eigenvalues).  This next
+    // bit of code makes a kernel matrix and checks if this is true.
+    const matrix<double> K = kernel_matrix(kernel_type(0.1), randomly_subsample(samples, 500));
+    cout << "\nIs it symmetric? (this value should be 0): "<< min(abs(K - trans(K))) << endl;
+    cout << "Smallest eigenvalue (should be >= 0):      "  << min(real_eigenvalues(K)) << endl;
+
+
+    // here we make an instance of the krr_trainer object that uses our new kernel.
+    krr_trainer<kernel_type> trainer;
+    trainer.use_classification_loss_for_loo_cv();
+
+
+    // Finally, lets test how good our new kernel is by doing some leave-one-out cross-validation.
+    cout << "\ndoing leave-one-out cross-validation" << endl;
+    for (double sigma = 0.01; sigma <= 100; sigma *= 3)
+    {
+        // tell the trainer the parameters we want to use
+        trainer.set_kernel(kernel_type(sigma));
+
+        double loo_error;
+        trainer.train(samples, labels, loo_error);
+
+        // Print sigma and the fraction of samples misclassified during LOO cross-validation.
+        cout << "sigma: " << sigma << "     LOO error: " << loo_error << endl;
+    }
+
+
+
+
+    const kernel_type kern(10);
+    // Since it is very easy to make a mistake while coding a derivative it is a good idea
+    // to compare your derivative function against a numerical approximation and see if
+    // the results are similar.  If they are very different then you probably made a 
+    // mistake.  So here we compare the results at a test point. 
+    cout << "\nThese vectors should match, if they don't then we coded the kernel_derivative wrong!" << endl;
+    cout << "approximate derivative: \n" << derivative(kern)(samples[0],samples[100]) << endl;
+    cout << "exact derivative: \n" << kernel_derivative<kernel_type>(kern)(samples[0],samples[100]) << endl;
+
+}
+