dlib/examples/multiclass_classification_e...

// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the multiclass classification tools  
    from the dlib C++ Library.  Specifically, this example will make points from 
    three classes and show you how to train a multiclass classifier to recognize 
    these three classes.

    The classes are as follows:
        - class 1: points very close to the origin
        - class 2: points on the circle of radius 10 around the origin
        - class 3: points that are on a circle of radius 4 but not around the origin at all
*/

#include <dlib/svm_threaded.h>

#include <iostream>
#include <vector>

#include <dlib/rand.h>

using namespace std;
using namespace dlib;

// Our data will be 2-dimensional data. So declare an appropriate type to contain these points.
typedef matrix<double,2,1> sample_type;

// ----------------------------------------------------------------------------------------

void generate_data (
    std::vector<sample_type>& samples,
    std::vector<double>& labels
);
/*!
    ensures
        - make some 3 class data as described above.  
        - Create 60 points from class 1
        - Create 70 points from class 2
        - Create 80 points from class 3
!*/

// ----------------------------------------------------------------------------------------

int main()
{
    try
    {
        std::vector<sample_type> samples;
        std::vector<double> labels;

        // First, get our labeled set of training data
        generate_data(samples, labels);

        cout << "samples.size(): "<< samples.size() << endl;

        // The main object in this example program is the one_vs_one_trainer.  It is essentially 
        // a container class for regular binary classifier trainer objects.  In particular, it 
        // uses the any_trainer object to store any kind of trainer object that implements a 
        // .train(samples,labels) function which returns some kind of learned decision function.  
        // It uses these binary classifiers to construct a voting multiclass classifier.  If 
        // there are N classes then it trains N*(N-1)/2 binary classifiers, one for each pair of 
        // labels, which then vote on the label of a sample.
        //
        // In this example program we will work with a one_vs_one_trainer object which stores any 
        // kind of trainer that uses our sample_type samples.
        typedef one_vs_one_trainer<any_trainer<sample_type> > ovo_trainer;


        // Finally, make the one_vs_one_trainer.
        ovo_trainer trainer;


        // Next, we will make two different binary classification trainer objects.  One
        // which uses kernel ridge regression and RBF kernels and another which uses a
        // support vector machine and polynomial kernels.  The particular details don't matter.
        // The point of this part of the example is that you can use any kind of trainer object
        // with the one_vs_one_trainer.
        typedef polynomial_kernel<sample_type> poly_kernel;
        typedef radial_basis_kernel<sample_type> rbf_kernel;

        // make the binary trainers and set some parameters
        krr_trainer<rbf_kernel> rbf_trainer;
        svm_nu_trainer<poly_kernel> poly_trainer;
        poly_trainer.set_kernel(poly_kernel(0.1, 1, 2));
        rbf_trainer.set_kernel(rbf_kernel(0.1));


        // Now tell the one_vs_one_trainer that, by default, it should use the rbf_trainer
        // to solve the individual binary classification subproblems.
        trainer.set_trainer(rbf_trainer);
        // We can also get more specific.  Here we tell the one_vs_one_trainer to use the
        // poly_trainer to solve the class 1 vs class 2 subproblem.  All the others will
        // still be solved with the rbf_trainer.
        trainer.set_trainer(poly_trainer, 1, 2);

        // Now let's do 5-fold cross-validation using the one_vs_one_trainer we just setup.
        // As an aside, always shuffle the order of the samples before doing cross validation.  
        // For a discussion of why this is a good idea see the svm_ex.cpp example.
        randomize_samples(samples, labels);
        cout << "cross validation: \n" << cross_validate_multiclass_trainer(trainer, samples, labels, 5) << endl;
        // The output is shown below.  It is the confusion matrix which describes the results.  Each row 
        // corresponds to a class of data and each column to a prediction.  Reading from top to bottom, 
        // the rows correspond to the class labels if the labels have been listed in sorted order.  So the
        // top row corresponds to class 1, the middle row to class 2, and the bottom row to class 3.  The
        // columns are organized similarly, with the left most column showing how many samples were predicted
        // as members of class 1.
        // 
        // So in the results below we can see that, for the class 1 samples, 60 of them were correctly predicted
        // to be members of class 1 and 0 were incorrectly classified.  Similarly, the other two classes of data
        // are perfectly classified.
        /*
            cross validation: 
            60  0  0 
            0 70  0 
            0  0 80 
        */

        // Next, if you wanted to obtain the decision rule learned by a one_vs_one_trainer you 
        // would store it into a one_vs_one_decision_function.
        one_vs_one_decision_function<ovo_trainer> df = trainer.train(samples, labels);

        cout << "predicted label: "<< df(samples[0])  << ", true label: "<< labels[0] << endl;
        cout << "predicted label: "<< df(samples[90]) << ", true label: "<< labels[90] << endl;
        // The output is:
        /*
            predicted label: 2, true label: 2
            predicted label: 1, true label: 1
        */


        // If you want to save a one_vs_one_decision_function to disk, you can do
        // so.  However, you must declare what kind of decision functions it contains. 
        one_vs_one_decision_function<ovo_trainer, 
        decision_function<poly_kernel>,  // This is the output of the poly_trainer
        decision_function<rbf_kernel>    // This is the output of the rbf_trainer
        > df2, df3;


        // Put df into df2 and then save df2 to disk.  Note that we could have also said
        // df2 = trainer.train(samples, labels);  But doing it this way avoids retraining.
        df2 = df;
        serialize("df.dat") << df2;

        // load the function back in from disk and store it in df3.  
        deserialize("df.dat") >> df3;


        // Test df3 to see that this worked.
        cout << endl;
        cout << "predicted label: "<< df3(samples[0])  << ", true label: "<< labels[0] << endl;
        cout << "predicted label: "<< df3(samples[90]) << ", true label: "<< labels[90] << endl;
        // Test df3 on the samples and labels and print the confusion matrix.
        cout << "test deserialized function: \n" << test_multiclass_decision_function(df3, samples, labels) << endl;


        // Finally, if you want to get the binary classifiers from inside a multiclass decision
        // function you can do it by calling get_binary_decision_functions() like so:
        one_vs_one_decision_function<ovo_trainer>::binary_function_table functs;
        functs = df.get_binary_decision_functions();
        cout << "number of binary decision functions in df: " << functs.size() << endl;
        // The functs object is a std::map which maps pairs of labels to binary decision
        // functions.  So we can access the individual decision functions like so:
        decision_function<poly_kernel> df_1_2 = any_cast<decision_function<poly_kernel> >(functs[make_unordered_pair(1,2)]);
        decision_function<rbf_kernel>  df_1_3 = any_cast<decision_function<rbf_kernel>  >(functs[make_unordered_pair(1,3)]);
        // df_1_2 contains the binary decision function that votes for class 1 vs. 2.
        // Similarly, df_1_3 contains the classifier that votes for 1 vs. 3.

        // Note that the multiclass decision function doesn't know what kind of binary
        // decision functions it contains.  So we have to use any_cast to explicitly cast
        // them back into the concrete type.  If you make a mistake and try to any_cast a
        // binary decision function into the wrong type of function any_cast will throw a
        // bad_any_cast exception.
    }
    catch (std::exception& e)
    {
        cout << "exception thrown!" << endl;
        cout << e.what() << endl;
    }
}

// ----------------------------------------------------------------------------------------

void generate_data (
    std::vector<sample_type>& samples,
    std::vector<double>& labels
)
{
    const long num = 50;

    sample_type m;

    dlib::rand rnd;


    // make some samples near the origin
    double radius = 0.5;
    for (long i = 0; i < num+10; ++i)
    {
        double sign = 1;
        if (rnd.get_random_double() < 0.5)
            sign = -1;
        m(0) = 2*radius*rnd.get_random_double()-radius;
        m(1) = sign*sqrt(radius*radius - m(0)*m(0));

        // add this sample to our set of training samples 
        samples.push_back(m);
        labels.push_back(1);
    }

    // make some samples in a circle around the origin but far away
    radius = 10.0;
    for (long i = 0; i < num+20; ++i)
    {
        double sign = 1;
        if (rnd.get_random_double() < 0.5)
            sign = -1;
        m(0) = 2*radius*rnd.get_random_double()-radius;
        m(1) = sign*sqrt(radius*radius - m(0)*m(0));

        // add this sample to our set of training samples 
        samples.push_back(m);
        labels.push_back(2);
    }

    // make some samples in a circle around the point (25,25) 
    radius = 4.0;
    for (long i = 0; i < num+30; ++i)
    {
        double sign = 1;
        if (rnd.get_random_double() < 0.5)
            sign = -1;
        m(0) = 2*radius*rnd.get_random_double()-radius;
        m(1) = sign*sqrt(radius*radius - m(0)*m(0));

        // translate this point away from the origin
        m(0) += 25;
        m(1) += 25;

        // add this sample to our set of training samples 
        samples.push_back(m);
        labels.push_back(3);
    }
}

// ----------------------------------------------------------------------------------------
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00			`// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt`
			`/*`
			`This is an example illustrating the use of the multiclass classification tools`
			`from the dlib C++ Library. Specifically, this example will make points from`
			`three classes and show you how to train a multiclass classifier to recognize`
			`these three classes.`

			`The classes are as follows:`
			`- class 1: points very close to the origin`
			`- class 2: points on the circle of radius 10 around the origin`
			`- class 3: points that are on a circle of radius 4 but not around the origin at all`
			`*/`

Fixed broken #include 2013-11-18 08:18:52 +08:00			`#include <dlib/svm_threaded.h>`
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00
			`#include <iostream>`
			`#include <vector>`

All I did in this change was switch from using #include "" syntax to #include <> syntax. 2012-12-08 22:32:13 +08:00			`#include <dlib/rand.h>`
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00
			`using namespace std;`
			`using namespace dlib;`

			`// Our data will be 2-dimensional data. So declare an appropriate type to contain these points.`
			`typedef matrix<double,2,1> sample_type;`

			`// ----------------------------------------------------------------------------------------`

			`void generate_data (`
			`std::vector<sample_type>& samples,`
			`std::vector<double>& labels`
			`);`
			`/*!`
			`ensures`
			`- make some 3 class data as described above.`
			`- Create 60 points from class 1`
			`- Create 70 points from class 2`
			`- Create 80 points from class 3`
			`!*/`

			`// ----------------------------------------------------------------------------------------`

			`int main()`
			`{`
Added code showing how to get the individual decision functions out of a multiclass decision function object. 2013-11-18 02:47:26 +08:00			`try`
			`{`
			`std::vector<sample_type> samples;`
			`std::vector<double> labels;`

			`// First, get our labeled set of training data`
			`generate_data(samples, labels);`

			`cout << "samples.size(): "<< samples.size() << endl;`

			`// The main object in this example program is the one_vs_one_trainer. It is essentially`
			`// a container class for regular binary classifier trainer objects. In particular, it`
			`// uses the any_trainer object to store any kind of trainer object that implements a`
			`// .train(samples,labels) function which returns some kind of learned decision function.`
			`// It uses these binary classifiers to construct a voting multiclass classifier. If`
			`// there are N classes then it trains N*(N-1)/2 binary classifiers, one for each pair of`
			`// labels, which then vote on the label of a sample.`
			`//`
			`// In this example program we will work with a one_vs_one_trainer object which stores any`
			`// kind of trainer that uses our sample_type samples.`
			`typedef one_vs_one_trainer<any_trainer<sample_type> > ovo_trainer;`


			`// Finally, make the one_vs_one_trainer.`
			`ovo_trainer trainer;`


			`// Next, we will make two different binary classification trainer objects. One`
			`// which uses kernel ridge regression and RBF kernels and another which uses a`
			`// support vector machine and polynomial kernels. The particular details don't matter.`
			`// The point of this part of the example is that you can use any kind of trainer object`
			`// with the one_vs_one_trainer.`
			`typedef polynomial_kernel<sample_type> poly_kernel;`
			`typedef radial_basis_kernel<sample_type> rbf_kernel;`

			`// make the binary trainers and set some parameters`
			`krr_trainer<rbf_kernel> rbf_trainer;`
			`svm_nu_trainer<poly_kernel> poly_trainer;`
			`poly_trainer.set_kernel(poly_kernel(0.1, 1, 2));`
			`rbf_trainer.set_kernel(rbf_kernel(0.1));`


			`// Now tell the one_vs_one_trainer that, by default, it should use the rbf_trainer`
			`// to solve the individual binary classification subproblems.`
			`trainer.set_trainer(rbf_trainer);`
			`// We can also get more specific. Here we tell the one_vs_one_trainer to use the`
			`// poly_trainer to solve the class 1 vs class 2 subproblem. All the others will`
			`// still be solved with the rbf_trainer.`
			`trainer.set_trainer(poly_trainer, 1, 2);`

Fixing grammar in comments. 2014-02-23 05:07:17 +08:00			`// Now let's do 5-fold cross-validation using the one_vs_one_trainer we just setup.`
Added code showing how to get the individual decision functions out of a multiclass decision function object. 2013-11-18 02:47:26 +08:00			`// As an aside, always shuffle the order of the samples before doing cross validation.`
			`// For a discussion of why this is a good idea see the svm_ex.cpp example.`
			`randomize_samples(samples, labels);`
			`cout << "cross validation: \n" << cross_validate_multiclass_trainer(trainer, samples, labels, 5) << endl;`
			`// The output is shown below. It is the confusion matrix which describes the results. Each row`
			`// corresponds to a class of data and each column to a prediction. Reading from top to bottom,`
			`// the rows correspond to the class labels if the labels have been listed in sorted order. So the`
			`// top row corresponds to class 1, the middle row to class 2, and the bottom row to class 3. The`
			`// columns are organized similarly, with the left most column showing how many samples were predicted`
			`// as members of class 1.`
			`//`
			`// So in the results below we can see that, for the class 1 samples, 60 of them were correctly predicted`
			`// to be members of class 1 and 0 were incorrectly classified. Similarly, the other two classes of data`
			`// are perfectly classified.`
			`/*`
			`cross validation:`
			`60 0 0`
			`0 70 0`
			`0 0 80`
			`*/`

			`// Next, if you wanted to obtain the decision rule learned by a one_vs_one_trainer you`
			`// would store it into a one_vs_one_decision_function.`
			`one_vs_one_decision_function<ovo_trainer> df = trainer.train(samples, labels);`

			`cout << "predicted label: "<< df(samples[0]) << ", true label: "<< labels[0] << endl;`
			`cout << "predicted label: "<< df(samples[90]) << ", true label: "<< labels[90] << endl;`
			`// The output is:`
			`/*`
			`predicted label: 2, true label: 2`
			`predicted label: 1, true label: 1`
			`*/`


			`// If you want to save a one_vs_one_decision_function to disk, you can do`
			`// so. However, you must declare what kind of decision functions it contains.`
			`one_vs_one_decision_function<ovo_trainer,`
			`decision_function<poly_kernel>, // This is the output of the poly_trainer`
			`decision_function<rbf_kernel> // This is the output of the rbf_trainer`
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00			`> df2, df3;`


Added code showing how to get the individual decision functions out of a multiclass decision function object. 2013-11-18 02:47:26 +08:00			`// Put df into df2 and then save df2 to disk. Note that we could have also said`
			`// df2 = trainer.train(samples, labels); But doing it this way avoids retraining.`
			`df2 = df;`
Made the examples use the new simplified file serialization API. 2014-05-09 09:07:56 +08:00			`serialize("df.dat") << df2;`
Added code showing how to get the individual decision functions out of a multiclass decision function object. 2013-11-18 02:47:26 +08:00
			`// load the function back in from disk and store it in df3.`
Made the examples use the new simplified file serialization API. 2014-05-09 09:07:56 +08:00			`deserialize("df.dat") >> df3;`
Added code showing how to get the individual decision functions out of a multiclass decision function object. 2013-11-18 02:47:26 +08:00

			`// Test df3 to see that this worked.`
			`cout << endl;`
			`cout << "predicted label: "<< df3(samples[0]) << ", true label: "<< labels[0] << endl;`
			`cout << "predicted label: "<< df3(samples[90]) << ", true label: "<< labels[90] << endl;`
			`// Test df3 on the samples and labels and print the confusion matrix.`
			`cout << "test deserialized function: \n" << test_multiclass_decision_function(df3, samples, labels) << endl;`

Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00



Added code showing how to get the individual decision functions out of a multiclass decision function object. 2013-11-18 02:47:26 +08:00			`// Finally, if you want to get the binary classifiers from inside a multiclass decision`
			`// function you can do it by calling get_binary_decision_functions() like so:`
			`one_vs_one_decision_function<ovo_trainer>::binary_function_table functs;`
			`functs = df.get_binary_decision_functions();`
			`cout << "number of binary decision functions in df: " << functs.size() << endl;`
			`// The functs object is a std::map which maps pairs of labels to binary decision`
			`// functions. So we can access the individual decision functions like so:`
			`decision_function<poly_kernel> df_1_2 = any_cast<decision_function<poly_kernel> >(functs[make_unordered_pair(1,2)]);`
			`decision_function<rbf_kernel> df_1_3 = any_cast<decision_function<rbf_kernel> >(functs[make_unordered_pair(1,3)]);`
			`// df_1_2 contains the binary decision function that votes for class 1 vs. 2.`
			`// Similarly, df_1_3 contains the classifier that votes for 1 vs. 3.`

			`// Note that the multiclass decision function doesn't know what kind of binary`
			`// decision functions it contains. So we have to use any_cast to explicitly cast`
			`// them back into the concrete type. If you make a mistake and try to any_cast a`
			`// binary decision function into the wrong type of function any_cast will throw a`
			`// bad_any_cast exception.`
			`}`
			`catch (std::exception& e)`
			`{`
			`cout << "exception thrown!" << endl;`
			`cout << e.what() << endl;`
			`}`
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00			`}`

			`// ----------------------------------------------------------------------------------------`

			`void generate_data (`
			`std::vector<sample_type>& samples,`
			`std::vector<double>& labels`
			`)`
			`{`
			`const long num = 50;`

			`sample_type m;`

Renamed rand::float_1a to rand. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404251 2011-05-05 05:24:12 +08:00			`dlib::rand rnd;`
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00

			`// make some samples near the origin`
			`double radius = 0.5;`
			`for (long i = 0; i < num+10; ++i)`
			`{`
			`double sign = 1;`
			`if (rnd.get_random_double() < 0.5)`
			`sign = -1;`
			`m(0) = 2radiusrnd.get_random_double()-radius;`
			`m(1) = signsqrt(radiusradius - m(0)*m(0));`

Fixed grammar/confusing sentence. 2013-11-16 22:15:52 +08:00			`// add this sample to our set of training samples`
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00			`samples.push_back(m);`
			`labels.push_back(1);`
			`}`

			`// make some samples in a circle around the origin but far away`
			`radius = 10.0;`
			`for (long i = 0; i < num+20; ++i)`
			`{`
			`double sign = 1;`
			`if (rnd.get_random_double() < 0.5)`
			`sign = -1;`
			`m(0) = 2radiusrnd.get_random_double()-radius;`
			`m(1) = signsqrt(radiusradius - m(0)*m(0));`

Fixed grammar/confusing sentence. 2013-11-16 22:15:52 +08:00			`// add this sample to our set of training samples`
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00			`samples.push_back(m);`
			`labels.push_back(2);`
			`}`

			`// make some samples in a circle around the point (25,25)`
			`radius = 4.0;`
			`for (long i = 0; i < num+30; ++i)`
			`{`
			`double sign = 1;`
			`if (rnd.get_random_double() < 0.5)`
			`sign = -1;`
			`m(0) = 2radiusrnd.get_random_double()-radius;`
			`m(1) = signsqrt(radiusradius - m(0)*m(0));`

			`// translate this point away from the origin`
			`m(0) += 25;`
			`m(1) += 25;`

Fixed grammar/confusing sentence. 2013-11-16 22:15:52 +08:00			`// add this sample to our set of training samples`
Added a multiclass example program --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%404052 2010-12-31 02:59:13 +08:00			`samples.push_back(m);`
			`labels.push_back(3);`
			`}`
			`}`

			`// ----------------------------------------------------------------------------------------`