diff --git a/examples/kkmeans_ex.cpp b/examples/kkmeans_ex.cpp index 42259d136..95d8f3b7d 100644 --- a/examples/kkmeans_ex.cpp +++ b/examples/kkmeans_ex.cpp @@ -41,28 +41,18 @@ int main() typedef radial_basis_kernel<sample_type> kernel_type; - // Here we declare an instance of the kcentroid object. The first argument to the constructor - // is the kernel we wish to use. The second is a parameter that determines the numerical - // accuracy with which the object will perform part of the learning algorithm. Generally - // smaller values give better results but cause the algorithm to run slower. You just have - // to play with it to decide what balance of speed and accuracy is right for your problem. - // Here we have set it to 0.01. - // - // Also, since we are using the radial basis kernel we have to pick the RBF width parameter. - // Here we have it set to 0.1. But in general, a reasonable way of picking this value is - // to start with some initial guess and to just run all the data through the resulting - // kcentroid. Then print out kc.dictionary_size() to see how many support vectors the - // kcentroid object is using. A good rule of thumb is that you should have somewhere - // in the range of 10-100 support vectors (but this rule isn't carved in stone). - // So if you aren't in that range then you can change the RBF parameter. Making it - // smaller will decrease the dictionary size and making it bigger will increase the - // dictionary size. - // - // So what I often do is I set the kcentroid's second parameter to 0.01 or 0.001. Then - // I find an RBF kernel parameter that gives me the number of support vectors that I - // feel is appropriate for the problem I'm trying to solve. Again, this just comes down - // to playing with it and getting a feel for how things work. - kcentroid<kernel_type> kc(kernel_type(0.1),0.01); + // Here we declare an instance of the kcentroid object. It is the object used to + // represent each of the centers used for clustering. 
The kcentroid has 4 parameters + // you need to set. The first argument to the constructor is the kernel we wish to + // use. The second is a parameter that determines the numerical accuracy with which + // the object will perform part of the learning algorithm. Generally, smaller values + // give better results but cause the algorithm to attempt to use more support vectors + // (and thus run slower and use more memory). The third argument, however, is the + // maximum number of support vectors a kcentroid is allowed to use. So you can use + // it to control the complexity. Finally, the last argument should always be set to + // false when using a kcentroid for clustering (see the kcentroid docs for details on + // this parameter). + kcentroid<kernel_type> kc(kernel_type(0.1),0.01, 8, false); // Now we make an instance of the kkmeans object and tell it to use kcentroid objects // that are configured with the parameters from the kc object we defined above. @@ -145,6 +135,14 @@ int main() cout << test(samples[i+2*num]) << "\n"; } + // Now print out how many support vectors each center used. Note that + // the maximum number of 8 was reached. If you went back to the kcentroid + // constructor and changed the 8 to some bigger number you would see that these + // numbers would go up. However, 8 is all we need to correctly cluster this dataset. + cout << "num sv for center 0: " << test.get_kcentroid(0).dictionary_size() << endl; + cout << "num sv for center 1: " << test.get_kcentroid(1).dictionary_size() << endl; + cout << "num sv for center 2: " << test.get_kcentroid(2).dictionary_size() << endl; + }