mirror of https://github.com/davisking/dlib.git
Added spectral_cluster()
This commit is contained in:
parent
2e5d2c46c6
commit
f99e940b28
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "clustering/modularity_clustering.h"
|
||||
#include "clustering/chinese_whispers.h"
|
||||
#include "clustering/spectral_cluster.h"
|
||||
#include "svm/kkmeans.h"
|
||||
|
||||
#endif // DLIB_CLuSTERING_
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_SPECTRAL_CLUSTEr_H_
|
||||
#define DLIB_SPECTRAL_CLUSTEr_H_
|
||||
|
||||
#include "spectral_cluster_abstract.h"
|
||||
#include <vector>
|
||||
#include "../matrix.h"
|
||||
#include "../svm/kkmeans.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
template <
|
||||
typename kernel_type,
|
||||
typename vector_type
|
||||
>
|
||||
std::vector<unsigned long> spectral_cluster (
|
||||
const kernel_type& k,
|
||||
const vector_type& samples,
|
||||
const unsigned long num_clusters
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(num_clusters > 0,
|
||||
"\t std::vector<unsigned long> spectral_cluster(k,samples,num_clusters)"
|
||||
<< "\n\t num_clusters can't be 0."
|
||||
);
|
||||
|
||||
if (num_clusters == 1)
|
||||
{
|
||||
// nothing to do, just assign everything to the 0 cluster.
|
||||
return std::vector<unsigned long>(samples.size(), 0);
|
||||
}
|
||||
|
||||
// compute the similarity matrix.
|
||||
matrix<double> K(samples.size(), samples.size());
|
||||
for (long r = 0; r < K.nr(); ++r)
|
||||
for (long c = r+1; c < K.nc(); ++c)
|
||||
K(r,c) = K(c,r) = (double)k(samples[r], samples[c]);
|
||||
for (long r = 0; r < K.nr(); ++r)
|
||||
K(r,r) = 0;
|
||||
|
||||
matrix<double,0,1> D(K.nr());
|
||||
for (long r = 0; r < K.nr(); ++r)
|
||||
D(r) = sum(rowm(K,r));
|
||||
D = sqrt(reciprocal(D));
|
||||
K = diagm(D)*K*diagm(D);
|
||||
matrix<double> u,w,v;
|
||||
// Use the normal SVD routine unless the matrix is really big, then use the fast
|
||||
// approximate version.
|
||||
if (K.nr() < 1000)
|
||||
svd3(K,u,w,v);
|
||||
else
|
||||
svd_fast(K,u,w,v, num_clusters+100, 5);
|
||||
// Pick out the eigenvectors associated with the largest eigenvalues.
|
||||
rsort_columns(v,w);
|
||||
v = colm(v, range(0,num_clusters-1));
|
||||
// Now build the normalized spectral vectors, one for each input vector.
|
||||
std::vector<matrix<double,0,1> > spec_samps, centers;
|
||||
for (long r = 0; r < v.nr(); ++r)
|
||||
{
|
||||
spec_samps.push_back(trans(rowm(v,r)));
|
||||
spec_samps.back() /= length(spec_samps.back());
|
||||
}
|
||||
// Finally do the K-means clustering
|
||||
pick_initial_centers(num_clusters, centers, spec_samps);
|
||||
find_clusters_using_kmeans(spec_samps, centers);
|
||||
// And then compute the cluster assignments based on the output of K-means.
|
||||
std::vector<unsigned long> assignments;
|
||||
for (unsigned long i = 0; i < spec_samps.size(); ++i)
|
||||
assignments.push_back(nearest_center(centers, spec_samps[i]));
|
||||
|
||||
return assignments;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SPECTRAL_CLUSTEr_H_
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_
|
||||
#ifdef DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
template <
|
||||
typename kernel_type,
|
||||
typename vector_type
|
||||
>
|
||||
std::vector<unsigned long> spectral_cluster (
|
||||
const kernel_type& k,
|
||||
const vector_type& samples,
|
||||
const unsigned long num_clusters
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
|
||||
- samples must be something with an interface compatible with std::vector.
|
||||
- The following expression must evaluate to a double or float:
|
||||
k(samples[i], samples[j])
|
||||
- num_clusters > 0
|
||||
ensures
|
||||
|
||||
- Performs the spectral clustering algorithm described in the paper:
|
||||
On spectral clustering: Analysis and an algorithm by Ng, Jordan, and Weiss.
|
||||
The resulting cluster assignments are returned.
|
||||
- This function clusters the input data samples into num_clusters clusters and
|
||||
returns a vector that indicates which cluster each sample falls into. In
|
||||
particular, we return an array A such that:
|
||||
- A.size() == samples.size()
|
||||
- A[i] == the cluster assignment of samples[i].
|
||||
- for all valid i: 0 <= A[i] < num_clusters
|
||||
- The "similarity" of samples[i] with samples[j] is given by
|
||||
k(samples[i],samples[j]). This means that k() should output a number >= 0
|
||||
and the number should be larger for samples that are more similar.
|
||||
- k() is only evaluated for pairs with i < j and the result is used for both
|
||||
(i,j) and (j,i), so k() is treated as symmetric.  The value of
|
||||
k(samples[i],samples[i]) is never used; self-similarity is taken to be 0.
|
||||
- If num_clusters == 1 then every sample is assigned to cluster 0.
|
||||
!*/
|
||||
}
|
||||
|
||||
#endif // DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_
|
||||
|
||||
|
Loading…
Reference in New Issue