mirror of https://github.com/davisking/dlib.git
Cleaned up code and comments.
In particular, the new clustering and face-chip-saving functions don't need to be members of the face recognition class, so I moved them out into free functions. I also fixed many incorrect copy/pasted comments and clarified parts of the example code.
This commit is contained in:
parent
5cf80dda6a
commit
532552627a
|
@ -42,9 +42,9 @@ from skimage import io
|
||||||
if len(sys.argv) != 5:
|
if len(sys.argv) != 5:
|
||||||
print(
|
print(
|
||||||
"Call this program like this:\n"
|
"Call this program like this:\n"
|
||||||
" ./face_clustering.py shape_predictor_68_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n"
|
" ./face_clustering.py shape_predictor_5_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n"
|
||||||
"You can download a trained facial shape predictor and recognition model from:\n"
|
"You can download a trained facial shape predictor and recognition model from:\n"
|
||||||
" http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2\n"
|
" http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n"
|
||||||
" http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2")
|
" http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ facerec = dlib.face_recognition_model_v1(face_rec_model_path)
|
||||||
descriptors = []
|
descriptors = []
|
||||||
images = []
|
images = []
|
||||||
|
|
||||||
# Now process all the images
|
# Now find all the faces and compute 128D face descriptors for each face.
|
||||||
for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
|
for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
|
||||||
print("Processing file: {}".format(f))
|
print("Processing file: {}".format(f))
|
||||||
img = io.imread(f)
|
img = io.imread(f)
|
||||||
|
@ -78,34 +78,17 @@ for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
|
||||||
for k, d in enumerate(dets):
|
for k, d in enumerate(dets):
|
||||||
# Get the landmarks/parts for the face in box d.
|
# Get the landmarks/parts for the face in box d.
|
||||||
shape = sp(img, d)
|
shape = sp(img, d)
|
||||||
# Draw the face landmarks on the screen so we can see what face is currently being processed.
|
|
||||||
|
|
||||||
# Compute the 128D vector that describes the face in img identified by
|
# Compute the 128D vector that describes the face in img identified by
|
||||||
# shape. In general, if two face descriptor vectors have a Euclidean
|
# shape.
|
||||||
# distance between them less than 0.6 then they are from the same
|
|
||||||
# person, otherwise they are from different people. Here we just print
|
|
||||||
# the vector to the screen.
|
|
||||||
face_descriptor = facerec.compute_face_descriptor(img, shape)
|
face_descriptor = facerec.compute_face_descriptor(img, shape)
|
||||||
descriptors.append(face_descriptor)
|
descriptors.append(face_descriptor)
|
||||||
images.append((img, shape))
|
images.append((img, shape))
|
||||||
# It should also be noted that you can also call this function like this:
|
|
||||||
# face_descriptor = facerec.compute_face_descriptor(img, shape, 100)
|
|
||||||
# The version of the call without the 100 gets 99.13% accuracy on LFW
|
|
||||||
# while the version with 100 gets 99.38%. However, the 100 makes the
|
|
||||||
# call 100x slower to execute, so choose whatever version you like. To
|
|
||||||
# explain a little, the 3rd argument tells the code how many times to
|
|
||||||
# jitter/resample the image. When you set it to 100 it executes the
|
|
||||||
# face descriptor extraction 100 times on slightly modified versions of
|
|
||||||
# the face and returns the average result. You could also pick a more
|
|
||||||
# middle value, such as 10, which is only 10x slower but still gets an
|
|
||||||
# LFW accuracy of 99.3%.
|
|
||||||
|
|
||||||
labels = facerec.cluster(descriptors, 0.5)
|
# Now let's cluster the faces.
|
||||||
label_classes = list(set(labels))
|
labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
|
||||||
label_classes.sort()
|
num_classes = len(set(labels))
|
||||||
num_classes = len(label_classes)
|
|
||||||
print("Number of clusters: {}".format(num_classes))
|
print("Number of clusters: {}".format(num_classes))
|
||||||
print("Labels classes: {}".format(str(label_classes)))
|
|
||||||
|
|
||||||
# Find biggest class
|
# Find biggest class
|
||||||
biggest_class = None
|
biggest_class = None
|
||||||
|
@ -116,8 +99,8 @@ for i in range(0, num_classes):
|
||||||
biggest_class_length = class_length
|
biggest_class_length = class_length
|
||||||
biggest_class = i
|
biggest_class = i
|
||||||
|
|
||||||
print("Biggest class: {}".format(biggest_class))
|
print("Biggest cluster id number: {}".format(biggest_class))
|
||||||
print("Biggest class length: {}".format(biggest_class_length))
|
print("Number of faces in biggest cluster: {}".format(biggest_class_length))
|
||||||
|
|
||||||
# Find the indices for the biggest class
|
# Find the indices for the biggest class
|
||||||
indices = []
|
indices = []
|
||||||
|
@ -125,17 +108,18 @@ for i, label in enumerate(labels):
|
||||||
if label == biggest_class:
|
if label == biggest_class:
|
||||||
indices.append(i)
|
indices.append(i)
|
||||||
|
|
||||||
print("Biggest class indices: {}".format(str(indices)))
|
print("Indices of images in the biggest cluster: {}".format(str(indices)))
|
||||||
|
|
||||||
# Ensure output directory exists
|
# Ensure output directory exists
|
||||||
if not os.path.isdir(output_folder_path):
|
if not os.path.isdir(output_folder_path):
|
||||||
os.makedirs(output_folder_path)
|
os.makedirs(output_folder_path)
|
||||||
|
|
||||||
# Save the extracted faces
|
# Save the extracted faces
|
||||||
|
print("Saving faces in largest cluster to output folder...")
|
||||||
for i, index in enumerate(indices):
|
for i, index in enumerate(indices):
|
||||||
img, shape = images[index]
|
img, shape = images[index]
|
||||||
file_path = os.path.join(output_folder_path, "face_" + str(i))
|
file_path = os.path.join(output_folder_path, "face_" + str(i))
|
||||||
facerec.save_image_chip(img, shape, file_path)
|
dlib.save_face_chip(img, shape, file_path)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -39,79 +39,6 @@ public:
|
||||||
cropper->set_max_rotation_degrees(3);
|
cropper->set_max_rotation_degrees(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
boost::python::list cluster(boost::python::list descriptors, float threshold)
|
|
||||||
{
|
|
||||||
boost::python::list clusters;
|
|
||||||
|
|
||||||
size_t num_descriptors = len(descriptors);
|
|
||||||
|
|
||||||
// In particular, one simple thing we can do is face clustering. This next bit of code
|
|
||||||
// creates a graph of connected faces and then uses the Chinese whispers graph clustering
|
|
||||||
// algorithm to identify how many people there are and which faces belong to whom.
|
|
||||||
std::vector<sample_pair> edges;
|
|
||||||
std::vector<unsigned long> labels;
|
|
||||||
for (size_t i = 0; i < num_descriptors; ++i)
|
|
||||||
{
|
|
||||||
for (size_t j = i+1; j < num_descriptors; ++j)
|
|
||||||
{
|
|
||||||
// Faces are connected in the graph if they are close enough. Here we check if
|
|
||||||
// the distance between two face descriptors is less than 0.6, which is the
|
|
||||||
// decision threshold the network was trained to use. Although you can
|
|
||||||
// certainly use any other threshold you find useful.
|
|
||||||
matrix<double,0,1> first_descriptor = boost::python::extract<matrix<double,0,1>>(descriptors[i]);
|
|
||||||
matrix<double,0,1> second_descriptor = boost::python::extract<matrix<double,0,1>>(descriptors[j]);
|
|
||||||
|
|
||||||
if (length(first_descriptor-second_descriptor) < threshold)
|
|
||||||
edges.push_back(sample_pair(i,j));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const auto num_clusters = chinese_whispers(edges, labels);
|
|
||||||
for (size_t i = 0; i < labels.size(); ++i)
|
|
||||||
{
|
|
||||||
clusters.append(labels[i]);
|
|
||||||
}
|
|
||||||
return clusters;
|
|
||||||
}
|
|
||||||
|
|
||||||
void save_image_chip (
|
|
||||||
object img,
|
|
||||||
const full_object_detection& face,
|
|
||||||
const std::string& chip_filename
|
|
||||||
)
|
|
||||||
{
|
|
||||||
std::vector<full_object_detection> faces(1, face);
|
|
||||||
save_image_chips(img, faces, chip_filename);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void save_image_chips (
|
|
||||||
object img,
|
|
||||||
const std::vector<full_object_detection>& faces,
|
|
||||||
const std::string& chip_filename
|
|
||||||
)
|
|
||||||
{
|
|
||||||
int num_faces = faces.size();
|
|
||||||
std::vector<chip_details> dets;
|
|
||||||
for (auto& f : faces)
|
|
||||||
dets.push_back(get_face_chip_details(f, 150, 0.25));
|
|
||||||
dlib::array<matrix<rgb_pixel>> face_chips;
|
|
||||||
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
|
|
||||||
int i=0;
|
|
||||||
for (auto& chip : face_chips) {
|
|
||||||
i++;
|
|
||||||
if(num_faces > 1)
|
|
||||||
{
|
|
||||||
const std::string& file_name = chip_filename + "_" + std::to_string(i) + ".jpg";
|
|
||||||
save_jpeg(chip, file_name);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const std::string& file_name = chip_filename + ".jpg";
|
|
||||||
save_jpeg(chip, file_name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
matrix<double,0,1> compute_face_descriptor (
|
matrix<double,0,1> compute_face_descriptor (
|
||||||
object img,
|
object img,
|
||||||
const full_object_detection& face,
|
const full_object_detection& face,
|
||||||
|
@ -215,6 +142,78 @@ private:
|
||||||
anet_type net;
|
anet_type net;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
boost::python::list chinese_whispers_clustering(boost::python::list descriptors, float threshold)
|
||||||
|
{
|
||||||
|
boost::python::list clusters;
|
||||||
|
|
||||||
|
size_t num_descriptors = len(descriptors);
|
||||||
|
|
||||||
|
// This next bit of code creates a graph of connected objects and then uses the Chinese
|
||||||
|
// whispers graph clustering algorithm to identify how many objects there are and which
|
||||||
|
// objects belong to which cluster.
|
||||||
|
std::vector<sample_pair> edges;
|
||||||
|
std::vector<unsigned long> labels;
|
||||||
|
for (size_t i = 0; i < num_descriptors; ++i)
|
||||||
|
{
|
||||||
|
for (size_t j = i+1; j < num_descriptors; ++j)
|
||||||
|
{
|
||||||
|
matrix<double,0,1>& first_descriptor = boost::python::extract<matrix<double,0,1>&>(descriptors[i]);
|
||||||
|
matrix<double,0,1>& second_descriptor = boost::python::extract<matrix<double,0,1>&>(descriptors[j]);
|
||||||
|
|
||||||
|
if (length(first_descriptor-second_descriptor) < threshold)
|
||||||
|
edges.push_back(sample_pair(i,j));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const auto num_clusters = chinese_whispers(edges, labels);
|
||||||
|
for (size_t i = 0; i < labels.size(); ++i)
|
||||||
|
{
|
||||||
|
clusters.append(labels[i]);
|
||||||
|
}
|
||||||
|
return clusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
void save_face_chips (
|
||||||
|
object img,
|
||||||
|
const std::vector<full_object_detection>& faces,
|
||||||
|
const std::string& chip_filename
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int num_faces = faces.size();
|
||||||
|
std::vector<chip_details> dets;
|
||||||
|
for (auto& f : faces)
|
||||||
|
dets.push_back(get_face_chip_details(f, 150, 0.25));
|
||||||
|
dlib::array<matrix<rgb_pixel>> face_chips;
|
||||||
|
extract_image_chips(numpy_rgb_image(img), dets, face_chips);
|
||||||
|
int i=0;
|
||||||
|
for (auto& chip : face_chips)
|
||||||
|
{
|
||||||
|
i++;
|
||||||
|
if(num_faces > 1)
|
||||||
|
{
|
||||||
|
const std::string& file_name = chip_filename + "_" + std::to_string(i) + ".jpg";
|
||||||
|
save_jpeg(chip, file_name);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const std::string& file_name = chip_filename + ".jpg";
|
||||||
|
save_jpeg(chip, file_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void save_face_chip (
|
||||||
|
object img,
|
||||||
|
const full_object_detection& face,
|
||||||
|
const std::string& chip_filename
|
||||||
|
)
|
||||||
|
{
|
||||||
|
std::vector<full_object_detection> faces(1, face);
|
||||||
|
save_face_chips(img, faces, chip_filename);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -230,18 +229,19 @@ void bind_face_recognition()
|
||||||
.def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, (arg("img"),arg("faces"),arg("num_jitters")=0),
|
.def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, (arg("img"),arg("faces"),arg("num_jitters")=0),
|
||||||
"Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors. "
|
"Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors. "
|
||||||
"If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
|
"If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
|
||||||
)
|
|
||||||
.def("save_image_chip", &face_recognition_model_v1::save_image_chip, (arg("img"),arg("face"),arg("chip_filename")),
|
|
||||||
"Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix"
|
|
||||||
)
|
|
||||||
.def("save_image_chips", &face_recognition_model_v1::save_image_chips, (arg("img"),arg("faces"),arg("chip_filename")),
|
|
||||||
"Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix"
|
|
||||||
)
|
|
||||||
.def("cluster", &face_recognition_model_v1::cluster, (arg("descriptors"), arg("threshold")),
|
|
||||||
"Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using chinese_whispers."
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def("save_face_chip", &save_face_chip, (arg("img"),arg("face"),arg("chip_filename")),
|
||||||
|
"Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix. The face will be rotated upright and scaled to 150x150 pixels."
|
||||||
|
);
|
||||||
|
def("save_face_chips", &save_face_chips, (arg("img"),arg("faces"),arg("chip_filename")),
|
||||||
|
"Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix. The faces will be rotated upright and scaled to 150x150 pixels."
|
||||||
|
);
|
||||||
|
def("chinese_whispers_clustering", &chinese_whispers_clustering, (arg("descriptors"), arg("threshold")),
|
||||||
|
"Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using dlib::chinese_whispers."
|
||||||
|
);
|
||||||
|
|
||||||
{
|
{
|
||||||
typedef std::vector<full_object_detection> type;
|
typedef std::vector<full_object_detection> type;
|
||||||
class_<type>("full_object_detections", "An array of full_object_detection objects.")
|
class_<type>("full_object_detections", "An array of full_object_detection objects.")
|
||||||
|
|
Loading…
Reference in New Issue