2014-02-24 05:56:14 +08:00
|
|
|
#!/usr/bin/python
|
|
|
|
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
|
|
|
#
|
2014-12-11 17:44:50 +08:00
|
|
|
# This example program shows how you can use dlib to make an object
|
|
|
|
# detector for things like faces, pedestrians, and any other semi-rigid
|
|
|
|
# object. In particular, we go through the steps to train the kind of sliding
|
|
|
|
# window object detector first published by Dalal and Triggs in 2005 in the
|
|
|
|
# paper Histograms of Oriented Gradients for Human Detection.
|
2014-02-24 05:56:14 +08:00
|
|
|
#
|
2015-10-27 20:25:43 +08:00
|
|
|
#
|
|
|
|
# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
|
|
|
|
# You can install dlib using the command:
|
|
|
|
# pip install dlib
|
|
|
|
#
|
|
|
|
# Alternatively, if you want to compile dlib yourself then go into the dlib
|
|
|
|
# root folder and run:
|
|
|
|
# python setup.py install
|
|
|
|
# or
|
|
|
|
# python setup.py install --yes USE_AVX_INSTRUCTIONS
|
|
|
|
# if you have a CPU that supports AVX instructions, since this makes some
|
|
|
|
# things run faster.
|
|
|
|
#
|
|
|
|
# Compiling dlib should work on any operating system so long as you have
|
|
|
|
# CMake and boost-python installed. On Ubuntu, this can be done easily by
|
|
|
|
# running the command:
|
2014-12-11 17:44:50 +08:00
|
|
|
# sudo apt-get install libboost-python-dev cmake
|
2015-03-08 03:14:47 +08:00
|
|
|
#
|
|
|
|
# Also note that this example requires scikit-image which can be installed
|
|
|
|
# via the command:
|
2015-10-27 20:50:44 +08:00
|
|
|
# pip install scikit-image
|
2015-03-08 03:14:47 +08:00
|
|
|
# Or downloaded from http://scikit-image.org/download.html.
|
|
|
|
|
2014-12-11 17:44:50 +08:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import glob
|
|
|
|
|
|
|
|
import dlib
|
2014-02-24 05:56:14 +08:00
|
|
|
from skimage import io
|
|
|
|
|
2014-12-11 17:44:50 +08:00
|
|
|
|
2014-02-24 05:56:14 +08:00
|
|
|
# In this example we are going to train a face detector based on the small
|
|
|
|
# faces dataset in the examples/faces directory. This means you need to supply
|
|
|
|
# the path to this faces folder as a command line argument so we will know
|
|
|
|
# where it is.
|
2014-12-11 17:44:50 +08:00
|
|
|
# Exactly one command line argument is expected: the path to the faces folder.
if len(sys.argv) != 2:
    print(
        "Give the path to the examples/faces directory as the argument to this "
        "program. For example, if you are in the python_examples folder then "
        "execute this program by running:\n"
        " ./train_object_detector.py ../examples/faces")
    # Use sys.exit() rather than the site-provided exit() helper, which is
    # intended for interactive sessions and is missing when Python is started
    # with -S. A non-zero status tells the caller that this was a usage error.
    sys.exit(1)
faces_folder = sys.argv[1]
|
|
|
|
|
2014-12-28 04:30:56 +08:00
|
|
|
|
2014-02-24 07:16:15 +08:00
|
|
|
# Now let's do the training. The train_simple_object_detector() function has a
|
2014-02-24 05:56:14 +08:00
|
|
|
# bunch of options, all of which come with reasonable default values. The next
|
|
|
|
# few lines go over some of these options.
|
|
|
|
options = dlib.simple_object_detector_training_options()

# Faces are left/right symmetric, so ask the trainer for a symmetric detector.
# This lets it get the most value out of the training data.
options.add_left_right_image_flips = True

# The trainer is a kind of support vector machine, so it has the usual SVM C
# parameter. A larger C pushes the detector to fit the training data more
# closely, at the risk of overfitting. The best value has to be found
# empirically: try a few different C values and check each trained detector
# on a test set of images that were not used for training. Don't just leave
# the value set at 5.
options.C = 5

# Set this to the number of CPU cores in your computer for the fastest
# training.
options.num_threads = 4
options.be_verbose = True
|
2014-02-24 05:56:14 +08:00
|
|
|
|
2014-12-28 04:30:56 +08:00
|
|
|
|
|
|
|
# Both dataset description files are looked up inside the faces folder.
training_xml_path, testing_xml_path = (
    os.path.join(faces_folder, "training.xml"),
    os.path.join(faces_folder, "testing.xml"),
)

# Run the actual training. The final detector is saved to detector.svm. The
# input is an XML file that lists the images in the training dataset along
# with the positions of the face boxes. To make XML files of your own, use the
# imglab tool found in the tools/imglab folder: it is a simple graphical tool
# for labeling objects in images with boxes, and tools/imglab/README.txt
# explains how to use it. This example just uses the training.xml file
# included with dlib.
dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)
|
2014-02-24 05:56:14 +08:00
|
|
|
|
2014-12-28 04:30:56 +08:00
|
|
|
|
|
|
|
|
2014-02-24 05:56:14 +08:00
|
|
|
# Now that we have a face detector we can test it. The first statement tests
|
2014-12-11 01:32:32 +08:00
|
|
|
# it on the training data. It will print the precision, recall, and then
|
2014-02-24 05:56:14 +08:00
|
|
|
# average precision.
|
2014-12-11 17:44:50 +08:00
|
|
|
print("")  # Blank line to separate these results from the previous output

# Score the saved detector on the same data it was trained on.
training_accuracy = dlib.test_simple_object_detector(
    training_xml_path, "detector.svm")
print("Training accuracy: {}".format(training_accuracy))

# The training score alone cannot reveal overfitting, so also score the
# detector on images it was not trained on. Happily, we see that the object
# detector works perfectly on the testing images.
testing_accuracy = dlib.test_simple_object_detector(
    testing_xml_path, "detector.svm")
print("Testing accuracy: {}".format(testing_accuracy))
|
2014-02-24 05:56:14 +08:00
|
|
|
|
2014-12-28 04:30:56 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-02-24 05:56:14 +08:00
|
|
|
# Now let's use the detector as you would in a normal application. First we
|
|
|
|
# will load it from disk.
|
|
|
|
# Load the trained detector back from the detector.svm file.
detector = dlib.simple_object_detector("detector.svm")

# Show the learned HOG filter in its own window. It should look like a face.
win_det = dlib.image_window()
win_det.set_image(detector)
|
|
|
|
|
2014-02-24 07:16:15 +08:00
|
|
|
# Now let's run the detector over the images in the faces folder and display the
|
2014-02-24 05:56:14 +08:00
|
|
|
# results.
|
2014-12-11 17:44:50 +08:00
|
|
|
print("Showing detections on the images in the faces folder...")
win = dlib.image_window()
jpg_pattern = os.path.join(faces_folder, "*.jpg")
for image_path in glob.glob(jpg_pattern):
    print("Processing file: {}".format(image_path))
    image = io.imread(image_path)
    detections = detector(image)
    print("Number of faces detected: {}".format(len(detections)))

    # Report the edges of every detected box.
    for index, box in enumerate(detections):
        edges = (box.left(), box.top(), box.right(), box.bottom())
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            index, *edges))

    # Replace the previous image and overlay with the current ones, then wait
    # for the user before moving on to the next file.
    win.clear_overlay()
    win.set_image(image)
    win.add_overlay(detections)
    dlib.hit_enter_to_continue()
|
2014-02-24 05:56:14 +08:00
|
|
|
|
2014-12-28 04:30:56 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-08-13 07:47:41 +08:00
|
|
|
# Finally, note that you don't have to use the XML based input to
|
|
|
|
# train_simple_object_detector(). If you have already loaded your training
|
|
|
|
# images and bounding boxes for the objects then you can call it as shown
|
|
|
|
# below.
|
|
|
|
|
|
|
|
# You just need to put your images into a list.
|
2014-12-11 17:44:50 +08:00
|
|
|
# Load the two example images. Their paths are built with os.path.join, the
# same way every other path in this script is built, instead of concatenating
# strings with a hard-coded '/' separator.
images = [io.imread(os.path.join(faces_folder, '2008_002506.jpg')),
          io.imread(os.path.join(faces_folder, '2009_004587.jpg'))]

# Then for each image you make a list of rectangles which give the pixel
# locations of the edges of the boxes.
boxes_img1 = [dlib.rectangle(left=329, top=78, right=437, bottom=186),
              dlib.rectangle(left=224, top=95, right=314, bottom=185),
              dlib.rectangle(left=125, top=65, right=214, bottom=155)]
boxes_img2 = [dlib.rectangle(left=154, top=46, right=228, bottom=121),
              dlib.rectangle(left=266, top=280, right=328, bottom=342)]

# And then you aggregate those lists of boxes into one big list, with one
# entry per image in the same order as `images`, and call
# train_simple_object_detector().
boxes = [boxes_img1, boxes_img2]

detector2 = dlib.train_simple_object_detector(images, boxes, options)
|
2014-12-28 04:30:56 +08:00
|
|
|
# We could save this detector to disk by uncommenting the following.
|
2014-12-11 23:01:08 +08:00
|
|
|
#detector2.save('detector2.svm')
|
2014-08-13 07:47:41 +08:00
|
|
|
|
2014-12-28 04:30:56 +08:00
|
|
|
# Now let's look at its HOG filter!
|
2014-08-13 07:47:41 +08:00
|
|
|
# Reuse the existing filter window for the second detector and wait for the
# user before continuing.
win_det.set_image(detector2)
dlib.hit_enter_to_continue()

# test_simple_object_detector() does not need XML input either. Images and
# bounding boxes that are already loaded can be passed in directly, together
# with the detector object, as done here.
in_memory_accuracy = dlib.test_simple_object_detector(images, boxes, detector2)
print("\nTraining accuracy: {}".format(in_memory_accuracy))
|