From bbf3d987b2d172add17cd366f09cfa9400693693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Ram=C3=A9?= Date: Sun, 27 Aug 2017 23:40:31 +0100 Subject: [PATCH] improvements to cnn face detection python interface (#780) * improvements to cnn face detection interface * mmod rectangle object renaming. possibility to set batch size in multi image detection. Added check to make sure images are all the same size. --- dlib/image_processing/full_object_detection.h | 6 ++ python_examples/cnn_face_detector.py | 27 ++++-- tools/python/src/cnn_face_detector.cpp | 97 ++++++++++++++++--- 3 files changed, 112 insertions(+), 18 deletions(-) diff --git a/dlib/image_processing/full_object_detection.h b/dlib/image_processing/full_object_detection.h index 7c72b9a57..ddf195845 100644 --- a/dlib/image_processing/full_object_detection.h +++ b/dlib/image_processing/full_object_detection.h @@ -140,6 +140,12 @@ namespace dlib bool ignore = false; operator rectangle() const { return rect; } + bool operator == (const mmod_rect& rhs) const + { + return rect == rhs.rect + && detection_confidence == rhs.detection_confidence + && ignore == rhs.ignore; + } }; inline mmod_rect ignored_mmod_rect(const rectangle& r) diff --git a/python_examples/cnn_face_detector.py b/python_examples/cnn_face_detector.py index 1cf74f3de..477cafdae 100644 --- a/python_examples/cnn_face_detector.py +++ b/python_examples/cnn_face_detector.py @@ -39,7 +39,6 @@ # Or downloaded from http://scikit-image.org/download.html. import sys - import dlib from skimage import io @@ -51,7 +50,7 @@ if len(sys.argv) < 3: " http://dlib.net/files/mmod_human_face_detector.dat.bz2") exit() -cnn_face_detection_model = dlib.cnn_face_detection_model_v1(sys.argv[1]) +cnn_face_detector = dlib.cnn_face_detection_model_v1(sys.argv[1]) win = dlib.image_window() for f in sys.argv[2:]: @@ -60,13 +59,27 @@ for f in sys.argv[2:]: # The 1 in the second argument indicates that we should upsample the image # 1 time. This will make everything bigger and allow us to detect more # faces. - dets = cnn_face_detection_model.cnn_face_detector(img, 1) + dets = cnn_face_detector(img, 1) + ''' + This detector returns a mmod_rectangles object. This object contains a list of mmod_rectangle objects. + These objects can be accessed by simply iterating over the mmod_rectangles object + The mmod_rectangle object has two member variables, a dlib.rectangle object, and a confidence score. + + It is also possible to pass a list of images to the detector. + - like this: dets = cnn_face_detector([image list], upsample_num, batch_size = 128) + + In this case it will return a mmod_rectangless object. + This object behaves just like a list of lists and can be iterated over. + ''' print("Number of faces detected: {}".format(len(dets))) for i, d in enumerate(dets): - print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format( - i, d.left(), d.top(), d.right(), d.bottom())) + print("Detection {}: Left: {} Top: {} Right: {} Bottom: {} Confidence: {}".format( + i, d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom(), d.confidence)) + + rects = dlib.rectangles() + rects.extend([d.rect for d in dets]) win.clear_overlay() win.set_image(img) - win.add_overlay(dets) - dlib.hit_enter_to_continue() + win.add_overlay(rects) + dlib.hit_enter_to_continue() \ No newline at end of file diff --git a/tools/python/src/cnn_face_detector.cpp b/tools/python/src/cnn_face_detector.cpp index 691f345ce..686d3c578 100644 --- a/tools/python/src/cnn_face_detector.cpp +++ b/tools/python/src/cnn_face_detector.cpp @@ -2,10 +2,7 @@ // License: Boost Software License See LICENSE.txt for the full license. #include -#include #include -#include -#include #include #include #include "indexing.h" @@ -14,9 +11,6 @@ using namespace dlib; using namespace std; using namespace boost::python; -typedef matrix cv; - - class cnn_face_detection_model_v1 { @@ -27,13 +21,13 @@ public: deserialize(model_filename) >> net; } - std::vector cnn_face_detector ( + std::vector detect ( object pyimage, const int upsample_num_times ) { pyramid_down<2> pyr; - std::vector rects; + std::vector rects; // Copy the data into dlib based objects matrix image; @@ -59,12 +53,69 @@ public: // if the image was upscaled. for (auto&& d : dets) { d.rect = pyr.rect_down(d.rect, upsample_num_times); - rects.push_back(d.rect); + rects.push_back(d); } return rects; } + std::vector > detect_mult ( + boost::python::list& imgs, + const int upsample_num_times, + const int batch_size = 128 + ) + { + pyramid_down<2> pyr; + std::vector > dimgs; + dimgs.reserve(len(imgs)); + + for(int i = 0; i < len(imgs); i++) + { + // Copy the data into dlib based objects + matrix image; + object tmp = boost::python::extract(imgs[i]); + if (is_gray_python_image(tmp)) + assign_image(image, numpy_gray_image(tmp)); + else if (is_rgb_python_image(tmp)) + assign_image(image, numpy_rgb_image(tmp)); + else + throw dlib::error("Unsupported image type, must be 8bit gray or RGB image."); + + for(int i = 0; i < upsample_num_times; i++) + { + pyramid_up(image); + } + dimgs.push_back(image); + } + + for(int i = 1; i < dimgs.size(); i++) + { + if + ( + dimgs[i - 1].nc() != dimgs[i].nc() || + dimgs[i - 1].nr() != dimgs[i].nr() + ) + throw dlib::error("Images in list must all have the same dimensions."); + + } + + auto dets = net(dimgs, batch_size); + std::vector > all_rects; + + for(auto&& im_dets : dets) + { + std::vector rects; + rects.reserve(im_dets.size()); + for (auto&& d : im_dets) { + d.rect = pyr.rect_down(d.rect, upsample_num_times); + rects.push_back(d); + } + all_rects.push_back(rects); + } + + return all_rects; + } + private: template using con5d = con; @@ -78,7 +129,6 @@ private: net_type net; }; - // ---------------------------------------------------------------------------------------- void bind_cnn_face_detection() @@ -86,10 +136,35 @@ void bind_cnn_face_detection() using boost::python::arg; { class_("cnn_face_detection_model_v1", "This object detects human faces in an image. The constructor loads the face detection model from a file. You can download a pre-trained model from http://dlib.net/files/mmod_human_face_detector.dat.bz2.", init()) - .def("cnn_face_detector", &cnn_face_detection_model_v1::cnn_face_detector, (arg("img"), arg("upsample_num_times")=0), + .def( + "__call__", + &cnn_face_detection_model_v1::detect, + (arg("img"), arg("upsample_num_times")=0), "Find faces in an image using a deep learning model.\n\ - Upsamples the image upsample_num_times before running the face \n\ detector." + ) + .def( + "__call__", + &cnn_face_detection_model_v1::detect_mult, + (arg("imgs"), arg("upsample_num_times")=0, arg("batch_size")=128), + "takes a list of images as input returning a 2d list of mmod rectangles" ); } + { + typedef mmod_rect type; + class_("mmod_rectangle", "Wrapper around a rectangle object and a detection confidence score.") + .def_readwrite("rect", &type::rect) + .def_readwrite("confidence", &type::detection_confidence); + } + { + typedef std::vector type; + class_("mmod_rectangles", "An array of mmod rectangle objects.") + .def(vector_indexing_suite()); + } + { + typedef std::vector > type; + class_("mmod_rectangless", "A 2D array of mmod rectangle objects.") + .def(vector_indexing_suite()); + } }