diff --git a/dlib/image_processing/scan_image.h b/dlib/image_processing/scan_image.h index e1f3b19f8..fde2f74e4 100644 --- a/dlib/image_processing/scan_image.h +++ b/dlib/image_processing/scan_image.h @@ -22,20 +22,20 @@ namespace dlib inline rectangle bounding_box_of_rects ( const std::vector >& rects, - const point& origin + const point& position ) /*! ensures - returns the smallest rectangle that contains all the rectangles in rects. That is, returns the rectangle that - contains translate_rect(rects[i].second,origin) for all valid i. + contains translate_rect(rects[i].second,position) for all valid i. !*/ { rectangle rect; for (unsigned long i = 0; i < rects.size(); ++i) { - rect += translate_rect(rects[i].second,origin); + rect += translate_rect(rects[i].second,position); } return rect; @@ -72,7 +72,7 @@ namespace dlib double sum_of_rects_in_images ( const image_array_type& images, const std::vector >& rects, - const point& origin + const point& position ) { DLIB_ASSERT(all_images_same_size(images), @@ -101,13 +101,89 @@ namespace dlib for (unsigned long i = 0; i < rects.size(); ++i) { const typename image_array_type::type& img = images[rects[i].first]; - const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,origin)); + const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,position)); temp += sum(matrix_cast(subm(array_to_matrix(img), rect))); } return static_cast(temp); } +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type + > + double sum_of_rects_in_images_movable_parts ( + const image_array_type& images, + const rectangle& window, + const std::vector >& fixed_rects, + const std::vector >& movable_rects, + const point& position + ) + { + DLIB_ASSERT(all_images_same_size(images) && center(window) == point(0,0), + "\t double sum_of_rects_in_images_movable_parts()" + << "\n\t Invalid arguments given to this function." 
+ << "\n\t all_images_same_size(images): " << all_images_same_size(images) + << "\n\t center(window): " << center(window) + ); +#ifdef ENABLE_ASSERTS + for (unsigned long i = 0; i < fixed_rects.size(); ++i) + { + DLIB_ASSERT(fixed_rects[i].first < images.size(), + "\t double sum_of_rects_in_images_movable_parts()" + << "\n\t fixed_rects["<::type ptype; + + ptype temp = 0; + + // compute TOTAL_FIXED part + for (unsigned long i = 0; i < fixed_rects.size(); ++i) + { + const typename image_array_type::type& img = images[fixed_rects[i].first]; + const rectangle rect = get_rect(img).intersect(translate_rect(fixed_rects[i].second,position)); + temp += sum(matrix_cast(subm(array_to_matrix(img), rect))); + } + + if (images.size() > 0) + { + // compute TOTAL_MOVABLE part + array2d tempimg(images[0].nr(), images[0].nc()); + for (unsigned long i = 0; i < movable_rects.size(); ++i) + { + const typename image_array_type::type& img = images[movable_rects[i].first]; + + assign_all_pixels(tempimg, 0); + sum_filter(img, tempimg, movable_rects[i].second); + + const rectangle rect = get_rect(tempimg).intersect(translate_rect(window,position)); + if (rect.is_empty() == false) + temp += std::max(0,max(matrix_cast(subm(array_to_matrix(tempimg), rect)))); + } + } + + return static_cast(temp); + } + // ---------------------------------------------------------------------------------------- template < @@ -188,6 +264,120 @@ namespace dlib } } +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type + > + void scan_image_movable_parts ( + std::vector >& dets, + const image_array_type& images, + const rectangle& window, + const std::vector >& fixed_rects, + const std::vector >& movable_rects, + const double thresh, + const unsigned long max_dets + ) + { + DLIB_ASSERT(images.size() > 0 && all_images_same_size(images) && + center(window) == point(0,0) && window.area() > 0, + "\t void scan_image_movable_parts()" + << 
"\n\t Invalid arguments given to this function." + << "\n\t all_images_same_size(images): " << all_images_same_size(images) + << "\n\t center(window): " << center(window) + << "\n\t window.area(): " << window.area() + << "\n\t images.size(): " << images.size() + ); +#ifdef ENABLE_ASSERTS + for (unsigned long i = 0; i < fixed_rects.size(); ++i) + { + DLIB_ASSERT(fixed_rects[i].first < images.size(), + "\t void scan_image_movable_parts()" + << "\n\t Invalid arguments given to this function." + << "\n\t fixed_rects["< 0, + "\t void scan_image_movable_parts()" + << "\n\t Invalid arguments given to this function." + << "\n\t movable_rects["<::type ptype; + + array2d accum(images[0].nr(), images[0].nc()); + assign_all_pixels(accum, 0); + + for (unsigned long i = 0; i < fixed_rects.size(); ++i) + sum_filter(images[fixed_rects[i].first], accum, fixed_rects[i].second); + + array2d temp(accum.nr(), accum.nc()); + for (unsigned long i = 0; i < movable_rects.size(); ++i) + { + const rectangle rect = movable_rects[i].second; + assign_all_pixels(temp, 0); + sum_filter(images[movable_rects[i].first], temp, rect); + max_filter(temp, accum, window.width(), window.height(), 0); + } + + // TODO, make this block its own function and reuse it in scan_image(). + unsigned long count = 0; + dlib::rand rnd; + for (long r = 0; r < accum.nr(); ++r) + { + for (long c = 0; c < accum.nc(); ++c) + { + const ptype cur_sum = accum[r][c]; + if (cur_sum >= thresh) + { + ++count; + + if (dets.size() < max_dets) + { + dets.push_back(std::make_pair(cur_sum, point(c,r))); + } + else + { + // The idea here is to cause us to randomly sample possible detection + // locations throughout the image rather than just stopping the detection + // procedure once we hit the max_dets limit. So this method will result + // in a random subsample of all the detections >= thresh being in dets + // at the end of scan_image_movable_parts(). 
+ const unsigned long random_index = rnd.get_random_32bit_number()%count; + if (random_index < dets.size()) + { + dets[random_index] = std::make_pair(cur_sum, point(c,r)); + } + } + } + } + } + } + // ---------------------------------------------------------------------------------------- } diff --git a/dlib/image_processing/scan_image_abstract.h b/dlib/image_processing/scan_image_abstract.h index 91107ded6..055a0eb96 100644 --- a/dlib/image_processing/scan_image_abstract.h +++ b/dlib/image_processing/scan_image_abstract.h @@ -38,7 +38,7 @@ namespace dlib double sum_of_rects_in_images ( const image_array_type& images, const std::vector >& rects, - const point& origin + const point& position ); /*! requires @@ -50,11 +50,52 @@ namespace dlib (i.e. all the rectangles must reference valid elements of images) ensures - returns the sum of the pixels inside the given rectangles. To be precise, - let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, origin) + let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, position) from the image images[rects[i].first]. Then this function returns the sum of RECT_SUM[i] for all the valid values of i. !*/ +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type + > + double sum_of_rects_in_images_movable_parts ( + const image_array_type& images, + const rectangle& window, + const std::vector >& fixed_rects, + const std::vector >& movable_rects, + const point& position + ); + /*! + requires + - image_array_type == an implementation of array/array_kernel_abstract.h + - image_array_type::type == an implementation of array2d/array2d_kernel_abstract.h + - image_array_type::type::type == a scalar pixel type (e.g. int rather than rgb_pixel) + - all_images_same_size(images) == true + - center(window) == point(0,0) + - for all valid i: + - fixed_rects[i].first < images.size() + (i.e. 
all the rectangles must reference valid elements of images) + - for all valid i: + - movable_rects[i].first < images.size() + (i.e. all the rectangles must reference valid elements of images) + - center(movable_rects[i].second) == point(0,0) + ensures + - returns the sum of the pixels inside fixed_rects as well as the sum of the pixels + inside movable_rects when these latter rectangles are placed at their highest + scoring locations inside the given window. To be precise: + - let RECT_SUM(r,x) = sum of pixels inside the rectangle translate_rect(r.second, x) + from the image images[r.first]. + - let WIN_MAX(i) = The maximum value of RECT_SUM(movable_rects[i],X) when maximizing + over all the X such that translate_rect(window,position).contains(X) == true. + + - let TOTAL_FIXED == sum over all elements R in fixed_rects of: RECT_SUM(R,position) + - let TOTAL_MOVABLE == sum over all valid i of: max(WIN_MAX(i), 0) + + Then this function returns TOTAL_FIXED + TOTAL_MOVABLE. + !*/ + // ---------------------------------------------------------------------------------------- template < @@ -90,6 +131,54 @@ namespace dlib test. !*/ +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type + > + void scan_image_movable_parts ( + std::vector >& dets, + const image_array_type& images, + const rectangle& window, + const std::vector >& fixed_rects, + const std::vector >& movable_rects, + const double thresh, + const unsigned long max_dets + ); + /*! + requires + - image_array_type == an implementation of array/array_kernel_abstract.h + - image_array_type::type == an implementation of array2d/array2d_kernel_abstract.h + - image_array_type::type::type == a scalar pixel type (e.g. int rather than rgb_pixel) + - images.size() > 0 + - all_images_same_size(images) == true + - center(window) == point(0,0) + - window.area() > 0 + - for all valid i: + - fixed_rects[i].first < images.size() + (i.e. 
all the rectangles must reference valid elements of images) + - for all valid i: + - movable_rects[i].first < images.size() + (i.e. all the rectangles must reference valid elements of images) + - center(movable_rects[i].second) == point(0,0) + - movable_rects[i].second.area() > 0 + ensures + - Scans the given window over the images and reports the locations with a score + greater than or equal to thresh. + - Specifically, we have: + - #dets.size() <= max_dets + (note that dets is cleared before new detections are added by scan_image_movable_parts()) + - for all valid i: + - #dets[i].first == sum_of_rects_in_images_movable_parts(images, + window, + fixed_rects, + movable_rects, + #dets[i].second) >= thresh + - if (there are more than max_dets locations that pass the above threshold test) then + - #dets == a random subsample of all the locations which passed the threshold + test. + !*/ + // ---------------------------------------------------------------------------------------- }