diff --git a/dlib/image_transforms/thresholding.h b/dlib/image_transforms/thresholding.h index e4fb02c4a..02ff01d04 100644 --- a/dlib/image_transforms/thresholding.h +++ b/dlib/image_transforms/thresholding.h @@ -15,6 +15,89 @@ namespace dlib const unsigned char on_pixel = 255; const unsigned char off_pixel = 0; +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + typename pixel_traits::pixel_type>::basic_pixel_type + partition_pixels ( + const image_type& img + ) + { + COMPILE_TIME_ASSERT( pixel_traits::pixel_type>::has_alpha == false ); + COMPILE_TIME_ASSERT( pixel_traits::pixel_type>::is_unsigned == true ); + + + matrix hist; + get_histogram(img,hist); + + // create integral histograms + matrix cum_hist(hist.size()+1), cum_histi(hist.size()+1); + cum_hist(0) = 0; + cum_histi(0) = 0; + for (long i = 0; i < hist.size(); ++i) + { + cum_hist(i+1) = cum_hist(i) + hist(i); + cum_histi(i+1) = cum_histi(i) + hist(i)*(double)i; + } + + auto histsum = [&](long begin, long end) + { + return cum_hist(end)-cum_hist(begin); + }; + auto histsumi = [&](long begin, long end) + { + return cum_histi(end)-cum_histi(begin); + }; + + // If we split the pixels into two groups, those < thresh (the left group) and + // those >= thresh (the right group), what would the sum of absolute deviations of + // each pixel from the mean of its group be? total_abs(thresh) computes that + // value. + auto total_abs = [&](unsigned long thresh) + { + auto left_avg = histsumi(0,thresh); + auto tmp = histsum(0,thresh); + if (tmp != 0) + left_avg /= tmp; + auto right_avg = histsumi(thresh,hist.size()); + tmp = histsum(thresh,hist.size()); + if (tmp != 0) + right_avg /= tmp; + + + const long left_idx = (long)std::ceil(left_avg); + const long right_idx = (long)std::ceil(right_avg); + + double score = 0; + score += left_avg*histsum(0,left_idx) - histsumi(0,left_idx); + score -= left_avg*histsum(left_idx,thresh) - histsumi(left_idx,thresh); + score += right_avg*histsum(thresh,right_idx) - histsumi(thresh,right_idx); + score -= right_avg*histsum(right_idx,hist.size()) - histsumi(right_idx,hist.size()); + return score; + }; + + + unsigned long thresh = 0; + double min_sad = std::numeric_limits::infinity(); + for (long i = 0; i < hist.size(); ++i) + { + double sad = total_abs(i); + // The 1e-13 here is to avoid jumping to a higher threshold because of rounding + // error in total_abs() reporting a very slightly larger value. Really this + // probably doesn't matter for any real application, but it makes the behavior + // of the code more stable which is always nice. + if (sad+1e-13*sad < min_sad) + { + min_sad = sad; + thresh = i; + } + } + + return thresh; + } + // ---------------------------------------------------------------------------------------- template < @@ -69,6 +152,18 @@ namespace dlib threshold_image(img,img,thresh); } +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + void threshold_image ( + image_type& img + ) + { + threshold_image(img,img,partition_pixels(img)); + } + // ---------------------------------------------------------------------------------------- template < diff --git a/dlib/image_transforms/thresholding_abstract.h b/dlib/image_transforms/thresholding_abstract.h index e7c1e8826..e21906908 100644 --- a/dlib/image_transforms/thresholding_abstract.h +++ b/dlib/image_transforms/thresholding_abstract.h @@ -13,6 +13,29 @@ namespace dlib const unsigned char on_pixel = 255; const unsigned char off_pixel = 0; +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + typename pixel_traits::pixel_type>::basic_pixel_type + partition_pixels ( + const image_type& img + ); + /*! + requires + - image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - pixel_traits::pixel_type>::max() <= 65535 + - pixel_traits::pixel_type>::has_alpha == false + - pixel_traits::pixel_type>::is_unsigned == true + ensures + - Finds a threshold value that would be reasonable to use with + threshold_image(img, threshold). It does this by finding the threshold that + partitions the pixels in img into two groups such that the sum of absolute + deviations between each pixel and the mean of its group is minimized. + !*/ + // ---------------------------------------------------------------------------------------- template < @@ -26,8 +49,10 @@ namespace dlib ); /*! requires - - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h - - out_image_type == is an implementation of array2d/array2d_kernel_abstract.h + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h - pixel_traits::pixel_type>::grayscale == true - pixel_traits::pixel_type>::has_alpha == false - pixel_traits::pixel_type>::has_alpha == false @@ -53,48 +78,17 @@ namespace dlib - calls threshold_image(img,img,thresh); !*/ -// ---------------------------------------------------------------------------------------- - - template < - typename in_image_type, - typename out_image_type - > - void auto_threshold_image ( - const in_image_type& in_img, - out_image_type& out_img - ); - /*! - requires - - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h - - out_image_type == is an implementation of array2d/array2d_kernel_abstract.h - - pixel_traits::pixel_type>::max() <= 65535 - - pixel_traits::pixel_type>::has_alpha == false - - pixel_traits::pixel_type>::is_unsigned == true - - pixel_traits::pixel_type>::grayscale == true - - pixel_traits::pixel_type>::has_alpha == false - - pixel_traits::pixel_type>::is_unsigned == true - ensures - - #out_img == the thresholded version of in_img (in_img is converted to a grayscale - intensity image if it is color). Pixels in in_img with grayscale values >= thresh - have an output value of on_pixel and all others have a value of off_pixel. - - The thresh value used is determined by performing a k-means clustering - on the input image histogram with a k of 2. The point between the two - means found is used as the thresh value. - - #out_img.nc() == in_img.nc() - - #out_img.nr() == in_img.nr() - !*/ - template < typename image_type > - void auto_threshold_image ( + void threshold_image ( image_type& img ); /*! requires - - it is valid to call auto_threshold_image(img,img); + - it is valid to call threshold_image(img,img,thresh); ensures - - calls auto_threshold_image(img,img); + - calls threshold_image(img,img,partition_pixels(img)); !*/ // ---------------------------------------------------------------------------------------- @@ -111,8 +105,10 @@ namespace dlib ); /*! requires - - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h - - out_image_type == is an implementation of array2d/array2d_kernel_abstract.h + - in_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h + - out_image_type == an image object that implements the interface defined in + dlib/image_processing/generic_image.h - pixel_traits::pixel_type>::grayscale == true - pixel_traits::pixel_type>::has_alpha == false - pixel_traits::pixel_type>::has_alpha == false diff --git a/dlib/test/image.cpp b/dlib/test/image.cpp index 01f1410cf..0b0d41a0b 100644 --- a/dlib/test/image.cpp +++ b/dlib/test/image.cpp @@ -1812,6 +1812,86 @@ namespace } +// ---------------------------------------------------------------------------------------- + + template < + typename image_type + > + typename pixel_traits::pixel_type>::basic_pixel_type + test_partition_pixels ( + const image_type& img + ) + { + matrix hist; + get_histogram(img,hist); + + auto average1 = [&](unsigned long thresh) + { + double accum = 0; + double cnt = 0; + for (unsigned long i = 0; i < thresh; ++i) + { + accum += hist(i)*i; + cnt += hist(i); + } + + if (cnt != 0) + return accum/cnt; + else + return 0.0; + }; + + auto average2 = [&](unsigned long thresh) + { + double accum = 0; + double cnt = 0; + for (long i = thresh; i < hist.size(); ++i) + { + accum += hist(i)*i; + cnt += hist(i); + } + + if (cnt != 0) + return accum/cnt; + else + return 0.0; + }; + + + auto total_abs = [&](unsigned long thresh) + { + auto a = average1(thresh); + auto b = average2(thresh); + + double score = 0; + for (long i = 0; i < hist.size(); ++i) + { + if (i < (long)thresh) + score += std::abs(a-i)*hist(i); + else + score += std::abs(b-i)*hist(i); + } + return score; + }; + + + unsigned long thresh = 0; + double min_sad = total_abs(0); + for (long i = 1; i < hist.size(); ++i) + { + double sad = total_abs(i); + //cout << "TRUTH: i:" << i << " total: "<< total_abs(i) << endl; + if (sad+1e-13*sad < min_sad) + { + //cout << "CHANGE TRUTH: i:" << i << " total: "<< total_abs(i)-min_sad << endl; + min_sad = sad; + thresh = i; + } + } + + return thresh; + } + // ---------------------------------------------------------------------------------------- class image_tester : public tester @@ -1894,6 +1974,20 @@ namespace draw_line(img, point(20,19), point(59,19), 00); DLIB_TEST(sum(matrix_cast(mat(img))) == 0); } + + { + matrix img(4,7); + + dlib::rand rnd; + for (int round = 0; round < 100; ++round) + { + print_spinner(); + for (auto& p : img) + p = rnd.get_random_8bit_number(); + + DLIB_CASSERT(test_partition_pixels(img) == partition_pixels(img)) + } + } } } a;