Added partition_pixels() and made threshold_image() use it to find the default

threshold if none is given.  Also deprecated auto_threshold_image() since
using partition_pixels() to pick the threshold is generally better.
This commit is contained in:
Davis King 2018-05-07 23:11:54 -04:00
parent 940546f878
commit 802d641103
3 changed files with 223 additions and 38 deletions

View File

@ -15,6 +15,89 @@ namespace dlib
const unsigned char on_pixel = 255;
const unsigned char off_pixel = 0;
// ----------------------------------------------------------------------------------------
// Picks a threshold for img by scanning every histogram bin and keeping the one
// that minimizes the sum of absolute deviations (SAD) of each pixel from the mean
// of its group, where the groups are the pixels < thresh and the pixels >= thresh.
// Prefix sums over the histogram make each candidate threshold cheap to score.
template <
    typename image_type
    >
typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type
partition_pixels (
    const image_type& img
)
{
    typedef typename image_traits<image_type>::pixel_type pixel_type;
    COMPILE_TIME_ASSERT( pixel_traits<pixel_type>::has_alpha == false );
    COMPILE_TIME_ASSERT( pixel_traits<pixel_type>::is_unsigned == true );

    matrix<unsigned long,1> hist;
    get_histogram(img,hist);

    const long num_bins = hist.size();

    // Prefix sums of the histogram:
    //   prefix_count(k)    == number of pixels with value < k
    //   prefix_weighted(k) == sum of the values of all pixels with value < k
    matrix<double,1> prefix_count(num_bins+1), prefix_weighted(num_bins+1);
    prefix_count(0) = 0;
    prefix_weighted(0) = 0;
    for (long bin = 0; bin < num_bins; ++bin)
    {
        prefix_count(bin+1) = prefix_count(bin) + hist(bin);
        prefix_weighted(bin+1) = prefix_weighted(bin) + hist(bin)*static_cast<double>(bin);
    }

    // Number of pixels whose value lies in [begin,end).
    auto count_in = [&](long begin, long end)
    {
        return prefix_count(end)-prefix_count(begin);
    };
    // Sum of the values of the pixels whose value lies in [begin,end).
    auto weighted_in = [&](long begin, long end)
    {
        return prefix_weighted(end)-prefix_weighted(begin);
    };

    // SAD obtained when the pixels are split at thresh.  Within each group the
    // bins below the group mean contribute (mean - value) and the bins at or above
    // it contribute (value - mean), which is why each group is split again at
    // ceil(mean) and the second half is subtracted.
    auto sad_for = [&](const long thresh)
    {
        double low_mean = weighted_in(0,thresh);
        const double low_count = count_in(0,thresh);
        if (low_count != 0)
            low_mean /= low_count;

        double high_mean = weighted_in(thresh,num_bins);
        const double high_count = count_in(thresh,num_bins);
        if (high_count != 0)
            high_mean /= high_count;

        const long low_split = static_cast<long>(std::ceil(low_mean));
        const long high_split = static_cast<long>(std::ceil(high_mean));

        double score = 0;
        score += low_mean*count_in(0,low_split) - weighted_in(0,low_split);
        score -= low_mean*count_in(low_split,thresh) - weighted_in(low_split,thresh);
        score += high_mean*count_in(thresh,high_split) - weighted_in(thresh,high_split);
        score -= high_mean*count_in(high_split,num_bins) - weighted_in(high_split,num_bins);
        return score;
    };

    unsigned long best_thresh = 0;
    double best_sad = std::numeric_limits<double>::infinity();
    for (long candidate = 0; candidate < num_bins; ++candidate)
    {
        const double sad = sad_for(candidate);
        // The small relative margin keeps rounding noise in sad_for() from moving
        // the result to a higher threshold that is not really any better.
        if (sad+1e-13*sad < best_sad)
        {
            best_sad = sad;
            best_thresh = candidate;
        }
    }

    return best_thresh;
}
// ----------------------------------------------------------------------------------------
template <
@ -69,6 +152,18 @@ namespace dlib
threshold_image(img,img,thresh);
}
// ----------------------------------------------------------------------------------------
// In-place thresholding with an automatically chosen threshold: the threshold is
// computed from img by partition_pixels() and then applied to img itself.
template <
    typename image_type
    >
void threshold_image (
    image_type& img
)
{
    const auto auto_thresh = partition_pixels(img);
    threshold_image(img,img,auto_thresh);
}
// ----------------------------------------------------------------------------------------
template <

View File

@ -13,6 +13,29 @@ namespace dlib
const unsigned char on_pixel = 255;
const unsigned char off_pixel = 0;
// ----------------------------------------------------------------------------------------
template <
typename image_type
>
typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type
partition_pixels (
const image_type& img
);
/*!
    requires
        - image_type == an image object that implements the interface defined in
          dlib/image_processing/generic_image.h
        - pixel_traits<typename image_traits<image_type>::pixel_type>::max() <= 65535
        - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
        - pixel_traits<typename image_traits<image_type>::pixel_type>::is_unsigned == true
    ensures
        - Finds a threshold value that would be reasonable to use with
          threshold_image(img, threshold).  It does this by finding the threshold that
          partitions the pixels in img into two groups such that the sum of absolute
          deviations between each pixel and the mean of its group is minimized.
        - The two groups are the pixels with values < threshold and the pixels with
          values >= threshold.
        - Candidate thresholds are examined in increasing order and a candidate only
          replaces the current best if it is better by more than a tiny relative
          margin, so when several thresholds are effectively tied the smallest one
          is returned.
!*/
// ----------------------------------------------------------------------------------------
template <
@ -26,8 +49,10 @@ namespace dlib
);
/*!
requires
- in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- out_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- in_image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
- out_image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
- pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
- pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
- pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false
@ -53,48 +78,17 @@ namespace dlib
- calls threshold_image(img,img,thresh);
!*/
// ----------------------------------------------------------------------------------------
template <
typename in_image_type,
typename out_image_type
>
void auto_threshold_image (
const in_image_type& in_img,
out_image_type& out_img
);
/*!
    requires
        - in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
        - out_image_type == is an implementation of array2d/array2d_kernel_abstract.h
        - pixel_traits<typename image_traits<in_image_type>::pixel_type>::max() <= 65535
        - pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
        - pixel_traits<typename image_traits<in_image_type>::pixel_type>::is_unsigned == true
        - pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
        - pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false
        - pixel_traits<typename image_traits<out_image_type>::pixel_type>::is_unsigned == true
    ensures
        - Thresholds in_img into out_img using a threshold, thresh, that is chosen
          automatically from in_img.
        - #out_img == the thresholded version of in_img (in_img is converted to a grayscale
          intensity image if it is color).  Pixels in in_img with grayscale values >= thresh
          have an output value of on_pixel and all others have a value of off_pixel.
        - The thresh value used is determined by performing a k-means clustering
          on the input image histogram with a k of 2.  The point between the two
          means found is used as the thresh value.
        - #out_img.nc() == in_img.nc()
        - #out_img.nr() == in_img.nr()
        - NOTE: picking the threshold with partition_pixels() and passing it to
          threshold_image() is generally a better choice than this routine.
!*/
template <
typename image_type
>
void auto_threshold_image (
void threshold_image (
image_type& img
);
/*!
requires
- it is valid to call auto_threshold_image(img,img);
- it is valid to call threshold_image(img,img,thresh);
ensures
- calls auto_threshold_image(img,img);
- calls threshold_image(img,img,partition_pixels(img));
!*/
// ----------------------------------------------------------------------------------------
@ -111,8 +105,10 @@ namespace dlib
);
/*!
requires
- in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- out_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- in_image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
- out_image_type == an image object that implements the interface defined in
dlib/image_processing/generic_image.h
- pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
- pixel_traits<typename image_traits<in_image_type>::pixel_type>::has_alpha == false
- pixel_traits<typename image_traits<out_image_type>::pixel_type>::has_alpha == false

View File

@ -1812,6 +1812,86 @@ namespace
}
// ----------------------------------------------------------------------------------------
// Brute-force reference for partition_pixels().  For every candidate threshold it
// recomputes the two group means and the sum of absolute deviations directly from
// the histogram, with no prefix sums, and returns the threshold with the smallest
// total.  Used only to cross-check the fast implementation.
template <
    typename image_type
    >
typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type
test_partition_pixels (
    const image_type& img
)
{
    matrix<unsigned long,1> hist;
    get_histogram(img,hist);

    // Mean pixel value over the histogram bins [first,last), or 0 if those bins
    // contain no pixels.
    auto bin_mean = [&](long first, long last) -> double
    {
        double weighted = 0;
        double count = 0;
        for (long bin = first; bin < last; ++bin)
        {
            weighted += hist(bin)*bin;
            count += hist(bin);
        }
        if (count != 0)
            return weighted/count;
        return 0.0;
    };

    // Sum of absolute deviations when bins < thresh use the low group's mean and
    // bins >= thresh use the high group's mean.
    auto sad_for = [&](unsigned long thresh)
    {
        const long split = static_cast<long>(thresh);
        const double low_mean = bin_mean(0, split);
        const double high_mean = bin_mean(split, hist.size());

        double total = 0;
        for (long bin = 0; bin < hist.size(); ++bin)
        {
            const double center = (bin < split) ? low_mean : high_mean;
            total += std::abs(center-bin)*hist(bin);
        }
        return total;
    };

    unsigned long best_thresh = 0;
    double best_sad = sad_for(0);
    for (long candidate = 1; candidate < hist.size(); ++candidate)
    {
        const double sad = sad_for(candidate);
        // Same relative margin as partition_pixels() so both pick the same
        // threshold when two candidates differ only by rounding noise.
        if (sad+1e-13*sad < best_sad)
        {
            best_sad = sad;
            best_thresh = candidate;
        }
    }

    return best_thresh;
}
// ----------------------------------------------------------------------------------------
class image_tester : public tester
@ -1894,6 +1974,20 @@ namespace
draw_line(img, point(20,19), point(59,19), 00);
DLIB_TEST(sum(matrix_cast<int>(mat(img))) == 0);
}
{
    // Randomized cross-check: fill a small 8-bit image with random values and
    // verify that partition_pixels() picks the same threshold as the brute-force
    // reference, test_partition_pixels().
    matrix<unsigned char> img(4,7);
    dlib::rand rnd;
    for (int round = 0; round < 100; ++round)
    {
        print_spinner();
        for (auto& p : img)
            p = rnd.get_random_8bit_number();

        // Use DLIB_TEST, like the other checks in this tester, rather than the
        // general purpose DLIB_CASSERT.
        DLIB_TEST(test_partition_pixels(img) == partition_pixels(img));
    }
}
}
} a;