Made extract_fhog_features() faster by using simd instructions. Also added an

option to zero pad the borders of the output to it's easier to filter.
This commit is contained in:
Davis King 2013-11-06 22:39:45 -05:00
parent 2895d425a1
commit cbc79bcfb0
2 changed files with 455 additions and 94 deletions

View File

@ -11,6 +11,8 @@
#include "assign_image.h"
#include "draw.h"
#include "interpolation.h"
#include "../simd/simd4i.h"
#include "../simd/simd4f.h"
namespace dlib
{
@ -57,6 +59,89 @@ namespace dlib
}
}
template <typename image_type>
inline typename dlib::enable_if_c<pixel_traits<typename image_type::type>::rgb>::type get_gradient (
const int r,
const int c,
const image_type& img,
simd4f& grad_x,
simd4f& grad_y,
simd4f& len
)
{
simd4i rleft((int)img[r][c-1].red,
(int)img[r][c].red,
(int)img[r][c+1].red,
(int)img[r][c+2].red);
simd4i rright((int)img[r][c+1].red,
(int)img[r][c+2].red,
(int)img[r][c+3].red,
(int)img[r][c+4].red);
simd4i rtop((int)img[r-1][c].red,
(int)img[r-1][c+1].red,
(int)img[r-1][c+2].red,
(int)img[r-1][c+3].red);
simd4i rbottom((int)img[r+1][c].red,
(int)img[r+1][c+1].red,
(int)img[r+1][c+2].red,
(int)img[r+1][c+3].red);
simd4i gleft((int)img[r][c-1].green,
(int)img[r][c].green,
(int)img[r][c+1].green,
(int)img[r][c+2].green);
simd4i gright((int)img[r][c+1].green,
(int)img[r][c+2].green,
(int)img[r][c+3].green,
(int)img[r][c+4].green);
simd4i gtop((int)img[r-1][c].green,
(int)img[r-1][c+1].green,
(int)img[r-1][c+2].green,
(int)img[r-1][c+3].green);
simd4i gbottom((int)img[r+1][c].green,
(int)img[r+1][c+1].green,
(int)img[r+1][c+2].green,
(int)img[r+1][c+3].green);
simd4i bleft((int)img[r][c-1].blue,
(int)img[r][c].blue,
(int)img[r][c+1].blue,
(int)img[r][c+2].blue);
simd4i bright((int)img[r][c+1].blue,
(int)img[r][c+2].blue,
(int)img[r][c+3].blue,
(int)img[r][c+4].blue);
simd4i btop((int)img[r-1][c].blue,
(int)img[r-1][c+1].blue,
(int)img[r-1][c+2].blue,
(int)img[r-1][c+3].blue);
simd4i bbottom((int)img[r+1][c].blue,
(int)img[r+1][c+1].blue,
(int)img[r+1][c+2].blue,
(int)img[r+1][c+3].blue);
simd4i grad_x_red = rright-rleft;
simd4i grad_y_red = rbottom-rtop;
simd4i grad_x_green = gright-gleft;
simd4i grad_y_green = gbottom-gtop;
simd4i grad_x_blue = bright-bleft;
simd4i grad_y_blue = bbottom-btop;
simd4i rlen = grad_x_red*grad_x_red + grad_y_red*grad_y_red;
simd4i glen = grad_x_green*grad_x_green + grad_y_green*grad_y_green;
simd4i blen = grad_x_blue*grad_x_blue + grad_y_blue*grad_y_blue;
simd4i cmp = rlen>glen;
simd4i tgrad_x = select(cmp,grad_x_red,grad_x_green);
simd4i tgrad_y = select(cmp,grad_y_red,grad_y_green);
simd4i tlen = select(cmp,rlen,glen);
cmp = tlen>blen;
grad_x = select(cmp,tgrad_x,grad_x_blue);
grad_y = select(cmp,tgrad_y,grad_y_blue);
len = select(cmp,tlen,blen);
}
// ------------------------------------------------------------------------------------
template <typename image_type>
@ -73,10 +158,44 @@ namespace dlib
len = length_squared(grad);
}
template <typename image_type>
inline typename dlib::disable_if_c<pixel_traits<typename image_type::type>::rgb>::type get_gradient (
int r,
int c,
const image_type& img,
simd4f& grad_x,
simd4f& grad_y,
simd4f& len
)
{
simd4i left((int)get_pixel_intensity(img[r][c-1]),
(int)get_pixel_intensity(img[r][c]),
(int)get_pixel_intensity(img[r][c+1]),
(int)get_pixel_intensity(img[r][c+2]));
simd4i right((int)get_pixel_intensity(img[r][c+1]),
(int)get_pixel_intensity(img[r][c+2]),
(int)get_pixel_intensity(img[r][c+3]),
(int)get_pixel_intensity(img[r][c+4]));
simd4i top((int)get_pixel_intensity(img[r-1][c]),
(int)get_pixel_intensity(img[r-1][c+1]),
(int)get_pixel_intensity(img[r-1][c+2]),
(int)get_pixel_intensity(img[r-1][c+3]));
simd4i bottom((int)get_pixel_intensity(img[r+1][c]),
(int)get_pixel_intensity(img[r+1][c+1]),
(int)get_pixel_intensity(img[r+1][c+2]),
(int)get_pixel_intensity(img[r+1][c+3]));
grad_x = right-left;
grad_y = bottom-top;
len = (grad_x*grad_x + grad_y*grad_y);
}
// ------------------------------------------------------------------------------------
template <typename T, typename mm1, typename mm2>
void set_hog (
inline void set_hog (
dlib::array<array2d<T,mm1>,mm2>& hog,
int o,
int x,
@ -91,21 +210,29 @@ namespace dlib
void init_hog (
dlib::array<array2d<T,mm1>,mm2>& hog,
int hog_nr,
int hog_nc
int hog_nc,
int filter_rows_padding,
int filter_cols_padding
)
{
const int num_hog_bands = 27+4;
hog.resize(num_hog_bands);
for (int i = 0; i < num_hog_bands; ++i)
{
hog[i].set_size(hog_nr, hog_nc);
hog[i].set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
rectangle rect = get_rect(hog[i]);
rect.top() += (filter_rows_padding-1)/2;
rect.left() += (filter_cols_padding-1)/2;
rect.right() -= filter_cols_padding/2;
rect.bottom() -= filter_rows_padding/2;
zero_border_pixels(hog[i],rect);
}
}
// ------------------------------------------------------------------------------------
template <typename T, typename mm>
void set_hog (
inline void set_hog (
array2d<matrix<T,31,1>,mm>& hog,
int o,
int x,
@ -120,10 +247,25 @@ namespace dlib
void init_hog (
array2d<matrix<T,31,1>,mm>& hog,
int hog_nr,
int hog_nc
int hog_nc,
int filter_rows_padding,
int filter_cols_padding
)
{
hog.set_size(hog_nr, hog_nc);
hog.set_size(hog_nr+filter_rows_padding-1, hog_nc+filter_cols_padding-1);
// now zero out the border region
rectangle rect = get_rect(hog);
rect.top() += (filter_rows_padding-1)/2;
rect.left() += (filter_cols_padding-1)/2;
rect.right() -= filter_cols_padding/2;
rect.bottom() -= filter_rows_padding/2;
border_enumerator be(get_rect(hog),rect);
while (be.move_next())
{
const point p = be.element();
set_all_elements(hog[p.y()][p.x()], 0);
}
}
// ------------------------------------------------------------------------------------
@ -135,9 +277,22 @@ namespace dlib
void impl_extract_fhog_features(
const image_type& img,
out_type& hog,
int cell_size
int cell_size,
int filter_rows_padding,
int filter_cols_padding
)
{
// make sure requires clause is not broken
DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t void extract_fhog_features()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
/*
This function implements the HOG feature extraction method described in
the paper:
@ -199,7 +354,11 @@ namespace dlib
return;
}
array2d<matrix<float,18,1> > hist(cells_nr, cells_nc);
// We give hist extra padding around the edges (1 cell all the way around the
// edge) so we can avoid needing to do boundary checks when indexing into it
// later on. So some statements assign to the boundary but those values are
// never used.
array2d<matrix<float,18,1> > hist(cells_nr+2, cells_nc+2);
for (long r = 0; r < hist.nr(); ++r)
{
for (long c = 0; c < hist.nc(); ++c)
@ -219,23 +378,98 @@ namespace dlib
hog.clear();
return;
}
init_hog(hog, hog_nr, hog_nc);
const int padding_rows_offset = (filter_rows_padding-1)/2;
const int padding_cols_offset = (filter_cols_padding-1)/2;
init_hog(hog, hog_nr, hog_nc, filter_rows_padding, filter_cols_padding);
const int visible_nr = cells_nr*cell_size;
const int visible_nc = cells_nc*cell_size;
const int visible_nr = std::min((long)cells_nr*cell_size,img.nr())-1;
const int visible_nc = std::min((long)cells_nc*cell_size,img.nc())-1;
// First populate the gradient histograms
for (int y = 1; y < visible_nr-1; y++)
for (int y = 1; y < visible_nr; y++)
{
const int r = std::min<int>(y, img.nr()-2);
for (int x = 1; x < visible_nc-1; x++)
const double yp = ((double)y+0.5)/(double)cell_size - 0.5;
const int iyp = (int)std::floor(yp);
const double vy0 = yp-iyp;
const double vy1 = 1.0-vy0;
int x;
for (x = 1; x < visible_nc-3; x+=4)
{
const int c = std::min<int>(x, img.nc()-2);
simd4f xx(x,x+1,x+2,x+3);
// v will be the length of the gradient vectors.
simd4f grad_x, grad_y, v;
get_gradient(y,x,img,grad_x,grad_y,v);
// We will use bilinear interpolation to add into the histogram bins.
// So first we precompute the values needed to determine how much each
// pixel votes into each bin.
simd4f xp = (xx+0.5)/(float)cell_size - 0.5;
simd4f ixp = floor(xp);
simd4f vx0 = xp-ixp;
simd4f vx1 = 1.0f-vx0;
v = sqrt(v);
// Now snap the gradient to one of 18 orientations
simd4f best_dot = 0;
simd4f best_o = 0;
for (int o = 0; o < 9; o++)
{
simd4f dot = grad_x*directions[o](0) + grad_y*directions[o](1);
simd4f_bool cmp = dot>best_dot;
best_dot = select(cmp,dot,best_dot);
dot *= -1;
best_o = select(cmp,o,best_o);
cmp = dot>best_dot;
best_dot = select(cmp,dot,best_dot);
best_o = select(cmp,o+9,best_o);
}
// Add the gradient magnitude, v, to 4 histograms around pixel using
// bilinear interpolation.
vx1 *= v;
vx0 *= v;
// The amounts for each bin
simd4f v11 = vy1*vx1;
simd4f v01 = vy0*vx1;
simd4f v10 = vy1*vx0;
simd4f v00 = vy0*vx0;
float _best_o[4]; best_o.store(_best_o);
float _ixp[4]; ixp.store(_ixp);
float _v11[4]; v11.store(_v11);
float _v01[4]; v01.store(_v01);
float _v10[4]; v10.store(_v10);
float _v00[4]; v00.store(_v00);
hist[iyp+1] [_ixp[0]+1](_best_o[0]) += _v11[0];
hist[iyp+1+1][_ixp[0]+1](_best_o[0]) += _v01[0];
hist[iyp+1] [_ixp[0]+2](_best_o[0]) += _v10[0];
hist[iyp+1+1][_ixp[0]+2](_best_o[0]) += _v00[0];
hist[iyp+1] [_ixp[1]+1](_best_o[1]) += _v11[1];
hist[iyp+1+1][_ixp[1]+1](_best_o[1]) += _v01[1];
hist[iyp+1] [_ixp[1]+2](_best_o[1]) += _v10[1];
hist[iyp+1+1][_ixp[1]+2](_best_o[1]) += _v00[1];
hist[iyp+1] [_ixp[2]+1](_best_o[2]) += _v11[2];
hist[iyp+1+1][_ixp[2]+1](_best_o[2]) += _v01[2];
hist[iyp+1] [_ixp[2]+2](_best_o[2]) += _v10[2];
hist[iyp+1+1][_ixp[2]+2](_best_o[2]) += _v00[2];
hist[iyp+1] [_ixp[3]+1](_best_o[3]) += _v11[3];
hist[iyp+1+1][_ixp[3]+1](_best_o[3]) += _v01[3];
hist[iyp+1] [_ixp[3]+2](_best_o[3]) += _v10[3];
hist[iyp+1+1][_ixp[3]+2](_best_o[3]) += _v00[3];
}
// Now process the right columns that don't fit into simd registers.
for (; x < visible_nc; x++)
{
matrix<double,2,1> grad;
double v;
get_gradient(r,c,img,grad,v);
v = std::sqrt(v);
get_gradient(y,x,img,grad,v);
// snap to one of 18 orientations
double best_dot = 0;
@ -255,27 +489,17 @@ namespace dlib
}
}
v = std::sqrt(v);
// add to 4 histograms around pixel using bilinear interpolation
double xp = ((double)x+0.5)/(double)cell_size - 0.5;
double yp = ((double)y+0.5)/(double)cell_size - 0.5;
int ixp = (int)std::floor(xp);
int iyp = (int)std::floor(yp);
double vx0 = xp-ixp;
double vy0 = yp-iyp;
double vx1 = 1.0-vx0;
double vy1 = 1.0-vy0;
const double xp = ((double)x+0.5)/(double)cell_size - 0.5;
const int ixp = (int)std::floor(xp);
const double vx0 = xp-ixp;
const double vx1 = 1.0-vx0;
if (ixp >= 0 && iyp >= 0)
hist[iyp][ixp](best_o) += vy1*vx1*v;
if (iyp+1 < cells_nr && ixp >= 0)
hist[iyp+1][ixp](best_o) += vy0*vx1*v;
if (iyp >= 0 && ixp+1 < cells_nc)
hist[iyp][ixp+1](best_o) += vy1*vx0*v;
if (ixp+1 < cells_nc && iyp+1 < cells_nr)
hist[iyp+1][ixp+1](best_o) += vy0*vx0*v;
hist[iyp+1][ixp+1](best_o) += vy1*vx1*v;
hist[iyp+1+1][ixp+1](best_o) += vy0*vx1*v;
hist[iyp+1][ixp+1+1](best_o) += vy1*vx0*v;
hist[iyp+1+1][ixp+1+1](best_o) += vy0*vx0*v;
}
}
@ -286,7 +510,7 @@ namespace dlib
{
for (int o = 0; o < 9; o++)
{
norm[r][c] += (hist[r][c](o) + hist[r][c](o+9)) * (hist[r][c](o) + hist[r][c](o+9));
norm[r][c] += (hist[r+1][c+1](o) + hist[r+1][c+1](o+9)) * (hist[r+1][c+1](o) + hist[r+1][c+1](o+9));
}
}
}
@ -295,6 +519,7 @@ namespace dlib
// compute features
for (int y = 0; y < hog_nr; y++)
{
const int yy = y+padding_rows_offset;
for (int x = 0; x < hog_nc; x++)
{
const double nn1 = 0.2*std::sqrt(norm[y+1][x+1] + norm[y+1][x+2] + norm[y+2][x+1] + norm[y+2][x+2] + eps);
@ -311,14 +536,16 @@ namespace dlib
double t3 = 0;
double t4 = 0;
const int xx = x+padding_cols_offset;
// contrast-sensitive features
for (int o = 0; o < 18; o++)
{
double h1 = std::min<double>(hist[y+1][x+1](o) , nn1)*n1;
double h2 = std::min<double>(hist[y+1][x+1](o) , nn2)*n2;
double h3 = std::min<double>(hist[y+1][x+1](o) , nn3)*n3;
double h4 = std::min<double>(hist[y+1][x+1](o) , nn4)*n4;
set_hog(hog,o,x,y, (h1 + h2 + h3 + h4));
double h1 = std::min<double>(hist[y+1+1][x+1+1](o) , nn1)*n1;
double h2 = std::min<double>(hist[y+1+1][x+1+1](o) , nn2)*n2;
double h3 = std::min<double>(hist[y+1+1][x+1+1](o) , nn3)*n3;
double h4 = std::min<double>(hist[y+1+1][x+1+1](o) , nn4)*n4;
set_hog(hog,o,xx,yy, (h1 + h2 + h3 + h4));
t1 += h1;
t2 += h2;
t3 += h3;
@ -328,19 +555,19 @@ namespace dlib
// contrast-insensitive features
for (int o = 0; o < 9; o++)
{
double sum = hist[y+1][x+1](o) + hist[y+1][x+1](o+9);
double sum = hist[y+1+1][x+1+1](o) + hist[y+1+1][x+1+1](o+9);
double h1 = std::min(sum , nn1)*n1;
double h2 = std::min(sum , nn2)*n2;
double h3 = std::min(sum , nn3)*n3;
double h4 = std::min(sum , nn4)*n4;
set_hog(hog,o+18,x,y, (h1 + h2 + h3 + h4));
set_hog(hog,o+18,xx,yy, (h1 + h2 + h3 + h4));
}
// texture features
set_hog(hog,27,x,y, 2*0.2357 * t1);
set_hog(hog,28,x,y, 2*0.2357 * t2);
set_hog(hog,29,x,y, 2*0.2357 * t3);
set_hog(hog,30,x,y, 2*0.2357 * t4);
set_hog(hog,27,xx,yy, 2*0.2357 * t1);
set_hog(hog,28,xx,yy, 2*0.2357 * t2);
set_hog(hog,29,xx,yy, 2*0.2357 * t3);
set_hog(hog,30,xx,yy, 2*0.2357 * t4);
}
}
}
@ -383,10 +610,12 @@ namespace dlib
void extract_fhog_features(
const image_type& img,
dlib::array<array2d<T,mm1>,mm2>& hog,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
)
{
return impl_fhog::impl_extract_fhog_features(img, hog, cell_size);
return impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
}
template <
@ -397,10 +626,12 @@ namespace dlib
void extract_fhog_features(
const image_type& img,
array2d<matrix<T,31,1>,mm>& hog,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
)
{
return impl_fhog::impl_extract_fhog_features(img, hog, cell_size);
return impl_fhog::impl_extract_fhog_features(img, hog, cell_size, filter_rows_padding, filter_cols_padding);
}
// ----------------------------------------------------------------------------------------
@ -408,52 +639,105 @@ namespace dlib
inline point image_to_fhog (
point p,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
)
{
// There is a one pixel border around the imag.
// make sure requires clause is not broken
DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t point image_to_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
// There is a one pixel border around the image.
p -= point(1,1);
// There is also a 1 "cell" border around the HOG image formation.
return p/cell_size - point(1,1);
return p/cell_size - point(1,1) + point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2);
}
// ----------------------------------------------------------------------------------------
inline rectangle image_to_fhog (
const rectangle& rect,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
)
{
return rectangle(image_to_fhog(rect.tl_corner(),cell_size),
image_to_fhog(rect.br_corner(),cell_size));
// make sure requires clause is not broken
DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t rectangle image_to_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
return rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding));
}
// ----------------------------------------------------------------------------------------
inline point fhog_to_image (
point p,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
)
{
// make sure requires clause is not broken
DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t point fhog_to_image()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
// Convert to image space and then set to the center of the cell.
point offset;
p = (p+point(1,1)-point((filter_cols_padding-1)/2,(filter_rows_padding-1)/2))*cell_size + point(1,1);
if (p.x() >= 0 && p.y() >= 0) offset = point(cell_size/2,cell_size/2);
if (p.x() < 0 && p.y() >= 0) offset = point(-cell_size/2,cell_size/2);
if (p.x() >= 0 && p.y() < 0) offset = point(cell_size/2,-cell_size/2);
if (p.x() < 0 && p.y() < 0) offset = point(-cell_size/2,-cell_size/2);
return (p+point(1,1))*cell_size + point(1,1) + offset;
return p + offset;
}
// ----------------------------------------------------------------------------------------
inline rectangle fhog_to_image (
const rectangle& rect,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
)
{
return rectangle(fhog_to_image(rect.tl_corner(),cell_size),
fhog_to_image(rect.br_corner(),cell_size));
// make sure requires clause is not broken
DLIB_ASSERT( cell_size > 0 &&
filter_rows_padding > 0 &&
filter_cols_padding > 0 ,
"\t rectangle fhog_to_image()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_size: " << cell_size
<< "\n\t filter_rows_padding: " << filter_rows_padding
<< "\n\t filter_cols_padding: " << filter_cols_padding
);
return rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding));
}
// ----------------------------------------------------------------------------------------
@ -466,25 +750,35 @@ namespace dlib
>
matrix<unsigned char> draw_fhog(
const dlib::array<array2d<T,mm1>,mm2>& hog,
const long w = 15
const long cell_draw_size = 15
)
{
// make sure requires clause is not broken
DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31,
"\t matrix<unsigned char> draw_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_draw_size: " << cell_draw_size
<< "\n\t hog.size(): " << hog.size()
);
dlib::array<matrix<float> > mbars;
impl_fhog::create_fhog_bar_images(mbars,w);
impl_fhog::create_fhog_bar_images(mbars,cell_draw_size);
// now draw the bars onto the HOG cells
matrix<float> himg(hog[0].nr()*w, hog[0].nc()*w);
matrix<float> himg(hog[0].nr()*cell_draw_size, hog[0].nc()*cell_draw_size);
himg = 0;
for (unsigned long d = 0; d < mbars.size(); ++d)
{
for (long r = 0; r < himg.nr(); r+=w)
for (long r = 0; r < himg.nr(); r+=cell_draw_size)
{
for (long c = 0; c < himg.nc(); c+=w)
for (long c = 0; c < himg.nc(); c+=cell_draw_size)
{
const float val = hog[d][r/w][c/w] + hog[d+mbars.size()][r/w][c/w] + hog[d+mbars.size()*2][r/w][c/w];
const float val = hog[d][r/cell_draw_size][c/cell_draw_size] +
hog[d+mbars.size()][r/cell_draw_size][c/cell_draw_size] +
hog[d+mbars.size()*2][r/cell_draw_size][c/cell_draw_size];
if (val > 0)
{
set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()];
set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()];
}
}
}
@ -501,9 +795,17 @@ namespace dlib
>
matrix<unsigned char> draw_fhog (
const std::vector<matrix<T> >& hog,
const long w = 15
const long cell_draw_size = 15
)
{
// make sure requires clause is not broken
DLIB_ASSERT( cell_draw_size > 0 && hog.size()==31,
"\t matrix<unsigned char> draw_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_draw_size: " << cell_draw_size
<< "\n\t hog.size(): " << hog.size()
);
// Just convert the input into the right object and then call the above draw_fhog()
// function on it.
dlib::array<array2d<T> > temp(hog.size());
@ -518,7 +820,7 @@ namespace dlib
}
}
}
return draw_fhog(temp,w);
return draw_fhog(temp,cell_draw_size);
}
// ----------------------------------------------------------------------------------------
@ -529,25 +831,34 @@ namespace dlib
>
matrix<unsigned char> draw_fhog(
const array2d<matrix<T,31,1>,mm>& hog,
const long w = 15
const long cell_draw_size = 15
)
{
// make sure requires clause is not broken
DLIB_ASSERT( cell_draw_size > 0,
"\t matrix<unsigned char> draw_fhog()"
<< "\n\t Invalid inputs were given to this function. "
<< "\n\t cell_draw_size: " << cell_draw_size
);
dlib::array<matrix<float> > mbars;
impl_fhog::create_fhog_bar_images(mbars,w);
impl_fhog::create_fhog_bar_images(mbars,cell_draw_size);
// now draw the bars onto the HOG cells
matrix<float> himg(hog.nr()*w, hog.nc()*w);
matrix<float> himg(hog.nr()*cell_draw_size, hog.nc()*cell_draw_size);
himg = 0;
for (unsigned long d = 0; d < mbars.size(); ++d)
{
for (long r = 0; r < himg.nr(); r+=w)
for (long r = 0; r < himg.nr(); r+=cell_draw_size)
{
for (long c = 0; c < himg.nc(); c+=w)
for (long c = 0; c < himg.nc(); c+=cell_draw_size)
{
const float val = hog[r/w][c/w](d) + hog[r/w][c/w](d+mbars.size()) + hog[r/w][c/w](d+mbars.size()*2);
const float val = hog[r/cell_draw_size][c/cell_draw_size](d) +
hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()) +
hog[r/cell_draw_size][c/cell_draw_size](d+mbars.size()*2);
if (val > 0)
{
set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()];
set_subm(himg, r, c, cell_draw_size, cell_draw_size) += val*mbars[d%mbars.size()];
}
}
}

View File

@ -20,11 +20,15 @@ namespace dlib
void extract_fhog_features(
const image_type& img,
array2d<matrix<T,31,1>,mm>& hog,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
);
/*!
requires
- cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
- in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename in_image_type::type> is defined)
@ -40,11 +44,29 @@ namespace dlib
- The input image is broken into cells that are cell_size by cell_size pixels
and within each cell we compute a 31 dimensional FHOG vector. This vector
describes the gradient structure within the cell.
- #hog.nr() is approximately equal to img.nr()/cell_size.
- #hog.nc() is approximately equal to img.nc()/cell_size.
- A common task is to convolve each channel of the hog image with a linear
filter. This is made more convenient if the contents of #hog includes extra
rows and columns of zero padding along the borders. This extra padding
allows for more efficient convolution code since the code does not need to
perform expensive boundary checking. Therefore, you can set
filter_rows_padding and filter_cols_padding to indicate the size of the
filter you wish to use and this function will ensure #hog has the appropriate
extra zero padding along the borders. In particular, it will include the
following extra padding:
- (filter_rows_padding-1)/2 extra rows of zeros on the top of #hog.
- (filter_cols_padding-1)/2 extra columns of zeros on the left of #hog.
- filter_rows_padding/2 extra rows of zeros on the bottom of #hog.
- filter_cols_padding/2 extra columns of zeros on the right of #hog.
Therefore, the extra padding is done such that functions like
spatially_filter_image() apply their filters to the entire content containing
area of a hog image (note that you should use the following planar version of
extract_fhog_features() instead of the interlaced version if you want to use
spatially_filter_image() on a hog image).
- #hog.nr() is approximately equal to img.nr()/cell_size + filter_rows_padding-1.
- #hog.nc() is approximately equal to img.nc()/cell_size + filter_cols_padding-1.
- for all valid r and c:
- #hog[r][c] == the FHOG vector describing the cell centered at the pixel
location fhog_to_image(point(c,r),cell_size) in img.
- #hog[r][c] == the FHOG vector describing the cell centered at the pixel location
fhog_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding) in img.
!*/
// ----------------------------------------------------------------------------------------
@ -58,11 +80,15 @@ namespace dlib
void extract_fhog_features(
const image_type& img,
dlib::array<array2d<T,mm1>,mm2>& hog,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
);
/*!
requires
- cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
- in_image_type == is an implementation of array2d/array2d_kernel_abstract.h
- img contains some kind of pixel type.
(i.e. pixel_traits<typename in_image_type::type> is defined)
@ -83,11 +109,15 @@ namespace dlib
inline point image_to_fhog (
point p,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
);
/*!
requires
- cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures
- When using extract_fhog_features(), each FHOG cell is extracted from a
certain region in the input image. image_to_fhog() returns the identity of
@ -98,56 +128,75 @@ namespace dlib
might not have corresponding feature locations. E.g. border points or points
outside the image. In these cases the returned point will be outside the
input image.
- Note that you should use the same values of cell_size, filter_rows_padding,
and filter_cols_padding that you used with extract_fhog_features().
!*/
// ----------------------------------------------------------------------------------------
inline rectangle image_to_fhog (
const rectangle& rect,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
);
/*!
requires
- cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures
- maps a rectangle from image space to fhog space. In particular this function returns:
rectangle(image_to_fhog(rect.tl_corner(),cell_size), image_to_fhog(rect.br_corner(),cell_size))
rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
!*/
// ----------------------------------------------------------------------------------------
inline point fhog_to_image (
point p,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
);
/*!
requires
- cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures
- Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the
corresponding original input pixel. Note that since FHOG images are
spatially downsampled by aggregation into cells the mapping is not totally
invertible. Therefore, the returned location will be the center of the cell
in the original image that contained the FHOG vector at position p. Moreover,
cell_size should be set to the value used by the call to extract_fhog_features().
cell_size, filter_rows_padding, and filter_cols_padding should be set to the
values used by the call to extract_fhog_features().
- Mapping from fhog space to image space is an invertible transformation. That
is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size),cell_size).
is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size,filter_rows_padding,filter_cols_padding),
cell_size,filter_rows_padding,filter_cols_padding).
!*/
// ----------------------------------------------------------------------------------------
inline rectangle fhog_to_image (
const rectangle& rect,
int cell_size = 8
int cell_size = 8,
int filter_rows_padding = 1,
int filter_cols_padding = 1
);
/*!
requires
- cell_size > 0
- filter_rows_padding > 0
- filter_cols_padding > 0
ensures
- maps a rectangle from fhog space to image space. In particular this function returns:
rectangle(fhog_to_image(rect.tl_corner(),cell_size), fhog_to_image(rect.br_corner(),cell_size))
rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
- Mapping from fhog space to image space is an invertible transformation. That
is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size),cell_size).
is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
cell_size,filter_rows_padding,filter_cols_padding).
!*/
// ----------------------------------------------------------------------------------------
@ -187,6 +236,7 @@ namespace dlib
/*!
requires
- cell_draw_size > 0
- hog.size() == 31
ensures
- This function just converts the given hog object into an array<array2d<T>>
and passes it to the above draw_fhog() routine and returns the results.