Refined the scan_image_pyramid interface a little. In particular, I split the
get_feature_vector() method into two separate functions so the interface
is a little simpler and more flexible.
Davis King 2011-12-24 18:39:35 -05:00
parent 578322dca2
commit 3ebf0f2e3f
3 changed files with 140 additions and 72 deletions
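For readers updating code against this change, below is a minimal sketch of the new calling pattern, mirroring the update made to the structural SVM code later in this diff. The helper name get_psi_and_mapped_rects, the Scanner template parameter, and the <dlib/image_processing.h> include are illustrative assumptions, not part of the commit.

#include <vector>
#include <dlib/image_processing.h>

// Hypothetical helper showing how a caller adapts to the split interface.
// Scanner is assumed to be some scan_image_pyramid instantiation that is
// already loaded with an image and has at least one detection template.
// psi is assumed to already be sized by the caller, e.g. via
// psi.set_size(scanner.get_num_dimensions()), as in the diff below.
template <typename Scanner, typename feature_vector_type>
void get_psi_and_mapped_rects (
    const Scanner& scanner,
    const std::vector<dlib::rectangle>& rects,
    feature_vector_type& psi,
    std::vector<dlib::rectangle>& mapped_rects
)
{
    // Old interface (removed by this commit):
    //   scanner.get_feature_vector(rects, psi, mapped_rects);

    // New interface: get_feature_vector() only produces the feature vector...
    scanner.get_feature_vector(rects, psi);

    // ...and the mapped rectangles are now queried one at a time.
    mapped_rects.clear();
    for (unsigned long i = 0; i < rects.size(); ++i)
        mapped_rects.push_back(scanner.get_best_matching_rect(rects[i]));
}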


@@ -86,8 +86,11 @@ namespace dlib
void get_feature_vector (
const std::vector<rectangle>& rects,
feature_vector_type& psi,
std::vector<rectangle>& mapped_rects
feature_vector_type& psi
) const;
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
template <typename T, typename U>
@@ -128,6 +131,13 @@ namespace dlib
deserialize(item.rects, in);
}
void get_mapped_rect_and_metadata (
rectangle rect,
rectangle& mapped_rect,
detection_template& best_template,
unsigned long& best_level
) const;
feature_extractor_type feats_config; // just here to hold configuration. use it to populate the feats elements.
typename array<feature_extractor_type>::kernel_2a feats;
@@ -527,6 +537,107 @@ namespace dlib
std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
}
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type
>
const rectangle scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_best_matching_rect (
const rectangle& rect
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(get_num_detection_templates() > 0 &&
is_loaded_with_image(),
"\t const rectangle scan_image_pyramid::get_best_matching_rect()"
<< "\n\t Invalid inputs were given to this function "
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
<< "\n\t this: " << this
);
rectangle mapped_rect;
detection_template best_template;
unsigned long best_level;
get_mapped_rect_and_metadata(rect, mapped_rect, best_template, best_level);
return mapped_rect;
}
// ----------------------------------------------------------------------------------------
template <
typename Pyramid_type,
typename Feature_extractor_type
>
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_mapped_rect_and_metadata (
rectangle rect,
rectangle& mapped_rect,
detection_template& best_template,
unsigned long& best_level
) const
{
pyramid_type pyr;
// Figure out the pyramid level which best matches rect against one of our
// detection template object boxes.
best_level = 0;
double match_score = std::numeric_limits<double>::infinity();
const dlib::vector<double,2> p(rect.width(), rect.height());
// for all the levels
for (unsigned long l = 0; l < feats.size(); ++l)
{
// Run the center point through the feature/image space transformation just to make
// sure we exactly replicate the procedure for shifting an object_box used elsewhere
// in this file.
const point origin = feats[l].feat_to_image_space(feats[l].image_to_feat_space(center(pyr.rect_down(rect,l))));
for (unsigned long t = 0; t < det_templates.size(); ++t)
{
// Map this detection template into the normal image space and see how
// close it is to the rect we are looking for. We do the translation here
// because the rect_up() routine takes place using integer arithmetic and
// could potentially give slightly different results with and without the
// translation.
rectangle mapped_rect = translate_rect(det_templates[t].object_box, origin);
mapped_rect = pyr.rect_up(mapped_rect, l);
const dlib::vector<double,2> p2(mapped_rect.width(),
mapped_rect.height());
if ((p-p2).length() < match_score)
{
match_score = (p-p2).length();
best_level = l;
best_template = det_templates[t];
}
}
}
// Now get the features out of feats[best_level]. But first translate best_template
// into the right spot (it should be centered at the location determined by rect)
// and convert it into the feature image coordinate system.
rect = pyr.rect_down(rect,best_level);
const point offset = -feats[best_level].image_to_feat_space(point(0,0));
const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
for (unsigned long k = 0; k < best_template.rects.size(); ++k)
{
rectangle temp = best_template.rects[k];
temp = feats[best_level].image_to_feat_space(temp);
temp = translate_rect(temp, origin);
temp = get_rect(feats[best_level]).intersect(temp);
best_template.rects[k] = temp;
}
// The input rectangle was mapped to one of the detection templates. Reverse the process
// to figure out what the mapped rectangle is in the original input space.
mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
mapped_rect = pyr.rect_up(mapped_rect, best_level);
}
// ----------------------------------------------------------------------------------------
template <
@@ -536,8 +647,7 @@ namespace dlib
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
get_feature_vector (
const std::vector<rectangle>& rects,
feature_vector_type& psi,
std::vector<rectangle>& mapped_rects
feature_vector_type& psi
) const
{
// make sure requires clause is not broken
@@ -555,74 +665,18 @@ namespace dlib
psi = 0;
mapped_rects.clear();
pyramid_type pyr;
for (unsigned long i = 0; i < rects.size(); ++i)
{
// Figure out the pyramid level which best matches rects[i] against one of our
// detection template object boxes.
unsigned long best_level = 0;
double match_score = std::numeric_limits<double>::infinity();
rectangle mapped_rect;
detection_template best_template;
rectangle rect = rects[i];
const dlib::vector<double,2> p(rect.width(), rect.height());
// for all the levels
for (unsigned long l = 0; l < feats.size(); ++l)
{
// Run the center point through the feature/image space transformation just to make
// sure we exactly replicate the procedure for shifting an object_box used elsewhere
// in this file.
const point origin = feats[l].feat_to_image_space(feats[l].image_to_feat_space(center(pyr.rect_down(rect,l))));
for (unsigned long t = 0; t < det_templates.size(); ++t)
{
// Map this detection template into the normal image space and see how
// close it is to the rect we are looking for. We do the translation here
// because the rect_up() routine takes place using integer arithmetic and
// could potentially give slightly different results with and without the
// translation.
rectangle mapped_rect = translate_rect(det_templates[t].object_box, origin);
mapped_rect = pyr.rect_up(mapped_rect, l);
const dlib::vector<double,2> p2(mapped_rect.width(),
mapped_rect.height());
if ((p-p2).length() < match_score)
{
match_score = (p-p2).length();
best_level = l;
best_template = det_templates[t];
}
}
}
// Now get the features out of feats[best_level]. But first translate best_template
// into the right spot (it should be centered at the location determined by rects[i])
// and convert it into the feature image coordinate system.
rect = pyr.rect_down(rects[i],best_level);
const point offset = -feats[best_level].image_to_feat_space(point(0,0));
const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
for (unsigned long k = 0; k < best_template.rects.size(); ++k)
{
rectangle temp = best_template.rects[k];
temp = feats[best_level].image_to_feat_space(temp);
temp = translate_rect(temp, origin);
temp = get_rect(feats[best_level]).intersect(temp);
best_template.rects[k] = temp;
}
// The input rectangle was mapped to one of the detection templates. Reverse the process
// to figure out what the mapped rectangle is in the original input space.
rectangle mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
mapped_rect = pyr.rect_up(mapped_rect, best_level);
mapped_rects.push_back(mapped_rect);
unsigned long best_level;
get_mapped_rect_and_metadata (rects[i], mapped_rect, best_template, best_level);
for (unsigned long j = 0; j < best_template.rects.size(); ++j)
{
rect = best_template.rects[j];
const rectangle rect = best_template.rects[j];
const unsigned long template_region_id = j;
const unsigned long offset = feats_config.get_num_dimensions()*template_region_id;
for (long r = rect.top(); r <= rect.bottom(); ++r)


@@ -289,10 +289,22 @@ namespace dlib
been reached).
!*/
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- is_loaded_with_image() == true
- get_num_detection_templates() > 0
ensures
- Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles
can be represented. Therefore, this function allows you to supply a rectangle and obtain the
nearest possible sliding window rectangle.
!*/
void get_feature_vector (
const std::vector<rectangle>& rects,
feature_vector_type& psi,
std::vector<rectangle>& mapped_rects
feature_vector_type& psi
) const;
/*!
requires
@@ -305,7 +317,6 @@ namespace dlib
- if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then
- #psi == the sum of feature vectors corresponding to the sliding window locations contained
in rects.
- #mapped_rects == rects
- Let w denote the w vector given to detect(), then we have:
- dot(w,#psi) == sum of scores of the dets produced by detect()
- else
@@ -313,8 +324,8 @@ namespace dlib
be output by detect(). So in the case where rects contains rectangles which could not arise
from a call to detect(), this function will map the rectangles in rects to the nearest possible
object boxes and then store the sum of feature vectors for the mapped rectangles into #psi.
- for all valid i: #mapped_rects[i] == the rectangle rects[i] gets mapped to for feature extraction.
- #mapped_rects.size() == rects.size()
- for all valid i: get_best_matching_rect(rects[i]) == the rectangle rects[i] gets mapped to for
feature extraction.
!*/
};
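As a hedged illustration of the ensures clauses above (not part of this commit): if rects are exactly the boxes returned by detect(), then dot(w, psi) should equal the sum of the detection scores, and get_best_matching_rect() reports where each input rectangle was mapped. The helper name score_sum_via_psi, the Scanner template parameter, and the includes are assumptions made for the sketch.

#include <utility>
#include <vector>
#include <dlib/image_processing.h>
#include <dlib/matrix.h>

// Hypothetical sketch of the documented guarantees. Scanner is assumed to be
// a scan_image_pyramid instantiation that is already loaded with an image and
// has at least one detection template; w and thresh come from the caller.
template <typename Scanner>
double score_sum_via_psi (
    const Scanner& scanner,
    const typename Scanner::feature_vector_type& w,
    double thresh
)
{
    std::vector<std::pair<double, dlib::rectangle> > dets;
    scanner.detect(w, dets, thresh);

    std::vector<dlib::rectangle> rects;
    for (unsigned long i = 0; i < dets.size(); ++i)
        rects.push_back(dets[i].second);

    typename Scanner::feature_vector_type psi;
    psi.set_size(scanner.get_num_dimensions());
    psi = 0;
    scanner.get_feature_vector(rects, psi);

    // Because rects came straight from detect(), each rect should map to
    // itself, i.e. get_best_matching_rect(rects[i]) == rects[i].

    // Per the spec above, this should equal the sum of dets[i].first values.
    return dlib::dot(w, psi);
}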


@@ -156,7 +156,11 @@ namespace dlib
scanner.load(images[idx]);
psi.set_size(get_num_dimensions());
std::vector<rectangle> mapped_rects;
scanner.get_feature_vector(truth_rects[idx], psi, mapped_rects);
scanner.get_feature_vector(truth_rects[idx], psi);
for (unsigned long i = 0; i < truth_rects[idx].size(); ++i)
{
mapped_rects.push_back(scanner.get_best_matching_rect(truth_rects[idx][i]));
}
psi(scanner.get_num_dimensions()) = -1.0*truth_rects[idx].size();
// check if any of the boxes overlap. If they do then it is impossible for
@@ -328,8 +332,7 @@ namespace dlib
psi.set_size(get_num_dimensions());
psi = 0;
std::vector<rectangle> mapped_rects;
scanner.get_feature_vector(final_dets, psi, mapped_rects);
scanner.get_feature_vector(final_dets, psi);
psi(scanner.get_num_dimensions()) = -1.0*final_dets.size();
}