mirror of https://github.com/davisking/dlib.git
Refined the scan_image_pyramid interface a little. In particular, I split the
get_feature_vector() method into two separate functions so the interface is a little simpler and more flexible.
This commit is contained in:
parent
578322dca2
commit
3ebf0f2e3f
|
@ -86,8 +86,11 @@ namespace dlib
|
|||
|
||||
void get_feature_vector (
|
||||
const std::vector<rectangle>& rects,
|
||||
feature_vector_type& psi,
|
||||
std::vector<rectangle>& mapped_rects
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
|
||||
template <typename T, typename U>
|
||||
|
@ -128,6 +131,13 @@ namespace dlib
|
|||
deserialize(item.rects, in);
|
||||
}
|
||||
|
||||
void get_mapped_rect_and_metadata (
|
||||
rectangle rect,
|
||||
rectangle& mapped_rect,
|
||||
detection_template& best_template,
|
||||
unsigned long& best_level
|
||||
) const;
|
||||
|
||||
|
||||
feature_extractor_type feats_config; // just here to hold configuration. use it to populate the feats elements.
|
||||
typename array<feature_extractor_type>::kernel_2a feats;
|
||||
|
@ -527,6 +537,107 @@ namespace dlib
|
|||
std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
const rectangle scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
||||
get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(get_num_detection_templates() > 0 &&
|
||||
is_loaded_with_image(),
|
||||
"\t const rectangle scan_image_pyramid::get_best_matching_rect()"
|
||||
<< "\n\t Invalid inputs were given to this function "
|
||||
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
|
||||
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
rectangle mapped_rect;
|
||||
detection_template best_template;
|
||||
unsigned long best_level;
|
||||
get_mapped_rect_and_metadata(rect, mapped_rect, best_template, best_level);
|
||||
return mapped_rect;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Pyramid_type,
|
||||
typename Feature_extractor_type
|
||||
>
|
||||
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
||||
get_mapped_rect_and_metadata (
|
||||
rectangle rect,
|
||||
rectangle& mapped_rect,
|
||||
detection_template& best_template,
|
||||
unsigned long& best_level
|
||||
) const
|
||||
{
|
||||
pyramid_type pyr;
|
||||
// Figure out the pyramid level which best matches rect against one of our
|
||||
// detection template object boxes.
|
||||
best_level = 0;
|
||||
double match_score = std::numeric_limits<double>::infinity();
|
||||
|
||||
const dlib::vector<double,2> p(rect.width(), rect.height());
|
||||
|
||||
// for all the levels
|
||||
for (unsigned long l = 0; l < feats.size(); ++l)
|
||||
{
|
||||
// Run the center point through the feature/image space transformation just to make
|
||||
// sure we exactly replicate the procedure for shifting an object_box used elsewhere
|
||||
// in this file.
|
||||
const point origin = feats[l].feat_to_image_space(feats[l].image_to_feat_space(center(pyr.rect_down(rect,l))));
|
||||
|
||||
for (unsigned long t = 0; t < det_templates.size(); ++t)
|
||||
{
|
||||
// Map this detection template into the normal image space and see how
|
||||
// close it is to the rect we are looking for. We do the translation here
|
||||
// because the rect_up() routine takes place using integer arithmetic and
|
||||
// could potentially give slightly different results with and without the
|
||||
// translation.
|
||||
rectangle mapped_rect = translate_rect(det_templates[t].object_box, origin);
|
||||
mapped_rect = pyr.rect_up(mapped_rect, l);
|
||||
|
||||
const dlib::vector<double,2> p2(mapped_rect.width(),
|
||||
mapped_rect.height());
|
||||
if ((p-p2).length() < match_score)
|
||||
{
|
||||
match_score = (p-p2).length();
|
||||
best_level = l;
|
||||
best_template = det_templates[t];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Now get the features out of feats[best_level]. But first translate best_template
|
||||
// into the right spot (it should be centered at the location determined by rect)
|
||||
// and convert it into the feature image coordinate system.
|
||||
rect = pyr.rect_down(rect,best_level);
|
||||
const point offset = -feats[best_level].image_to_feat_space(point(0,0));
|
||||
const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
|
||||
for (unsigned long k = 0; k < best_template.rects.size(); ++k)
|
||||
{
|
||||
rectangle temp = best_template.rects[k];
|
||||
temp = feats[best_level].image_to_feat_space(temp);
|
||||
temp = translate_rect(temp, origin);
|
||||
temp = get_rect(feats[best_level]).intersect(temp);
|
||||
best_template.rects[k] = temp;
|
||||
}
|
||||
|
||||
// The input rectangle was mapped to one of the detection templates. Reverse the process
|
||||
// to figure out what the mapped rectangle is in the original input space.
|
||||
mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
|
||||
mapped_rect = pyr.rect_up(mapped_rect, best_level);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
|
@ -536,8 +647,7 @@ namespace dlib
|
|||
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
||||
get_feature_vector (
|
||||
const std::vector<rectangle>& rects,
|
||||
feature_vector_type& psi,
|
||||
std::vector<rectangle>& mapped_rects
|
||||
feature_vector_type& psi
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
|
@ -555,74 +665,18 @@ namespace dlib
|
|||
|
||||
psi = 0;
|
||||
|
||||
mapped_rects.clear();
|
||||
|
||||
pyramid_type pyr;
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
// Figure out the pyramid level which best matches rects[i] against one of our
|
||||
// detection template object boxes.
|
||||
unsigned long best_level = 0;
|
||||
double match_score = std::numeric_limits<double>::infinity();
|
||||
rectangle mapped_rect;
|
||||
detection_template best_template;
|
||||
|
||||
rectangle rect = rects[i];
|
||||
const dlib::vector<double,2> p(rect.width(), rect.height());
|
||||
|
||||
// for all the levels
|
||||
for (unsigned long l = 0; l < feats.size(); ++l)
|
||||
{
|
||||
// Run the center point through the feature/image space transformation just to make
|
||||
// sure we exactly replicate the procedure for shifting an object_box used elsewhere
|
||||
// in this file.
|
||||
const point origin = feats[l].feat_to_image_space(feats[l].image_to_feat_space(center(pyr.rect_down(rect,l))));
|
||||
|
||||
for (unsigned long t = 0; t < det_templates.size(); ++t)
|
||||
{
|
||||
// Map this detection template into the normal image space and see how
|
||||
// close it is to the rect we are looking for. We do the translation here
|
||||
// because the rect_up() routine takes place using integer arithmetic and
|
||||
// could potentially give slightly different results with and without the
|
||||
// translation.
|
||||
rectangle mapped_rect = translate_rect(det_templates[t].object_box, origin);
|
||||
mapped_rect = pyr.rect_up(mapped_rect, l);
|
||||
|
||||
const dlib::vector<double,2> p2(mapped_rect.width(),
|
||||
mapped_rect.height());
|
||||
if ((p-p2).length() < match_score)
|
||||
{
|
||||
match_score = (p-p2).length();
|
||||
best_level = l;
|
||||
best_template = det_templates[t];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Now get the features out of feats[best_level]. But first translate best_template
|
||||
// into the right spot (it should be centered at the location determined by rects[i])
|
||||
// and convert it into the feature image coordinate system.
|
||||
rect = pyr.rect_down(rects[i],best_level);
|
||||
const point offset = -feats[best_level].image_to_feat_space(point(0,0));
|
||||
const point origin = feats[best_level].image_to_feat_space(center(rect)) + offset;
|
||||
for (unsigned long k = 0; k < best_template.rects.size(); ++k)
|
||||
{
|
||||
rectangle temp = best_template.rects[k];
|
||||
temp = feats[best_level].image_to_feat_space(temp);
|
||||
temp = translate_rect(temp, origin);
|
||||
temp = get_rect(feats[best_level]).intersect(temp);
|
||||
best_template.rects[k] = temp;
|
||||
}
|
||||
|
||||
// The input rectangle was mapped to one of the detection templates. Reverse the process
|
||||
// to figure out what the mapped rectangle is in the original input space.
|
||||
rectangle mapped_rect = translate_rect(best_template.object_box, feats[best_level].feat_to_image_space(origin-offset));
|
||||
mapped_rect = pyr.rect_up(mapped_rect, best_level);
|
||||
mapped_rects.push_back(mapped_rect);
|
||||
unsigned long best_level;
|
||||
get_mapped_rect_and_metadata (rects[i], mapped_rect, best_template, best_level);
|
||||
|
||||
for (unsigned long j = 0; j < best_template.rects.size(); ++j)
|
||||
{
|
||||
rect = best_template.rects[j];
|
||||
const rectangle rect = best_template.rects[j];
|
||||
const unsigned long template_region_id = j;
|
||||
const unsigned long offset = feats_config.get_num_dimensions()*template_region_id;
|
||||
for (long r = rect.top(); r <= rect.bottom(); ++r)
|
||||
|
|
|
@ -289,10 +289,22 @@ namespace dlib
|
|||
been reached).
|
||||
!*/
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- is_loaded_with_image() == true
|
||||
- get_num_detection_templates() > 0
|
||||
ensures
|
||||
- Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles
|
||||
can be represented. Therefore, this function allows you to supply a rectangle and obtain the
|
||||
nearest possible sliding window rectangle.
|
||||
!*/
|
||||
|
||||
void get_feature_vector (
|
||||
const std::vector<rectangle>& rects,
|
||||
feature_vector_type& psi,
|
||||
std::vector<rectangle>& mapped_rects
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
|
@ -305,7 +317,6 @@ namespace dlib
|
|||
- if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then
|
||||
- #psi == the sum of feature vectors corresponding to the sliding window locations contained
|
||||
in rects.
|
||||
- #mapped_rects == rects
|
||||
- Let w denote the w vector given to detect(), then we have:
|
||||
- dot(w,#psi) == sum of scores of the dets produced by detect()
|
||||
- else
|
||||
|
@ -313,8 +324,8 @@ namespace dlib
|
|||
be output by detect(). So in the case where rects contains rectangles which could not arise
|
||||
from a call to detect(), this function will map the rectangles in rects to the nearest possible
|
||||
object boxes and then store the sum of feature vectors for the mapped rectangles into #psi.
|
||||
- for all valid i: #mapped_rects[i] == the rectangle rects[i] gets mapped to for feature extraction.
|
||||
- #mapped_rects.size() == rects.size()
|
||||
- for all valid i: get_best_matching_rect(rects[i]) == the rectangle rects[i] gets mapped to for
|
||||
feature extraction.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
|
|
@ -156,7 +156,11 @@ namespace dlib
|
|||
scanner.load(images[idx]);
|
||||
psi.set_size(get_num_dimensions());
|
||||
std::vector<rectangle> mapped_rects;
|
||||
scanner.get_feature_vector(truth_rects[idx], psi, mapped_rects);
|
||||
scanner.get_feature_vector(truth_rects[idx], psi);
|
||||
for (unsigned long i = 0; i < truth_rects[idx].size(); ++i)
|
||||
{
|
||||
mapped_rects.push_back(scanner.get_best_matching_rect(truth_rects[idx][i]));
|
||||
}
|
||||
psi(scanner.get_num_dimensions()) = -1.0*truth_rects[idx].size();
|
||||
|
||||
// check if any of the boxes overlap. If they do then it is impossible for
|
||||
|
@ -328,8 +332,7 @@ namespace dlib
|
|||
|
||||
psi.set_size(get_num_dimensions());
|
||||
psi = 0;
|
||||
std::vector<rectangle> mapped_rects;
|
||||
scanner.get_feature_vector(final_dets, psi, mapped_rects);
|
||||
scanner.get_feature_vector(final_dets, psi);
|
||||
|
||||
psi(scanner.get_num_dimensions()) = -1.0*final_dets.size();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue