diff --git a/dlib/image_processing/scan_image_pyramid.h b/dlib/image_processing/scan_image_pyramid.h index 60fda269f..ee7665b31 100644 --- a/dlib/image_processing/scan_image_pyramid.h +++ b/dlib/image_processing/scan_image_pyramid.h @@ -85,7 +85,7 @@ namespace dlib ) const; void get_feature_vector ( - const std::vector& rects, + const rectangle& rect, feature_vector_type& psi ) const; @@ -646,7 +646,7 @@ namespace dlib > void scan_image_pyramid:: get_feature_vector ( - const std::vector& rects, + const rectangle& rect, feature_vector_type& psi ) const { @@ -663,31 +663,25 @@ namespace dlib << "\n\t this: " << this ); - psi = 0; - - pyramid_type pyr; - for (unsigned long i = 0; i < rects.size(); ++i) - { - rectangle mapped_rect; - detection_template best_template; - unsigned long best_level; - get_mapped_rect_and_metadata (rects[i], mapped_rect, best_template, best_level); + rectangle mapped_rect; + detection_template best_template; + unsigned long best_level; + get_mapped_rect_and_metadata (rect, mapped_rect, best_template, best_level); - for (unsigned long j = 0; j < best_template.rects.size(); ++j) + for (unsigned long j = 0; j < best_template.rects.size(); ++j) + { + const rectangle rect = best_template.rects[j]; + const unsigned long template_region_id = j; + const unsigned long offset = feats_config.get_num_dimensions()*template_region_id; + for (long r = rect.top(); r <= rect.bottom(); ++r) { - const rectangle rect = best_template.rects[j]; - const unsigned long template_region_id = j; - const unsigned long offset = feats_config.get_num_dimensions()*template_region_id; - for (long r = rect.top(); r <= rect.bottom(); ++r) + for (long c = rect.left(); c <= rect.right(); ++c) { - for (long c = rect.left(); c <= rect.right(); ++c) + const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c); + for (unsigned long k = 0; k < descriptor.size(); ++k) { - const typename feature_extractor_type::descriptor_type& descriptor = 
feats[best_level](r,c); - for (unsigned long k = 0; k < descriptor.size(); ++k) - { - psi(descriptor[k].first + offset) += descriptor[k].second; - } + psi(descriptor[k].first + offset) += descriptor[k].second; } } } diff --git a/dlib/image_processing/scan_image_pyramid_abstract.h b/dlib/image_processing/scan_image_pyramid_abstract.h index f14bd694c..705ecd76d 100644 --- a/dlib/image_processing/scan_image_pyramid_abstract.h +++ b/dlib/image_processing/scan_image_pyramid_abstract.h @@ -303,7 +303,7 @@ namespace dlib !*/ void get_feature_vector ( - const std::vector& rects, + const rectangle& rect, feature_vector_type& psi ) const; /*! @@ -312,20 +312,21 @@ namespace dlib - get_num_detection_templates() > 0 - psi.size() >= get_num_dimensions() ensures - - This function allows you to determine the feature vector used for a sliding window location - or the sum of such vectors for a set of locations. - - if (rects was produced by a call to detect(), i.e. rects contains the contents of dets) then - - #psi == the sum of feature vectors corresponding to the sliding window locations contained - in rects. - - Let w denote the w vector given to detect(), then we have: - - dot(w,#psi) == sum of scores of the dets produced by detect() + - This function allows you to determine the feature vector used for a sliding window location. + Note that this vector is added to psi. + - if (rect was produced by a call to detect(), i.e. rect is an element of dets) then + - #psi == psi + the feature vector corresponding to the sliding window location indicated + by rect. + - Let w denote the w vector given to detect(), then if psi is assigned 0 before calling + get_feature_vector() we have: + - dot(w,#psi) == the score produced by detect() for rect. + - get_best_matching_rect(rect) == rect - else - Since scan_image_pyramid is a sliding window classifier system, not all possible rectangles can - be output by detect(). 
So in the case where rects contains rectangles which could not arise - from a call to detect(), this function will map the rectangles in rects to the nearest possible - object boxes and then store the sum of feature vectors for the mapped rectangles into #psi. - - for all valid i: get_best_matching_rect(rects[i]) == the rectangle rects[i] gets mapped to for - feature extraction. + be output by detect(). So in the case where rect could not arise from a call to detect(), this + function will map rect to the nearest possible object box and then add the feature vector for + the mapped rectangle into #psi. + - get_best_matching_rect(rect) == the rectangle rect gets mapped to for feature extraction. !*/ }; diff --git a/dlib/svm/structural_svm_object_detection_problem.h b/dlib/svm/structural_svm_object_detection_problem.h index 2deca21b1..49de76ae5 100644 --- a/dlib/svm/structural_svm_object_detection_problem.h +++ b/dlib/svm/structural_svm_object_detection_problem.h @@ -156,10 +156,12 @@ namespace dlib scanner.load(images[idx]); psi.set_size(get_num_dimensions()); std::vector mapped_rects; - scanner.get_feature_vector(truth_rects[idx], psi); + + psi = 0; for (unsigned long i = 0; i < truth_rects[idx].size(); ++i) { mapped_rects.push_back(scanner.get_best_matching_rect(truth_rects[idx][i])); + scanner.get_feature_vector(truth_rects[idx][i], psi); } psi(scanner.get_num_dimensions()) = -1.0*truth_rects[idx].size(); @@ -332,7 +334,8 @@ namespace dlib psi.set_size(get_num_dimensions()); psi = 0; - scanner.get_feature_vector(final_dets, psi); + for (unsigned long i = 0; i < final_dets.size(); ++i) + scanner.get_feature_vector(final_dets[i], psi); psi(scanner.get_num_dimensions()) = -1.0*final_dets.size(); }