mirror of https://github.com/davisking/dlib.git
Added random_cropper
This commit is contained in:
parent
61dce40e35
commit
1c940a5bcb
|
@ -25,6 +25,7 @@
|
|||
#include "image_transforms/fhog.h"
|
||||
#include "image_transforms/lbp.h"
|
||||
#include "image_transforms/random_color_transform.h"
|
||||
#include "image_transforms/random_cropper.h"
|
||||
|
||||
#endif // DLIB_IMAGE_TRANSFORMs_
|
||||
|
||||
|
|
|
@ -0,0 +1,232 @@
|
|||
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_RaNDOM_CROPPER_H_
|
||||
#define DLIB_RaNDOM_CROPPER_H_
|
||||
|
||||
#include "../threads.h"
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include "interpolation.h"
|
||||
#include "../image_processing/full_object_detection.h"
|
||||
#include "../rand.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
class random_cropper
|
||||
{
|
||||
chip_dims dims = chip_dims(300,300);
|
||||
bool randomly_flip = true;
|
||||
double max_rotation_degrees = 30;
|
||||
double min_object_height = 0.25; // cropped object will be at least this fraction of the height of the image.
|
||||
double max_object_height = 0.7; // cropped object will be at most this fraction of the height of the image.
|
||||
public:
|
||||
|
||||
const chip_dims& get_chip_dims(
|
||||
) const { return dims; }
|
||||
|
||||
void set_chip_dims (
|
||||
const chip_dims& dims_
|
||||
) { dims = dims_; }
|
||||
|
||||
bool get_randomly_flip (
|
||||
) const { return randomly_flip; }
|
||||
|
||||
void set_randomly_flip (
|
||||
bool value
|
||||
) { randomly_flip = value; }
|
||||
|
||||
double get_max_rotation_degrees (
|
||||
) const { return max_rotation_degrees; }
|
||||
void set_max_rotation_degrees (
|
||||
double value
|
||||
) { max_rotation_degrees = value; }
|
||||
|
||||
double get_min_object_height (
|
||||
) const { return min_object_height; }
|
||||
void set_min_object_height (
|
||||
double value
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(0 < value && value < 1);
|
||||
min_object_height = value;
|
||||
}
|
||||
|
||||
double get_max_object_height (
|
||||
) const { return max_object_height; }
|
||||
void set_max_object_height (
|
||||
double value
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(0 < value && value < 1);
|
||||
max_object_height = value;
|
||||
}
|
||||
|
||||
template <
|
||||
typename array_type
|
||||
>
|
||||
void operator() (
|
||||
size_t num_crops,
|
||||
const array_type& images,
|
||||
const std::vector<std::vector<mmod_rect>>& rects,
|
||||
array_type& crops,
|
||||
std::vector<std::vector<mmod_rect>>& crop_rects
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(images.size() == rects.size());
|
||||
crops.resize(num_crops);
|
||||
crop_rects.resize(num_crops);
|
||||
parallel_for(0, num_crops, [&](long i) {
|
||||
(*this)(images, rects, crops[i], crop_rects[i]);
|
||||
});
|
||||
}
|
||||
|
||||
template <
|
||||
typename array_type,
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const array_type& images,
|
||||
const std::vector<std::vector<mmod_rect>>& rects,
|
||||
image_type& crop,
|
||||
std::vector<mmod_rect>& crop_rects
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(images.size() == rects.size());
|
||||
size_t idx;
|
||||
{ std::lock_guard<std::mutex> lock(rnd_mutex);
|
||||
idx = rnd.get_random_64bit_number()%images.size();
|
||||
}
|
||||
(*this)(images[idx], rects[idx], crop, crop_rects);
|
||||
}
|
||||
|
||||
template <
|
||||
typename image_type1,
|
||||
typename image_type2
|
||||
>
|
||||
void operator() (
|
||||
const image_type1& img,
|
||||
const std::vector<mmod_rect>& rects,
|
||||
image_type2& crop,
|
||||
std::vector<mmod_rect>& crop_rects
|
||||
)
|
||||
{
|
||||
chip_details crop_details;
|
||||
bool should_flip_crop;
|
||||
make_crop_plan(img, rects, crop_details, should_flip_crop);
|
||||
|
||||
extract_image_chip(img, crop_details, crop);
|
||||
const rectangle_transform tform = get_mapping_to_chip(crop_details);
|
||||
|
||||
// copy rects into crop_rects and set ones that are outside the crop to ignore or
|
||||
// drop entirely as appropriate.
|
||||
crop_rects.clear();
|
||||
for (auto rect : rects)
|
||||
{
|
||||
// map to crop
|
||||
rect.rect = tform(rect.rect);
|
||||
|
||||
// if the rect is at least partly in the crop
|
||||
if (get_rect(crop).intersect(rect.rect).area() != 0)
|
||||
{
|
||||
// set to ignore if not totally in the crop
|
||||
if (!get_rect(crop).contains(rect.rect))
|
||||
rect.ignore = true;
|
||||
|
||||
crop_rects.push_back(rect);
|
||||
}
|
||||
}
|
||||
|
||||
// Also randomly flip the image
|
||||
if (should_flip_crop)
|
||||
{
|
||||
image_type2 temp;
|
||||
flip_image_left_right(crop, temp);
|
||||
swap(crop,temp);
|
||||
for (auto&& rect : crop_rects)
|
||||
rect.rect = impl::flip_rect_left_right(rect.rect, get_rect(crop));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
template <typename image_type1>
|
||||
void make_crop_plan (
|
||||
const image_type1& img,
|
||||
const std::vector<mmod_rect>& rects,
|
||||
chip_details& crop_details,
|
||||
bool& should_flip_crop
|
||||
)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(rnd_mutex);
|
||||
rectangle crop_rect;
|
||||
if (has_non_ignored_box(rects))
|
||||
{
|
||||
auto rect = rects[randomly_pick_rect(rects)].rect;
|
||||
// perturb the location of the crop by a small fraction of the object's size.
|
||||
const point rand_translate = dpoint(rnd.get_double_in_range(-0.1,0.1)*rect.width(),
|
||||
rnd.get_double_in_range(-0.1,0.1)*rect.height());
|
||||
|
||||
// perturb the scale of the crop by a fraction of the object's size
|
||||
const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height);
|
||||
|
||||
const long box_size = rect.height()/rand_scale_perturb;
|
||||
crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
crop_rect = make_random_cropping_rect_resnet(img);
|
||||
}
|
||||
should_flip_crop = randomly_flip && rnd.get_random_double() > 0.5;
|
||||
const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;
|
||||
crop_details = chip_details(crop_rect, dims, angle);
|
||||
}
|
||||
|
||||
bool has_non_ignored_box (
|
||||
const std::vector<mmod_rect>& rects
|
||||
) const
|
||||
{
|
||||
for (auto&& b : rects)
|
||||
{
|
||||
if (!b.ignore)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t randomly_pick_rect (
|
||||
const std::vector<mmod_rect>& rects
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(has_non_ignored_box(rects));
|
||||
size_t idx = rnd.get_random_64bit_number()%rects.size();
|
||||
while(rects[idx].ignore)
|
||||
idx = rnd.get_random_64bit_number()%rects.size();
|
||||
return idx;
|
||||
}
|
||||
|
||||
template <typename image_type>
|
||||
rectangle make_random_cropping_rect_resnet(
|
||||
const image_type& img
|
||||
)
|
||||
{
|
||||
// figure out what rectangle we want to crop from the image
|
||||
double mins = 0.466666666, maxs = 0.875;
|
||||
auto scale = mins + rnd.get_random_double()*(maxs-mins);
|
||||
auto size = scale*std::min(img.nr(), img.nc());
|
||||
rectangle rect(size, size);
|
||||
// randomly shift the box around
|
||||
point offset(rnd.get_random_32bit_number()%(img.nc()-rect.width()),
|
||||
rnd.get_random_32bit_number()%(img.nr()-rect.height()));
|
||||
return move_rect(rect, offset);
|
||||
}
|
||||
|
||||
|
||||
std::mutex rnd_mutex;
|
||||
dlib::rand rnd;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_RaNDOM_CROPPER_H_
|
||||
|
|
@ -0,0 +1,191 @@
|
|||
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_RaNDOM_CROPPER_ABSTRACT_H_
|
||||
#ifdef DLIB_RaNDOM_CROPPER_ABSTRACT_H_
|
||||
|
||||
#include "../threads.h"
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include "interpolation.h"
|
||||
#include "../image_processing/full_object_detection.h"
|
||||
#include "../rand.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
class random_cropper
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for extracting random crops of objects from a set of
|
||||
images. The crops are randomly jittered in scale, translation, and
|
||||
rotation but more or less centered on objects specified by mmod_rect
|
||||
objects.
|
||||
|
||||
THREAD SAFETY
|
||||
It is safe for multiple threads to make concurrent calls to this object's
|
||||
operator() methods.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
random_cropper (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_chip_dims() == chip_dims(300,300)
|
||||
- #get_randomly_flip() == true
|
||||
- #get_max_rotation_degrees() == 30
|
||||
- #get_min_object_height() == 0.25
|
||||
- #get_max_object_height() == 0.7
|
||||
!*/
|
||||
|
||||
const chip_dims& get_chip_dims(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the dimensions of image chips produced by this object.
|
||||
!*/
|
||||
|
||||
void set_chip_dims (
|
||||
const chip_dims& dims
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_chip_dims() == dims
|
||||
!*/
|
||||
|
||||
bool get_randomly_flip (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- if this object will randomly mirror chips left to right.
|
||||
!*/
|
||||
|
||||
void set_randomly_flip (
|
||||
bool value
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_randomly_flip() == value
|
||||
!*/
|
||||
|
||||
double get_max_rotation_degrees (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- When extracting an image chip, this object will pick a random rotation
|
||||
in the range [-get_max_rotation_degrees(), get_max_rotation_degrees()]
|
||||
and rotate the chip by that amount.
|
||||
!*/
|
||||
|
||||
void set_max_rotation_degrees (
|
||||
double value
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_max_rotation_degrees() == value
|
||||
!*/
|
||||
|
||||
double get_min_object_height (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- When a chip is extracted around an object, the chip will be sized so that
|
||||
the object's height is at least get_min_object_height() percent of the
|
||||
chip height.
|
||||
!*/
|
||||
|
||||
void set_min_object_height (
|
||||
double value
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < value < 1
|
||||
ensures
|
||||
- #get_min_object_height() == value
|
||||
!*/
|
||||
|
||||
double get_max_object_height (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- When a chip is extracted around an object, the chip will be sized so that
|
||||
the object's height is at most get_min_object_height() percent of the
|
||||
chip height.
|
||||
!*/
|
||||
|
||||
void set_max_object_height (
|
||||
double value
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < value < 1
|
||||
ensures
|
||||
- #get_max_object_height() == value
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename array_type
|
||||
>
|
||||
void operator() (
|
||||
size_t num_crops,
|
||||
const array_type& images,
|
||||
const std::vector<std::vector<mmod_rect>>& rects,
|
||||
array_type& crops,
|
||||
std::vector<std::vector<mmod_rect>>& crop_rects
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- images.size() == rects.size()
|
||||
ensures
|
||||
- Randomly extracts num_crops chips from images. We also copy the object
|
||||
metadata for each extracted crop and store it into #crop_rects. In
|
||||
particular, calling this function is the same as invoking the version of
|
||||
operator() below multiple times, except that this version of operator()
|
||||
will use multiple CPU cores to do the processing and is therefore faster.
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename array_type,
|
||||
typename image_type
|
||||
>
|
||||
void operator() (
|
||||
const array_type& images,
|
||||
const std::vector<std::vector<mmod_rect>>& rects,
|
||||
image_type& crop,
|
||||
std::vector<mmod_rect>& crop_rects
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- images.size() == rects.size()
|
||||
ensures
|
||||
- Selects a random image and creates a random crop from it. Specifically,
|
||||
we pick a random index IDX < images.size() and then execute
|
||||
(*this)(images[IDX],rects[IDX],crop,crop_rects)
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type1,
|
||||
typename image_type2
|
||||
>
|
||||
void operator() (
|
||||
const image_type1& img,
|
||||
const std::vector<mmod_rect>& rects,
|
||||
image_type2& crop,
|
||||
std::vector<mmod_rect>& crop_rects
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Extracts a random crop from img and copies over the mmod_rect objects in
|
||||
rects to #crop_rects if they are contained inside the crop. Moreover,
|
||||
rectangles are marked as ignore if they aren't completely contained
|
||||
inside the crop.
|
||||
- #crop_rects.size() <= rects.size()
|
||||
!*/
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_RaNDOM_CROPPER_ABSTRACT_H_
|
||||
|
||||
|
Loading…
Reference in New Issue