mirror of https://github.com/davisking/dlib.git
Added two vehicle detection examples.
parent 0a7a75a245
commit 51eae2ba07
examples/CMakeLists.txt

@@ -121,6 +121,8 @@ if (NOT USING_OLD_VISUAL_STUDIO_COMPILER)
   add_gui_example(random_cropper_ex)
   add_gui_example(dnn_mmod_dog_hipsterizer)
   add_gui_example(dnn_imagenet_ex)
   add_gui_example(dnn_mmod_find_cars_ex)
   add_example(dnn_mmod_train_find_cars_ex)
   if (NOT MSVC)
      # Don't try to compile this program using Visual Studio since it causes the
      # compiler to run out of RAM and to crash.  Maybe someday Visual Studio

examples/dnn_mmod_find_cars_ex.cpp

@@ -0,0 +1,175 @@
#include <iostream>
#include <dlib/dnn.h>
#include <dlib/data_io.h>
#include <dlib/gui_widgets.h>
#include <dlib/dir_nav.h>
#include <dlib/time_this.h>
#include <dlib/image_processing.h>

using namespace std;
using namespace dlib;



// The DNN rear view vehicle detector network.
template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
template <long num_filters, typename SUBNET> using con5  = con<num_filters,5,5,1,1,SUBNET>;
template <typename SUBNET> using downsampler  = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
template <typename SUBNET> using rcon5  = relu<affine<con5<55,SUBNET>>>;
using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
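// (Explanatory note added for this writeup, not part of the original commit.)
// The three stride-2 con5d layers downsample the input by a factor of 8, and the final
// 9x9 convolution produces the sliding-window detection scores consumed by loss_mmod.
// The affine layers here stand in for the bn_con (batch norm) layers used by the
// training program; when the trained network is deserialized, the learned batch norm
// statistics are folded into these affine layers.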

// ----------------------------------------------------------------------------------------

int main() try
{
    net_type net;
    shape_predictor sp;
    // You can get this file from http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2
    // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program.
    // As you can see, it also includes a shape_predictor.  To see a generic example of how
    // to train those refer to train_shape_predictor_ex.cpp.
    deserialize("mmod_rear_end_vehicle_detector.dat") >> net >> sp;

    matrix<rgb_pixel> img;
    load_image(img, "../mmod_cars_test_image.jpg");

    image_window win;
    win.set_image(img);

    // Run the detector on the image and show us the output.
    for (auto&& d : net(img))
    {
        // We use a shape_predictor to refine the exact shape and location of the detection
        // box.  This shape_predictor is trained to simply output the 4 corner points of the
        // box.  So all we do is make a rectangle that tightly contains those 4 points and
        // that rectangle is our refined detection position.
        auto fd = sp(img,d);
        rectangle rect;
        for (unsigned long j = 0; j < fd.num_parts(); ++j)
            rect += fd.part(j);
        win.add_overlay(rect, rgb_pixel(255,0,0));
    }

    cout << "Hit enter to view the intermediate processing steps" << endl;
    cin.get();

    // Create a tiled image pyramid and display it on the screen.
    std::vector<rectangle> rects;
    matrix<rgb_pixel> tiled_img;
    create_tiled_pyramid<std::remove_reference<decltype(input_layer(net))>::type::pyramid_type>(img,
        tiled_img, rects, input_layer(net).get_pyramid_padding(),
        input_layer(net).get_pyramid_outer_padding());
    image_window winpyr(tiled_img, "Tiled image pyramid");
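    // (Explanatory note added for this writeup, not part of the original commit.)
    // The input_rgb_image_pyramid layer builds this same tiled pyramid internally, so a
    // single forward pass of the network scans the image at all of these scales at once.
    // The rects output by create_tiled_pyramid record where each pyramid level was placed
    // inside tiled_img.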

    cout << "Number of channels in final tensor image: " << net.subnet().get_output().k() << endl;
    matrix<float> network_output = image_plane(net.subnet().get_output(),0,0);
    for (long k = 1; k < net.subnet().get_output().k(); ++k)
        network_output = max_pointwise(network_output, image_plane(net.subnet().get_output(),0,k));
    const double v0_scale = img.nc()/(double)network_output.nc();
    resize_image(v0_scale, network_output);
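    // (Explanatory note added for this writeup, not part of the original commit.)
    // The output tensor has one channel per detector window; taking the pointwise max over
    // the channels collapses them into a single detection-score map.  That map is then
    // resized by v0_scale so its width matches the input image, and the same v0_scale is
    // used below to rescale coordinates returned by input_tensor_to_output_tensor() before
    // indexing into it.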

    const float lower = -2.5;  // min(network_output);
    const float upper = 0.0;   // max(network_output);
    cout << "jet color mapping range: lower="<< lower << " upper="<< upper << endl;

    // Display the final layer as a color image
    image_window win_output(jet(network_output, upper, lower), "Output tensor from the network");


    // Overlay network_output on top of the tiled image pyramid and display it.
    matrix<rgb_pixel> tiled_img_sal = tiled_img;
    for (long r = 0; r < tiled_img_sal.nr(); ++r)
    {
        for (long c = 0; c < tiled_img_sal.nc(); ++c)
        {
            dpoint tmp(c,r);
            tmp = input_tensor_to_output_tensor(net, tmp);
            tmp = point(v0_scale*tmp);
            if (get_rect(network_output).contains(tmp))
            {
                float val = network_output(tmp.y(),tmp.x());
                rgb_alpha_pixel p;
                assign_pixel(p, colormap_jet(val,lower,upper));
                p.alpha = 120;
                assign_pixel(tiled_img_sal(r,c), p);
            }
        }
    }
    image_window win_pyr_sal(tiled_img_sal, "Saliency on image pyramid");


    // Now collapse the pyramid scales into the original image.
    matrix<float> collapsed_saliency(img.nr(), img.nc());
    resizable_tensor input_tensor;
    input_layer(net).to_tensor(&img, &img+1, input_tensor);
    for (long r = 0; r < collapsed_saliency.nr(); ++r)
    {
        for (long c = 0; c < collapsed_saliency.nc(); ++c)
        {
            // Loop over a bunch of scale values and look up what part of network_output
            // corresponds to point(c,r) in the original image, then take the max saliency
            // value over all the scales and save it at pixel point(c,r).
            float max_sal = -1e30;
            for (double scale = 1; scale > 0.2; scale *= 5.0/6.0)
            {
                // Map from input image coordinates to tiled pyramid coordinates and then
                // to output tensor coordinates.
                dpoint tmp = center(input_layer(net).image_space_to_tensor_space(input_tensor,scale, drectangle(dpoint(c,r))));
                tmp = point(v0_scale*input_tensor_to_output_tensor(net, tmp));
                if (get_rect(network_output).contains(tmp))
                {
                    float val = network_output(tmp.y(),tmp.x());
                    if (val > max_sal)
                        max_sal = val;
                }
            }

            collapsed_saliency(r,c) = max_sal;

            // Also blend the saliency into the original input image so we can view it as
            // an overlay on the cars.
            rgb_alpha_pixel p;
            assign_pixel(p, colormap_jet(max_sal,lower,upper));
            p.alpha = 120;
            assign_pixel(img(r,c), p);
        }
    }

    image_window win_collapsed(jet(collapsed_saliency, upper, lower), "Collapsed saliency map");
    image_window win_img_and_sal(img);

    cout << "Hit enter to end program" << endl;
    cin.get();
}
catch(image_load_error& e)
{
    cout << e.what() << endl;
    cout << "The test image is located in the examples folder.  So you should run this program from a sub folder so that the relative path is correct." << endl;
}
catch(serialization_error& e)
{
    cout << e.what() << endl;
    cout << "The model file can be obtained from: http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2  Don't forget to decompress the .bz2 file before running this program." << endl;
}
catch(std::exception& e)
{
    cout << e.what() << endl;
}

examples/dnn_mmod_train_find_cars_ex.cpp

@@ -0,0 +1,348 @@
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This example shows how to train a CNN based object detector using dlib's
    loss_mmod loss layer.  This loss layer implements the Max-Margin Object
    Detection loss as described in the paper:
        Max-Margin Object Detection by Davis E. King (http://arxiv.org/abs/1502.00046).

    This is the same loss used by the popular SVM+HOG object detector in dlib
    (see fhog_object_detector_ex.cpp) except here we replace the HOG features
    with a CNN and train the entire detector end-to-end.  This allows us to make
    much more powerful detectors.

    It would be a good idea to become familiar with dlib's DNN tooling before reading
    this example.  So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp
    before reading this example program.  You should also read the introductory DNN+MMOD
    example, dnn_mmod_ex.cpp.

    This example is essentially a more complex version of dnn_mmod_ex.cpp.  In it we train
    a detector that finds the rear ends of motor vehicles.  I will also discuss some
    aspects of data preparation useful when training this kind of detector.
*/


#include <iostream>
#include <dlib/dnn.h>
#include <dlib/data_io.h>
#include <dlib/dir_nav.h>
#include <dlib/time_this.h>

using namespace std;
using namespace dlib;



// The DNN vehicle detector network.
template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
template <long num_filters, typename SUBNET> using con5  = con<num_filters,5,5,1,1,SUBNET>;
template <typename SUBNET> using downsampler  = relu<bn_con<con5d<32, relu<bn_con<con5d<32, relu<bn_con<con5d<16,SUBNET>>>>>>>>>;
template <typename SUBNET> using rcon5  = relu<bn_con<con5<55,SUBNET>>>;
using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
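// (Explanatory note added for this writeup, not part of the original commit.)
// This is the same architecture as in dnn_mmod_find_cars_ex.cpp, except that it uses
// bn_con (batch normalization) layers, which is what you want while training.  The
// detection program swaps them for affine layers when it loads the trained model.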

// ----------------------------------------------------------------------------------------

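// (Descriptive comment added for this writeup, not part of the original commit.)
// Whenever two boxes in the given vector overlap, according to overlaps(), this function
// marks the smaller of the two as ignore.  It returns the number of boxes newly marked
// as ignore.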
int ignore_overlapped_boxes(
    std::vector<mmod_rect>& boxes,
    const test_box_overlap& overlaps
)
{
    int num_ignored = 0;
    for (size_t i = 0; i < boxes.size(); ++i)
    {
        if (boxes[i].ignore)
            continue;
        for (size_t j = i+1; j < boxes.size(); ++j)
        {
            if (boxes[j].ignore)
                continue;
            if (overlaps(boxes[i], boxes[j]))
            {
                ++num_ignored;
                if (boxes[i].rect.area() < boxes[j].rect.area())
                    boxes[i].ignore = true;
                else
                    boxes[j].ignore = true;
            }
        }
    }
    return num_ignored;
}

// ----------------------------------------------------------------------------------------

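// (Descriptive comment added for this writeup, not part of the original commit.)
// Upsamples, by one pyramid level, every image in the dataset whose area is smaller than
// 1800*1800 pixels, and scales the object boxes to match.  Images that are already larger
// than that are left as is, hence the "limit" in the name.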
template <
    typename pyramid_type,
    typename image_array_type
    >
void upsample_image_dataset_limit (
    image_array_type& images,
    std::vector<std::vector<mmod_rect>>& objects
)
{
    // make sure requires clause is not broken
    DLIB_ASSERT( images.size() == objects.size(),
        "\t void upsample_image_dataset_limit()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t images.size():  " << images.size()
        << "\n\t objects.size(): " << objects.size()
    );

    typename image_array_type::value_type temp;
    pyramid_type pyr;
    for (unsigned long i = 0; i < images.size(); ++i)
    {
        if (images[i].size() < 1800*1800)
        {
            pyramid_up(images[i], temp, pyr);
            swap(temp, images[i]);
            for (unsigned long j = 0; j < objects[i].size(); ++j)
            {
                objects[i][j].rect = pyr.rect_up(objects[i][j].rect);
            }
        }
    }
}

// ----------------------------------------------------------------------------------------

int main(int argc, char** argv) try
{
    if (argc != 2)
    {
        cout << "Give the path to a folder containing training.xml and testing.xml files." << endl;
        cout << "This example program is specifically designed to run on the dlib vehicle " << endl;
        cout << "detection dataset, which is available at this URL: " << endl;
        cout << "   http://dlib.net/files/data/dlib_rear_end_vehicles_v1.tar" << endl;
        cout << endl;
        cout << "So download that dataset, extract it somewhere, and then run this program" << endl;
        cout << "with the dlib_rear_end_vehicles folder as an argument.  E.g. if you extract" << endl;
        cout << "the dataset to the current folder then you should run this example program" << endl;
        cout << "by typing: " << endl;
        cout << "   ./dnn_mmod_train_find_cars_ex dlib_rear_end_vehicles" << endl;
        cout << endl;
        return 0;
    }
    const std::string data_directory = argv[1];


    std::vector<matrix<rgb_pixel>> images_train, images_test;
    std::vector<std::vector<mmod_rect>> boxes_train, boxes_test;
    load_image_dataset(images_train, boxes_train, data_directory+"/training.xml");
    load_image_dataset(images_test, boxes_test, data_directory+"/testing.xml");


    int num_overlapped_ignored_test = 0;
    for (auto& v : boxes_test)
        num_overlapped_ignored_test += ignore_overlapped_boxes(v, test_box_overlap(0.50, 0.99));

    int num_overlapped_ignored = 0;
    int num_additional_ignored = 0;
    for (auto& v : boxes_train)
    {
        num_overlapped_ignored += ignore_overlapped_boxes(v, test_box_overlap(0.50, 0.99));
        for (auto& bb : v)
        {
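            // (Descriptive comment added for this writeup, not part of the original
            // commit.)  Boxes where both sides are smaller than 35 pixels are marked as
            // ignore, presumably because they are smaller than the detection windows this
            // detector will use (see the mmod_options created below), so the loss should
            // not penalize the detector for missing them.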
            if (bb.rect.width() < 35 && bb.rect.height() < 35)
            {
                if (!bb.ignore)
                {
                    bb.ignore = true;
                    ++num_additional_ignored;
                }
            }

            // The dlib vehicle detection dataset doesn't contain any detections with
            // really extreme aspect ratios.  However, some datasets do, often because of
            // bad labeling.  So it's a good idea to check for that and either eliminate
            // those boxes or set them to ignore, although this depends on your
            // application.
            //
            // For instance, if your dataset has boxes with an aspect ratio
            // of 10 then you should think about what that means for the network
            // architecture.  Does the receptive field even cover the entirety of the box
            // in those cases?  Do you care about these boxes?  Are they labeling errors?
            // I find that many people will download some dataset from the internet and
            // just take it as given.  They run it through some training algorithm and take
            // the dataset as unchallengeable truth.  But many datasets are full of
            // labeling errors.  There are also a lot of datasets that aren't full of
            // errors, but are annotated in a sloppy and inconsistent way.  Fixing those
            // errors and inconsistencies can often greatly improve models trained from
            // such data.  It's almost always worth the time to try and improve your
            // training dataset.
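            //
            // A minimal sketch of such an aspect ratio check (an illustrative addition for
            // this writeup, not part of the original program; the 1:6 threshold is an
            // arbitrary choice) would look something like:
            //
            //     const double aspect = bb.rect.width()/(double)bb.rect.height();
            //     if (aspect > 6 || aspect < 1.0/6)
            //         bb.ignore = true;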
        }
    }

    cout << "num_overlapped_ignored: "<< num_overlapped_ignored << endl;
    cout << "num_additional_ignored: "<< num_additional_ignored << endl;
    cout << "num_overlapped_ignored_test: "<< num_overlapped_ignored_test << endl;


    cout << "num training images: " << images_train.size() << endl;
    cout << "num testing images: " << images_test.size() << endl;


    // Our vehicle detection dataset has basically 3 different types of boxes: square
    // boxes, tall and skinny boxes (e.g. semi trucks), and short and wide boxes (e.g.
    // sedans).  Here we are telling the MMOD algorithm that a vehicle is recognizable as
    // long as the longest box side is at least 70 pixels long and the shortest box side is
    // at least 30 pixels long.  It will use these parameters to decide how large each of
    // the sliding windows needs to be so as to be able to detect all the vehicles.  Since
    // our dataset has basically only these 3 different aspect ratios, it will decide to
    // use 3 different sliding windows at the end of the network.
    mmod_options options(boxes_train, 70, 30);
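    // (Illustrative addition for this writeup, not part of the original commit.)  You can
    // print the sliding windows MMOD settled on to sanity check the choice described above:
    cout << "num detector windows: " << options.detector_windows.size() << endl;
    for (auto& w : options.detector_windows)
        cout << "detector window width x height: " << w.width << " x " << w.height << endl;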

    // This setting is very important and dataset specific.  The vehicle detection dataset
    // contains boxes that are marked as "ignore", as we discussed above.  Some of them are
    // ignored because we set ignore to true on them in the above code.  However, the xml
    // files already contained a lot of ignore boxes.  Some of them are large boxes that
    // encompass large parts of an image and the intention is to have everything inside
    // those boxes be ignored.  Therefore, we need to tell the MMOD algorithm to do that,
    // which we do by setting options.overlaps_ignore appropriately.
    //
    // But first, we need to understand exactly what this option does.  The MMOD loss
    // is essentially counting the number of false alarms + missed detections produced by
    // the detector for each image.  During training, the code is running the detector on
    // each image in a mini-batch, looking at its output, and counting the number of
    // mistakes.  The optimizer tries to find parameter settings that minimize the number
    // of detector mistakes.
    //
    // This overlaps_ignore option allows you to tell the loss that some outputs from the
    // detector should be totally ignored, as if they never happened.  In particular, if a
    // detection overlaps a box in the training data with ignore==true then that detection
    // is ignored.  This overlap is determined by calling
    // options.overlaps_ignore(the_detection, the_ignored_training_box).  If it returns
    // true then that detection is ignored.
    //
    // You should read the documentation for test_box_overlap, the class type of
    // overlaps_ignore, for full details.  However, the gist is that the default behavior
    // is to only consider boxes as overlapping if their intersection over union is > 0.5.
    // However, the dlib vehicle detection dataset contains large boxes that are meant to
    // mask out large areas of an image.  So intersection over union isn't an appropriate
    // way to measure "overlaps with box" in this case.  We want any box that is contained
    // inside one of these big regions to be ignored, even if the detection box is really
    // small.  So we set overlaps_ignore to behave that way with this line.
    options.overlaps_ignore = test_box_overlap(0.5, 0.95);
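    // (Explanatory note added for this writeup, not part of the original commit.)
    // test_box_overlap(0.5, 0.95) treats two boxes as overlapping if their intersection
    // over union is greater than 0.5, or if more than 95% of either box's area falls
    // inside the other.  The second condition is what makes a small detection sitting
    // entirely inside one of the big ignore regions count as "overlapping" it.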

    net_type net(options);
    // The final layer of the network must be a con_ layer that contains
    // options.detector_windows.size() filters.  This is because these final filters are
    // what perform the final "sliding window" detection in the network.
    net.subnet().layer_details().set_num_filters(options.detector_windows.size());

    dnn_trainer<net_type> trainer(net,sgd(0.0001,0.9));
    trainer.set_learning_rate(0.1);
    trainer.be_verbose();
    trainer.set_iterations_without_progress_threshold(50000);
    trainer.set_test_iterations_without_progress_threshold(1000);
    const string sync_filename = "mmod_cars_sync";
    trainer.set_synchronization_file(sync_filename, std::chrono::minutes(5));
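    // (Descriptive comment added for this writeup, not part of the original commit.)
    // The trainer saves its state to the sync file every 5 minutes, so if the program is
    // killed and restarted it resumes from where it left off rather than starting over.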


    std::vector<matrix<rgb_pixel>> mini_batch_samples;
    std::vector<std::vector<mmod_rect>> mini_batch_labels;
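    // (Descriptive comment added for this writeup, not part of the original commit.)
    // The random cropper produces the mini-batch samples: randomly positioned, lightly
    // rotated 350x350 crops of the training images, with the object boxes mapped into
    // each crop.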
    random_cropper cropper;
    cropper.set_seed(1);
    cropper.set_chip_dims(350, 350);
    cropper.set_min_object_size(0.20);
    cropper.set_max_rotation_degrees(2);
    dlib::rand rnd;
    cout << trainer << cropper << endl;

    int cnt = 1;
    // Run the trainer until the learning rate gets small.
    while (trainer.get_learning_rate() >= 1e-4)
    {
        if (cnt%30 != 0 || images_test.size() == 0)
        {
            cropper(87, images_train, boxes_train, mini_batch_samples, mini_batch_labels);
            // We can also randomly jitter the colors and that often helps a detector
            // generalize better to new images.
            for (auto&& img : mini_batch_samples)
                disturb_colors(img, rnd);

            // It's a good idea to, at least once, put code here that displays the images
            // and boxes the random cropper is generating.  You should look at them and
            // think about whether the output makes sense for your problem.  Most of the
            // time it will be fine, but sometimes you will realize that the pattern of
            // cropping isn't really appropriate for your problem and you will need to make
            // some change to how the mini-batches are being generated.  Maybe you will
            // tweak some of the cropper's settings, or write your own entirely separate
            // code to create mini-batches.  But either way, if you don't look you will
            // never know.  An easy way to do this is to create a dlib::image_window to
            // display the images and boxes.
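            //
            // A minimal sketch of that (an illustrative addition for this writeup, not
            // part of the original program; it needs #include <dlib/gui_widgets.h> and
            // pauses training, so leave it commented out once you have looked):
            //
            //     image_window cropper_win;
            //     for (size_t i = 0; i < mini_batch_samples.size(); ++i)
            //     {
            //         cropper_win.clear_overlay();
            //         cropper_win.set_image(mini_batch_samples[i]);
            //         for (auto& b : mini_batch_labels[i])
            //             cropper_win.add_overlay(b.rect, b.ignore ? rgb_pixel(255,255,0) : rgb_pixel(255,0,0));
            //         cout << "Hit enter to see the next crop." << endl;
            //         cin.get();
            //     }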

            trainer.train_one_step(mini_batch_samples, mini_batch_labels);
        }
        else
        {
            cropper(87, images_test, boxes_test, mini_batch_samples, mini_batch_labels);
            // We can also randomly jitter the colors and that often helps a detector
            // generalize better to new images.
            for (auto&& img : mini_batch_samples)
                disturb_colors(img, rnd);

            trainer.test_one_step(mini_batch_samples, mini_batch_labels);
        }
        ++cnt;
    }
    // wait for training threads to stop
    trainer.get_net();
    cout << "done training" << endl;

    // Save the network to disk
    net.clean();
    serialize("mmod_rear_end_vehicle_detector.dat") << net;


    // It's a really good idea to print the training parameters.  This is because you will
    // invariably be running multiple rounds of training and should be logging the output
    // to a log file.  This print statement will include many of the training parameters in
    // your log.
    cout << trainer << cropper << endl;

    cout << "\nsync_filename: " << sync_filename << endl;
    cout << "num training images: "<< images_train.size() << endl;
    cout << "training results: " << test_object_detection_function(net, images_train, boxes_train, test_box_overlap(), 0, options.overlaps_ignore);
    upsample_image_dataset_limit<pyramid_down<2>>(images_train, boxes_train);
    cout << "training upsampled results: " << test_object_detection_function(net, images_train, boxes_train, test_box_overlap(), 0, options.overlaps_ignore);


    cout << "num testing images: "<< images_test.size() << endl;
    cout << "testing results: " << test_object_detection_function(net, images_test, boxes_test, test_box_overlap(), 0, options.overlaps_ignore);
    upsample_image_dataset_limit<pyramid_down<2>>(images_test, boxes_test);
    cout << "testing upsampled results: " << test_object_detection_function(net, images_test, boxes_test, test_box_overlap(), 0, options.overlaps_ignore);

    /*
        This program takes many hours to execute on a high end GPU.  It took about a day to
        train on an NVIDIA 1080ti.  The resulting model file is available at
            http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2
        It should be noted that this file on dlib.net has a dlib::shape_predictor appended
        onto the end of it (see dnn_mmod_find_cars_ex.cpp for an example of its use).  This
        explains why the model file on dlib.net is larger than the
        mmod_rear_end_vehicle_detector.dat output by this program.

        Also, the training and testing accuracies were:
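        (Note added for this writeup, not part of the original commit: on each line below,
        the three numbers are the precision, recall, and average precision reported by
        test_object_detection_function.)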
            num training images: 2217
            training results: 0.990738 0.736431 0.736073
            training upsampled results: 0.986837 0.937694 0.936912
            num testing images: 135
            testing results: 0.988827 0.471372 0.470806
            testing upsampled results: 0.987879 0.651132 0.650399
    */

    return 0;
}
catch(std::exception& e)
{
    cout << e.what() << endl;
}
Binary file not shown (98 KiB image).