mirror of https://github.com/davisking/dlib.git
merged
This commit is contained in:
commit
d207348af5
|
@ -328,12 +328,14 @@ namespace dlib
|
|||
rs.clear();
|
||||
}
|
||||
|
||||
void set_setep_size (
|
||||
void set_step_size (
|
||||
double ss
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(ss > 0,"");
|
||||
wait_for_thread_to_pause();
|
||||
if (step_size != ss)
|
||||
previous_loss_values.clear();
|
||||
step_size = ss;
|
||||
}
|
||||
|
||||
|
@ -391,24 +393,33 @@ namespace dlib
|
|||
resizable_tensor t;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void run_update(job_t& next_job, const T&)
|
||||
void record_loss(double loss)
|
||||
{
|
||||
double loss = net.update(next_job.t, next_job.labels.begin(), make_sstack(solvers),step_size);
|
||||
// Say that we will check if the gradient is bad 200 times during each
|
||||
// iter_between_step_size_adjust interval of network updates. This kind of
|
||||
// budgeting causes our gradient checking to use a fixed amount of
|
||||
// computational resources, regardless of the size of
|
||||
// iter_between_step_size_adjust.
|
||||
gradient_check_budget += 200;
|
||||
|
||||
rs.add(loss);
|
||||
previous_loss_values.push_back(loss);
|
||||
if (previous_loss_values.size() > iter_between_step_size_adjust)
|
||||
previous_loss_values.pop_front();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void run_update(job_t& next_job, const T&)
|
||||
{
|
||||
double loss = net.update(next_job.t, next_job.labels.begin(), make_sstack(solvers),step_size);
|
||||
record_loss(loss);
|
||||
}
|
||||
|
||||
void run_update(job_t& next_job, const no_label_type&)
|
||||
{
|
||||
no_label_type pick_wich_run_update;
|
||||
double loss = net.update(next_job.t, make_sstack(solvers), step_size);
|
||||
rs.add(loss);
|
||||
previous_loss_values.push_back(loss);
|
||||
if (previous_loss_values.size() > iter_between_step_size_adjust)
|
||||
previous_loss_values.pop_front();
|
||||
record_loss(loss);
|
||||
}
|
||||
|
||||
void thread() try
|
||||
|
@ -425,9 +436,14 @@ namespace dlib
|
|||
run_update(next_job, pick_wich_run_update);
|
||||
|
||||
// If we have been running for a while then check if the loss is still
|
||||
// dropping. If it isn't then we will reduce the step size.
|
||||
if (previous_loss_values.size() >= iter_between_step_size_adjust)
|
||||
// dropping. If it isn't then we will reduce the step size. Note that we
|
||||
// have a "budget" that prevents us from calling
|
||||
// probability_gradient_greater_than() every iteration. We do this because
|
||||
// it can be expensive to compute when previous_loss_values is large.
|
||||
if (previous_loss_values.size() >= iter_between_step_size_adjust &&
|
||||
gradient_check_budget > previous_loss_values.size())
|
||||
{
|
||||
gradient_check_budget = 0;
|
||||
if (probability_gradient_greater_than(previous_loss_values, 0) > 0.49)
|
||||
{
|
||||
step_size = step_size_shrink*step_size;
|
||||
|
@ -458,12 +474,13 @@ namespace dlib
|
|||
verbose = false;
|
||||
cuda_device_id = dlib::cuda::get_device();
|
||||
step_size = 1;
|
||||
min_step_size = 1e-4;
|
||||
min_step_size = 1e-3;
|
||||
iter_between_step_size_adjust = 2000;
|
||||
step_size_shrink = 0.1;
|
||||
epoch_iteration = 0;
|
||||
epoch_pos = 0;
|
||||
train_one_step_calls = 0;
|
||||
gradient_check_budget = 0;
|
||||
start();
|
||||
}
|
||||
|
||||
|
@ -575,7 +592,7 @@ namespace dlib
|
|||
std::vector<solver_type> solvers;
|
||||
std::atomic<double> step_size;
|
||||
double min_step_size;
|
||||
std::atomic<long> iter_between_step_size_adjust;
|
||||
std::atomic<unsigned long> iter_between_step_size_adjust;
|
||||
std::atomic<double> step_size_shrink;
|
||||
std::chrono::time_point<std::chrono::system_clock> last_sync_time;
|
||||
std::string sync_filename;
|
||||
|
@ -584,6 +601,7 @@ namespace dlib
|
|||
unsigned long epoch_pos;
|
||||
std::chrono::time_point<std::chrono::system_clock> last_time;
|
||||
unsigned long long train_one_step_calls;
|
||||
unsigned long gradient_check_budget;
|
||||
|
||||
// The job object is not logically part of the state of this object. It is here
|
||||
// only to avoid reallocating it over and over.
|
||||
|
|
|
@ -60,7 +60,7 @@ namespace dlib
|
|||
- #get_max_num_epochs() == 10000
|
||||
- #get_mini_batch_size() == 128
|
||||
- #get_step_size() == 1
|
||||
- #get_min_step_size() == 1e-4
|
||||
- #get_min_step_size() == 1e-3
|
||||
- #get_iterations_between_step_size_adjust() == 2000
|
||||
- #get_step_size_shrink() == 0.1
|
||||
!*/
|
||||
|
@ -149,7 +149,7 @@ namespace dlib
|
|||
- #get_max_num_epochs() == num
|
||||
!*/
|
||||
|
||||
void set_setep_size (
|
||||
void set_step_size (
|
||||
double ss
|
||||
);
|
||||
/*!
|
||||
|
|
|
@ -41,7 +41,9 @@ void randomly_crop_image (
|
|||
)
|
||||
{
|
||||
// figure out what rectangle we want to crop from the image
|
||||
auto scale = 1-rnd.get_random_double()*0.2;
|
||||
//auto scale = 1-rnd.get_random_double()*0.2;
|
||||
double mins = 0.466666666, maxs = 0.875;
|
||||
auto scale = mins + rnd.get_random_double()*(maxs-mins);
|
||||
auto size = scale*std::min(img.nr(), img.nc());
|
||||
rectangle rect(size, size);
|
||||
// randomly shift the box around
|
||||
|
@ -49,8 +51,8 @@ void randomly_crop_image (
|
|||
rnd.get_random_32bit_number()%(img.nr()-rect.height()));
|
||||
rect = move_rect(rect, offset);
|
||||
|
||||
// now crop it out as a 250x250 image.
|
||||
extract_image_chip(img, chip_details(rect, chip_dims(250,250)), crop);
|
||||
// now crop it out as a 224x224 image.
|
||||
extract_image_chip(img, chip_details(rect, chip_dims(224,224)), crop);
|
||||
|
||||
// Also randomly flip the image
|
||||
if (rnd.get_random_double() > 0.5)
|
||||
|
@ -71,7 +73,9 @@ void randomly_crop_images (
|
|||
for (long i = 0; i < num_crops; ++i)
|
||||
{
|
||||
// figure out what rectangle we want to crop from the image
|
||||
auto scale = 1-rnd.get_random_double()*0.2;
|
||||
//auto scale = 1-rnd.get_random_double()*0.2;
|
||||
double mins = 0.466666666, maxs = 0.875;
|
||||
auto scale = mins + rnd.get_random_double()*(maxs-mins);
|
||||
auto size = scale*std::min(img.nr(), img.nc());
|
||||
rectangle rect(size, size);
|
||||
// randomly shift the box around
|
||||
|
@ -79,7 +83,7 @@ void randomly_crop_images (
|
|||
rnd.get_random_32bit_number()%(img.nr()-rect.height()));
|
||||
rect = move_rect(rect, offset);
|
||||
|
||||
dets.push_back(chip_details(rect, chip_dims(250,250)));
|
||||
dets.push_back(chip_details(rect, chip_dims(224,224)));
|
||||
}
|
||||
|
||||
extract_image_chips(img, dets, crops);
|
||||
|
@ -104,7 +108,7 @@ struct image_info
|
|||
unsigned long numeric_label;
|
||||
};
|
||||
|
||||
std::vector<image_info> get_mit67_listing(
|
||||
std::vector<image_info> get_imagenet_listing(
|
||||
const std::string& images_folder
|
||||
)
|
||||
{
|
||||
|
@ -147,9 +151,10 @@ int main(int argc, char** argv) try
|
|||
return 1;
|
||||
}
|
||||
|
||||
auto listing = get_mit67_listing(argv[1]);
|
||||
auto listing = get_imagenet_listing(argv[1]);
|
||||
cout << "images in dataset: " << listing.size() << endl;
|
||||
if (listing.size() == 0 || listing.back().numeric_label != 66)
|
||||
const auto number_of_classes = listing.back().numeric_label+1;
|
||||
if (listing.size() == 0 || number_of_classes != 1000)
|
||||
{
|
||||
cout << "Didn't find the MIT 67 scene dataset. Are you sure you gave the correct folder?" << endl;
|
||||
cout << "Give the Images folder as an argument to this program." << endl;
|
||||
|
@ -161,21 +166,21 @@ int main(int argc, char** argv) try
|
|||
const double weight_decay = sa = argv[2];
|
||||
|
||||
typedef loss_multiclass_log<fc<avg_pool<
|
||||
res<res<
|
||||
res<res<
|
||||
res<res<
|
||||
res<res<
|
||||
res<res<res<
|
||||
res<res<res<res<res<res<
|
||||
res<res<res<res<
|
||||
res<res<res<
|
||||
max_pool<relu<bn<con<
|
||||
input<matrix<rgb_pixel>
|
||||
>>>>>>>>>>>>>>>> net_type;
|
||||
>>>>>>>>>>>>>>>>>>>>>>>> net_type;
|
||||
|
||||
|
||||
net_type net(fc_(67),
|
||||
net_type net(fc_(number_of_classes),
|
||||
avg_pool_(1000,1000,1000,1000),
|
||||
res_(512),res_(512,2),
|
||||
res_(256),res_(256,2),
|
||||
res_(128),res_(128,2),
|
||||
res_(64), res_(64),
|
||||
res_(512),res_(512),res_(512,2),
|
||||
res_(256),res_(256),res_(256),res_(256),res_(256),res_(256,2),
|
||||
res_(128),res_(128),res_(128),res_(128,2),
|
||||
res_(64), res_(64), res_(64),
|
||||
max_pool_(3,3,2,2), relu_(), bn_(CONV_MODE), con_(64,7,7,2,2)
|
||||
);
|
||||
|
||||
|
@ -185,12 +190,13 @@ int main(int argc, char** argv) try
|
|||
|
||||
dnn_trainer<net_type> trainer(net,sgd(initial_step_size, weight_decay));
|
||||
trainer.be_verbose();
|
||||
trainer.set_synchronization_file("mit67_sync3_"+cast_to_string(weight_decay), std::chrono::minutes(5));
|
||||
trainer.set_synchronization_file("sync_imagenet_full_training_set_40000_minstep_"+cast_to_string(weight_decay), std::chrono::minutes(5));
|
||||
trainer.set_iterations_between_step_size_adjust(40000);
|
||||
std::vector<matrix<rgb_pixel>> samples;
|
||||
std::vector<unsigned long> labels;
|
||||
|
||||
randomize_samples(listing);
|
||||
const size_t training_part = listing.size()*0.7;
|
||||
const size_t training_part = listing.size()*1.0;
|
||||
|
||||
dlib::rand rnd;
|
||||
|
||||
|
@ -198,14 +204,14 @@ int main(int argc, char** argv) try
|
|||
const bool do_training = true;
|
||||
if (do_training)
|
||||
{
|
||||
while(trainer.get_step_size() >= 1e-4)
|
||||
while(trainer.get_step_size() >= 1e-3)
|
||||
{
|
||||
samples.clear();
|
||||
labels.clear();
|
||||
|
||||
// make a 64 image mini-batch
|
||||
// make a 128 image mini-batch
|
||||
matrix<rgb_pixel> img, crop;
|
||||
while(samples.size() < 64)
|
||||
while(samples.size() < 128)
|
||||
{
|
||||
auto l = listing[rnd.get_random_32bit_number()%training_part];
|
||||
load_image(img, l.filename);
|
||||
|
@ -222,25 +228,25 @@ int main(int argc, char** argv) try
|
|||
|
||||
net.clean();
|
||||
cout << "saving network" << endl;
|
||||
serialize("mit67_network3_"+cast_to_string(weight_decay)+".dat") << net;
|
||||
serialize("imagenet_full_training_set_40000_minstep_"+cast_to_string(weight_decay)+".dat") << net;
|
||||
}
|
||||
|
||||
|
||||
const bool test_network = true;
|
||||
const bool test_network = false;
|
||||
if (test_network)
|
||||
{
|
||||
|
||||
typedef loss_multiclass_log<fc<avg_pool<
|
||||
ares<ares<
|
||||
ares<ares<
|
||||
ares<ares<
|
||||
ares<ares<
|
||||
ares<ares<ares<
|
||||
ares<ares<ares<ares<ares<ares<
|
||||
ares<ares<ares<ares<
|
||||
ares<ares<ares<
|
||||
max_pool<relu<affine<con<
|
||||
input<matrix<rgb_pixel>
|
||||
>>>>>>>>>>>>>>>> anet_type;
|
||||
>>>>>>>>>>>>>>>>>>>>>>>> anet_type;
|
||||
|
||||
anet_type net;
|
||||
deserialize("mit67_network3_"+cast_to_string(weight_decay)+".dat") >> net;
|
||||
deserialize("imagenet_network3_"+cast_to_string(weight_decay)+".dat") >> net;
|
||||
|
||||
dlib::array<matrix<rgb_pixel>> images;
|
||||
std::vector<unsigned long> labels;
|
||||
|
@ -249,6 +255,7 @@ int main(int argc, char** argv) try
|
|||
int num_right = 0;
|
||||
int num_wrong = 0;
|
||||
console_progress_indicator pbar(training_part);
|
||||
/*
|
||||
for (size_t i = 0; i < training_part; ++i)
|
||||
{
|
||||
pbar.print_status(i);
|
||||
|
@ -261,6 +268,7 @@ int main(int argc, char** argv) try
|
|||
else
|
||||
++num_wrong;
|
||||
}
|
||||
*/
|
||||
|
||||
cout << "\ntraining num_right: " << num_right << endl;
|
||||
cout << "training num_wrong: " << num_wrong << endl;
|
||||
|
|
Loading…
Reference in New Issue