From cd4b62b494773ccc23b5ae21c9fd28322c49b7fb Mon Sep 17 00:00:00 2001 From: Dennis Francis Date: Wed, 23 Nov 2016 14:44:33 +0530 Subject: [PATCH 1/3] feature_addition : Added a mean squared loss layer to DNN Added mean squared loss layer "loss_mean_squared" to DNN as requested in https://github.com/davisking/dlib/issues/152 Also added test case of a simple linear regression with one variable that uses this layer. --- dlib/dnn/loss.h | 107 +++++++++++++++++++++++++++++++++++++++ dlib/dnn/loss_abstract.h | 58 +++++++++++++++++++++ dlib/test/dnn.cpp | 49 ++++++++++++++++++ 3 files changed, 214 insertions(+) diff --git a/dlib/dnn/loss.h b/dlib/dnn/loss.h index 1a4966b65..e233feb08 100644 --- a/dlib/dnn/loss.h +++ b/dlib/dnn/loss.h @@ -1292,6 +1292,113 @@ namespace dlib template using loss_metric_hardish = add_loss_layer; +// ---------------------------------------------------------------------------------------- + + class loss_mean_squared_ + { + public: + + typedef float training_label_type; + typedef float output_label_type; + + template < + typename SUB_TYPE, + typename label_iterator + > + void to_label ( + const tensor& input_tensor, + const SUB_TYPE& sub, + label_iterator iter + ) const + { + DLIB_CASSERT(sub.sample_expansion_factor() == 1); + + const tensor& output_tensor = sub.get_output(); + + DLIB_CASSERT(output_tensor.nr() == 1 && + output_tensor.nc() == 1 && + output_tensor.k() == 1); + DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); + + const float* out_data = output_tensor.host(); + for (long i = 0; i < output_tensor.num_samples(); ++i) + { + *iter++ = out_data[i]; + } + } + + + template < + typename const_label_iterator, + typename SUBNET + > + double compute_loss_value_and_gradient ( + const tensor& input_tensor, + const_label_iterator truth, + SUBNET& sub + ) const + { + const tensor& output_tensor = sub.get_output(); + tensor& grad = sub.get_gradient_input(); + + DLIB_CASSERT(sub.sample_expansion_factor() == 1); + DLIB_CASSERT(input_tensor.num_samples() != 0); + DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0); + DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples()); + DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); + DLIB_CASSERT(output_tensor.nr() == 1 && + output_tensor.nc() == 1 && + output_tensor.k() == 1); + DLIB_CASSERT(grad.nr() == 1 && + grad.nc() == 1 && + grad.k() == 1); + + // The loss we output is the average loss over the mini-batch. 
+            const double scale = 1.0/output_tensor.num_samples();
+            double loss = 0;
+            float* g = grad.host_write_only();
+            const float* out_data = output_tensor.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i)
+            {
+                const float y = *truth++;
+                const float temp1 = y - out_data[i];
+                const float temp2 = scale*temp1;
+                loss += 0.5*temp2*temp1;
+                g[i] = -temp2;
+
+            }
+            return loss;
+        }
+
+        friend void serialize(const loss_mean_squared_& , std::ostream& out)
+        {
+            serialize("loss_mean_squared_", out);
+        }
+
+        friend void deserialize(loss_mean_squared_& , std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "loss_mean_squared_")
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_mean_squared_.");
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const loss_mean_squared_& )
+        {
+            out << "loss_mean_squared";
+            return out;
+        }
+
+        friend void to_xml(const loss_mean_squared_& /*item*/, std::ostream& out)
+        {
+            out << "<loss_mean_squared/>";
+        }
+
+    };
+
+    template <typename SUBNET>
+    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;
+
 // ----------------------------------------------------------------------------------------

 }
diff --git a/dlib/dnn/loss_abstract.h b/dlib/dnn/loss_abstract.h
index e8227c5f3..3587607a7 100644
--- a/dlib/dnn/loss_abstract.h
+++ b/dlib/dnn/loss_abstract.h
@@ -527,6 +527,64 @@ namespace dlib

 // ----------------------------------------------------------------------------------------

+    class loss_mean_squared_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object implements the loss layer interface defined above by
+                EXAMPLE_LOSS_LAYER_. In particular, it implements the mean squared loss, which is
+                appropriate for regression problems.
+        !*/
+    public:
+
+        typedef float training_label_type;
+        typedef float output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
+            it has the additional calling requirements that:
+                - sub.get_output().nr() == 1
+                - sub.get_output().nc() == 1
+                - sub.get_output().k() == 1
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+            and the output label is the predicted continuous variable.
+        !*/
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
+            except it has the additional calling requirements that:
+                - sub.get_output().nr() == 1
+                - sub.get_output().nc() == 1
+                - sub.get_output().k() == 1
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+        !*/
+
+    };
+
+    template <typename SUBNET>
+    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;
+
+
 }

 #endif // DLIB_DNn_LOSS_ABSTRACT_H_
diff --git a/dlib/test/dnn.cpp b/dlib/test/dnn.cpp
index bb70df011..2b0cb1c41 100644
--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include <random>
 #include "../dnn.h"
 #include "tester.h"
@@ -1737,6 +1738,53 @@ namespace
         error = memcmp(g3.host(), b3g.host(), b3g.size());
         DLIB_TEST(error == 0);
     }
+
+// ----------------------------------------------------------------------------------------
+
+    void test_simple_linear_regression()
+    {
+        ::std::vector<matrix<double>> x(100);
+        ::std::vector<float> y(100);
+        ::std::default_random_engine generator(16);
+        ::std::normal_distribution<float> distribution(0,5);
+        const float true_intercept = 50.0;
+        const float true_slope = 10.0;
+        for ( int ii = 0; ii < 100; ++ii )
+        {
+            const double val = static_cast<double>(ii);
+            matrix<double> tmp(1,1);
+            tmp = val;
+            x[ii] = tmp;
+            y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator));
+        }
+
+        using net_type = loss_mean_squared<
+                             fc<
+                                 1, input<matrix<double>>
+                             >
+                         >;
+        net_type net;
+        layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300);
+        sgd defsolver;
+        dnn_trainer<net_type> trainer(net, defsolver);
+        trainer.set_learning_rate(0.00001);
+        trainer.set_mini_batch_size(50);
+        trainer.set_max_num_epochs(170);
+        trainer.train(x, y);
+
+        const float slope = layer<1>(net).layer_details().get_weights().host()[0];
+        const float slope_error = abs(true_slope - slope);
+        const float intercept = layer<1>(net).layer_details().get_biases().host()[0];
+        const float intercept_error = abs(true_intercept - intercept);
+        const float eps_slope = 0.5, eps_intercept = 1.0;
+
+        DLIB_TEST_MSG(slope_error <= eps_slope,
+                      "Expected slope = " << true_slope << " Estimated slope = " << slope << " Error limit = " << eps_slope);
+        DLIB_TEST_MSG(intercept_error <= eps_intercept,
+                      "Expected intercept = " << true_intercept << " Estimated intercept = " << intercept << " Error limit = " << eps_intercept);
+
+    }
+
 // ----------------------------------------------------------------------------------------

     class dnn_tester : public tester
@@ -1804,6 +1852,7 @@ namespace
             test_visit_funcions();
             test_copy_tensor_cpu();
             test_concat();
+            test_simple_linear_regression();
         }

         void perform_test()

From af76e82633acf974d4ab77c9bcf2b6e28e6707d2 Mon Sep 17 00:00:00 2001
From: Dennis Francis
Date: Fri, 25 Nov 2016 22:15:39 +0530
Subject: [PATCH 2/3] converted tabs to spaces in the indentation

---
 dlib/dnn/loss.h   | 108 +++++++++++++++++++++++-----------------------
 dlib/test/dnn.cpp |  74 +++++++++++++++----------------
 2 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/dlib/dnn/loss.h b/dlib/dnn/loss.h
index e233feb08..d34e78620 100644
--- a/dlib/dnn/loss.h
+++ b/dlib/dnn/loss.h
@@ -1305,70 +1305,70 @@ namespace dlib
             typename SUB_TYPE,
             typename label_iterator
             >
-        void to_label (
-            const tensor& input_tensor,
-            const SUB_TYPE& sub,
-            label_iterator iter
-        ) const
-        {
-            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
-
-            const tensor& output_tensor = sub.get_output();
-
-            DLIB_CASSERT(output_tensor.nr() == 1 &&
-                         output_tensor.nc() == 1 &&
-                         output_tensor.k() == 1);
-
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); - - const float* out_data = output_tensor.host(); - for (long i = 0; i < output_tensor.num_samples(); ++i) + void to_label ( + const tensor& input_tensor, + const SUB_TYPE& sub, + label_iterator iter + ) const { - *iter++ = out_data[i]; + DLIB_CASSERT(sub.sample_expansion_factor() == 1); + + const tensor& output_tensor = sub.get_output(); + + DLIB_CASSERT(output_tensor.nr() == 1 && + output_tensor.nc() == 1 && + output_tensor.k() == 1); + DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); + + const float* out_data = output_tensor.host(); + for (long i = 0; i < output_tensor.num_samples(); ++i) + { + *iter++ = out_data[i]; + } } - } template < typename const_label_iterator, typename SUBNET > - double compute_loss_value_and_gradient ( - const tensor& input_tensor, - const_label_iterator truth, - SUBNET& sub - ) const - { - const tensor& output_tensor = sub.get_output(); - tensor& grad = sub.get_gradient_input(); - - DLIB_CASSERT(sub.sample_expansion_factor() == 1); - DLIB_CASSERT(input_tensor.num_samples() != 0); - DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0); - DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples()); - DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); - DLIB_CASSERT(output_tensor.nr() == 1 && - output_tensor.nc() == 1 && - output_tensor.k() == 1); - DLIB_CASSERT(grad.nr() == 1 && - grad.nc() == 1 && - grad.k() == 1); - - // The loss we output is the average loss over the mini-batch. - const double scale = 1.0/output_tensor.num_samples(); - double loss = 0; - float* g = grad.host_write_only(); - const float* out_data = output_tensor.host(); - for (long i = 0; i < output_tensor.num_samples(); ++i) + double compute_loss_value_and_gradient ( + const tensor& input_tensor, + const_label_iterator truth, + SUBNET& sub + ) const { - const float y = *truth++; - const float temp1 = y - out_data[i]; - const float temp2 = scale*temp1; - loss += 0.5*temp2*temp1; - g[i] = -temp2; + const tensor& output_tensor = sub.get_output(); + tensor& grad = sub.get_gradient_input(); + DLIB_CASSERT(sub.sample_expansion_factor() == 1); + DLIB_CASSERT(input_tensor.num_samples() != 0); + DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0); + DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples()); + DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); + DLIB_CASSERT(output_tensor.nr() == 1 && + output_tensor.nc() == 1 && + output_tensor.k() == 1); + DLIB_CASSERT(grad.nr() == 1 && + grad.nc() == 1 && + grad.k() == 1); + + // The loss we output is the average loss over the mini-batch. 
+            const double scale = 1.0/output_tensor.num_samples();
+            double loss = 0;
+            float* g = grad.host_write_only();
+            const float* out_data = output_tensor.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i)
+            {
+                const float y = *truth++;
+                const float temp1 = y - out_data[i];
+                const float temp2 = scale*temp1;
+                loss += 0.5*temp2*temp1;
+                g[i] = -temp2;
+
+            }
+            return loss;
+        }
-            return loss;
-        }

         friend void serialize(const loss_mean_squared_& , std::ostream& out)
         {
@@ -1397,7 +1397,7 @@ namespace dlib
     };

     template <typename SUBNET>
-    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;
+    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;

 // ----------------------------------------------------------------------------------------

diff --git a/dlib/test/dnn.cpp b/dlib/test/dnn.cpp
index 2b0cb1c41..e5ffc4c6c 100644
--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -1743,45 +1743,45 @@ namespace

     void test_simple_linear_regression()
     {
-        ::std::vector<matrix<double>> x(100);
-        ::std::vector<float> y(100);
-        ::std::default_random_engine generator(16);
-        ::std::normal_distribution<float> distribution(0,5);
-        const float true_intercept = 50.0;
-        const float true_slope = 10.0;
-        for ( int ii = 0; ii < 100; ++ii )
-        {
-            const double val = static_cast<double>(ii);
-            matrix<double> tmp(1,1);
-            tmp = val;
-            x[ii] = tmp;
-            y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator));
-        }
+        ::std::vector<matrix<double>> x(100);
+        ::std::vector<float> y(100);
+        ::std::default_random_engine generator(16);
+        ::std::normal_distribution<float> distribution(0,5);
+        const float true_intercept = 50.0;
+        const float true_slope = 10.0;
+        for ( int ii = 0; ii < 100; ++ii )
+        {
+            const double val = static_cast<double>(ii);
+            matrix<double> tmp(1,1);
+            tmp = val;
+            x[ii] = tmp;
+            y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator));
+        }

-        using net_type = loss_mean_squared<
-                             fc<
-                                 1, input<matrix<double>>
-                             >
-                         >;
-        net_type net;
-        layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300);
-        sgd defsolver;
-        dnn_trainer<net_type> trainer(net, defsolver);
-        trainer.set_learning_rate(0.00001);
-        trainer.set_mini_batch_size(50);
-        trainer.set_max_num_epochs(170);
-        trainer.train(x, y);
+        using net_type = loss_mean_squared<
+                             fc<
+                                 1, input<matrix<double>>
+                             >
+                         >;
+        net_type net;
+        layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300);
+        sgd defsolver;
+        dnn_trainer<net_type> trainer(net, defsolver);
+        trainer.set_learning_rate(0.00001);
+        trainer.set_mini_batch_size(50);
+        trainer.set_max_num_epochs(170);
+        trainer.train(x, y);

-        const float slope = layer<1>(net).layer_details().get_weights().host()[0];
-        const float slope_error = abs(true_slope - slope);
-        const float intercept = layer<1>(net).layer_details().get_biases().host()[0];
-        const float intercept_error = abs(true_intercept - intercept);
-        const float eps_slope = 0.5, eps_intercept = 1.0;
+        const float slope = layer<1>(net).layer_details().get_weights().host()[0];
+        const float slope_error = abs(true_slope - slope);
+        const float intercept = layer<1>(net).layer_details().get_biases().host()[0];
+        const float intercept_error = abs(true_intercept - intercept);
+        const float eps_slope = 0.5, eps_intercept = 1.0;

-        DLIB_TEST_MSG(slope_error <= eps_slope,
-                      "Expected slope = " << true_slope << " Estimated slope = " << slope << " Error limit = " << eps_slope);
-        DLIB_TEST_MSG(intercept_error <= eps_intercept,
-                      "Expected intercept = " << true_intercept << " Estimated intercept = " << intercept << " Error limit = " << eps_intercept);
+        DLIB_TEST_MSG(slope_error <= eps_slope,
+                      "Expected slope = " << true_slope << " Estimated slope = " << slope << " Error
limit = " << eps_slope); + DLIB_TEST_MSG(intercept_error <= eps_intercept, + "Expected intercept = " << true_intercept << " Estimated intercept = " << intercept << " Error limit = " << eps_intercept); } @@ -1852,7 +1852,7 @@ namespace test_visit_funcions(); test_copy_tensor_cpu(); test_concat(); - test_simple_linear_regression(); + test_simple_linear_regression(); } void perform_test() From d4da6c53b6a469027612e2006bf3c598e151a627 Mon Sep 17 00:00:00 2001 From: Dennis Francis Date: Sat, 26 Nov 2016 09:07:36 +0530 Subject: [PATCH 3/3] adapt to dlib indentation style --- dlib/dnn/loss.h | 108 +++++++++++++++++++-------------------- dlib/dnn/loss_abstract.h | 1 - dlib/test/dnn.cpp | 6 +-- 3 files changed, 55 insertions(+), 60 deletions(-) diff --git a/dlib/dnn/loss.h b/dlib/dnn/loss.h index d34e78620..6a52f1f85 100644 --- a/dlib/dnn/loss.h +++ b/dlib/dnn/loss.h @@ -1305,70 +1305,70 @@ namespace dlib typename SUB_TYPE, typename label_iterator > - void to_label ( - const tensor& input_tensor, - const SUB_TYPE& sub, - label_iterator iter - ) const + void to_label ( + const tensor& input_tensor, + const SUB_TYPE& sub, + label_iterator iter + ) const + { + DLIB_CASSERT(sub.sample_expansion_factor() == 1); + + const tensor& output_tensor = sub.get_output(); + + DLIB_CASSERT(output_tensor.nr() == 1 && + output_tensor.nc() == 1 && + output_tensor.k() == 1); + DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); + + const float* out_data = output_tensor.host(); + for (long i = 0; i < output_tensor.num_samples(); ++i) { - DLIB_CASSERT(sub.sample_expansion_factor() == 1); - - const tensor& output_tensor = sub.get_output(); - - DLIB_CASSERT(output_tensor.nr() == 1 && - output_tensor.nc() == 1 && - output_tensor.k() == 1); - DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); - - const float* out_data = output_tensor.host(); - for (long i = 0; i < output_tensor.num_samples(); ++i) - { - *iter++ = out_data[i]; - } + *iter++ = out_data[i]; } + } template < typename const_label_iterator, typename SUBNET > - double compute_loss_value_and_gradient ( - const tensor& input_tensor, - const_label_iterator truth, - SUBNET& sub - ) const + double compute_loss_value_and_gradient ( + const tensor& input_tensor, + const_label_iterator truth, + SUBNET& sub + ) const + { + const tensor& output_tensor = sub.get_output(); + tensor& grad = sub.get_gradient_input(); + + DLIB_CASSERT(sub.sample_expansion_factor() == 1); + DLIB_CASSERT(input_tensor.num_samples() != 0); + DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0); + DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples()); + DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples()); + DLIB_CASSERT(output_tensor.nr() == 1 && + output_tensor.nc() == 1 && + output_tensor.k() == 1); + DLIB_CASSERT(grad.nr() == 1 && + grad.nc() == 1 && + grad.k() == 1); + + // The loss we output is the average loss over the mini-batch. 
+            const double scale = 1.0/output_tensor.num_samples();
+            double loss = 0;
+            float* g = grad.host_write_only();
+            const float* out_data = output_tensor.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i)
             {
-            const tensor& output_tensor = sub.get_output();
-            tensor& grad = sub.get_gradient_input();
+                const float y = *truth++;
+                const float temp1 = y - out_data[i];
+                const float temp2 = scale*temp1;
+                loss += 0.5*temp2*temp1;
+                g[i] = -temp2;
-            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
-            DLIB_CASSERT(input_tensor.num_samples() != 0);
-            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
-            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
-            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
-            DLIB_CASSERT(output_tensor.nr() == 1 &&
-                         output_tensor.nc() == 1 &&
-                         output_tensor.k() == 1);
-            DLIB_CASSERT(grad.nr() == 1 &&
-                         grad.nc() == 1 &&
-                         grad.k() == 1);
-
-            // The loss we output is the average loss over the mini-batch.
-            const double scale = 1.0/output_tensor.num_samples();
-            double loss = 0;
-            float* g = grad.host_write_only();
-            const float* out_data = output_tensor.host();
-            for (long i = 0; i < output_tensor.num_samples(); ++i)
-            {
-                const float y = *truth++;
-                const float temp1 = y - out_data[i];
-                const float temp2 = scale*temp1;
-                loss += 0.5*temp2*temp1;
-                g[i] = -temp2;
-
-            }
-            return loss;
         }
+            return loss;
+        }

         friend void serialize(const loss_mean_squared_& , std::ostream& out)
         {
@@ -1397,7 +1397,7 @@ namespace dlib
     };

     template <typename SUBNET>
-    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;
+    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;

 // ----------------------------------------------------------------------------------------

diff --git a/dlib/dnn/loss_abstract.h b/dlib/dnn/loss_abstract.h
index 3587607a7..a51d5f88c 100644
--- a/dlib/dnn/loss_abstract.h
+++ b/dlib/dnn/loss_abstract.h
@@ -584,7 +584,6 @@ namespace dlib

     template <typename SUBNET>
     using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;
-
 }

 #endif // DLIB_DNn_LOSS_ABSTRACT_H_
diff --git a/dlib/test/dnn.cpp b/dlib/test/dnn.cpp
index e5ffc4c6c..98cc32202 100644
--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -1758,11 +1758,7 @@ namespace
             y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator));
         }

-        using net_type = loss_mean_squared<
-                             fc<
-                                 1, input<matrix<double>>
-                             >
-                         >;
+        using net_type = loss_mean_squared<fc<1, input<matrix<double>>>>;
         net_type net;
         layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300);
         sgd defsolver;
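
Usage note, not part of the patch series above: a minimal sketch of how the new loss_mean_squared layer could be driven for one-variable regression once the patches are applied. The network shape, solver settings, and data below are illustrative assumptions, not taken from the patches.

    // Hypothetical usage sketch for the new mean squared loss layer (not part of the patches).
    #include <dlib/dnn.h>
    #include <vector>

    using namespace dlib;

    int main()
    {
        // One scalar input per sample, one real-valued target per sample.
        std::vector<matrix<double>> x;
        std::vector<float> y;
        for (int i = 0; i < 100; ++i)
        {
            matrix<double> m(1,1);
            m = i;
            x.push_back(m);
            y.push_back(3.0f*i + 7.0f);   // noiseless line, purely for illustration
        }

        // A single fully connected unit under the new loss, i.e. fit y ~ w*x + b.
        using net_type = loss_mean_squared<fc<1, input<matrix<double>>>>;
        net_type net;

        dnn_trainer<net_type> trainer(net, sgd());
        trainer.set_learning_rate(1e-5);
        trainer.set_mini_batch_size(50);
        trainer.set_max_num_epochs(100);
        trainer.train(x, y);

        // to_label() is what runs here: each scalar network output is copied
        // into the returned label vector.
        std::vector<float> predictions = net(x);
        return 0;
    }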