From 351a6331e9501777387f7e96cae8948912fc9f9c Mon Sep 17 00:00:00 2001
From: Davis King <davis@dlib.net>
Date: Sun, 13 Dec 2015 12:21:54 -0500
Subject: [PATCH] Added loss_multiclass_log_

---
 dlib/dnn/loss.h | 105 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/dlib/dnn/loss.h b/dlib/dnn/loss.h
index dfac4371c..1ae9fa4ef 100644
--- a/dlib/dnn/loss.h
+++ b/dlib/dnn/loss.h
@@ -204,6 +204,111 @@ namespace dlib
     template <typename SUBNET>
     using loss_binary_log = add_loss_layer<loss_binary_log_, SUBNET>;
 
+// ----------------------------------------------------------------------------------------
+
+    class loss_multiclass_log_
+    {
+    public:
+
+        const static unsigned int sample_expansion_factor = 1;
+        typedef unsigned long label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const
+        {
+            const tensor& output_tensor = sub.get_output();
+            DLIB_CASSERT(output_tensor.nr() == 1 &&
+                         output_tensor.nc() == 1 ,"");
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples(),"");
+
+
+            // Note that output_tensor.k() should match the number of labels.
+
+            const float* out_data = output_tensor.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i)
+            {
+                // The index of the largest output for this sample is the label.
+                *iter++ = index_of_max(rowm(mat(output_tensor),i));
+            }
+        }
+
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const
+        {
+            const tensor& output_tensor = sub.get_output();
+            tensor& grad = sub.get_gradient_input();
+
+            DLIB_CASSERT(input_tensor.num_samples() != 0,"");
+            DLIB_CASSERT(input_tensor.num_samples()%sample_expansion_factor == 0,"");
+            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples(),"");
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples(),"");
+            DLIB_CASSERT(output_tensor.nr() == 1 &&
+                         output_tensor.nc() == 1,"");
+            DLIB_CASSERT(grad.nr() == 1 &&
+                         grad.nc() == 1,"");
+
+            tt::softmax(grad, output_tensor);
+
+            // The loss we output is the average loss over the mini-batch.
+            const double scale = 1.0/output_tensor.num_samples();
+            double loss = 0;
+            float* g = grad.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i)
+            {
+                const long y = (long)*truth++;
+                // The network must produce a number of outputs that is equal to the number
+                // of labels when using this type of loss.
+                DLIB_CASSERT(y < output_tensor.k(), "y: " << y << ", output_tensor.k(): " << output_tensor.k());
+                for (long k = 0; k < output_tensor.k(); ++k)
+                {
+                    const unsigned long idx = i*output_tensor.k()+k;
+                    if (k == y)
+                    {
+                        loss += scale*-std::log(g[idx]);
+                        g[idx] = scale*(g[idx]-1);
+                    }
+                    else
+                    {
+                        g[idx] = scale*g[idx];
+                    }
+                }
+            }
+            return loss;
+        }
+
+        friend void serialize(const loss_multiclass_log_& , std::ostream& out)
+        {
+            serialize("loss_multiclass_log_", out);
+        }
+
+        friend void deserialize(loss_multiclass_log_& , std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "loss_multiclass_log_")
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_.");
+        }
+
+    };
+
+    template <typename SUBNET>
+    using loss_multiclass_log = add_loss_layer<loss_multiclass_log_, SUBNET>;
+
 // ----------------------------------------------------------------------------------------
 
 }
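
Note on what the new layer computes: compute_loss() runs tt::softmax() over each sample's k raw outputs, returns the mini-batch average of -log(p_y) for the true class y, and writes the gradient (softmax - one_hot(y)) / num_samples into get_gradient_input(); to_label() simply reports index_of_max() over the k outputs. The following standalone snippet is a minimal sketch of that same arithmetic in plain C++, with no dlib types; the batch values and variable names are illustrative and not part of the patch.

    #include <cmath>
    #include <iostream>
    #include <vector>

    int main()
    {
        const long num_samples = 2;   // mini-batch size
        const long k = 3;             // number of classes (output_tensor.k())

        // Raw network outputs, k values per sample, stored row-major like grad.host().
        std::vector<double> out = { 1.0, 2.0, 0.5,
                                    0.1, 0.2, 3.0 };
        std::vector<unsigned long> truth = { 1, 2 };   // true class index for each sample

        std::vector<double> g(out.size());             // gradient w.r.t. the raw outputs
        const double scale = 1.0 / num_samples;        // average over the mini-batch
        double loss = 0;
        for (long i = 0; i < num_samples; ++i)
        {
            // softmax over this sample's k outputs
            double sum = 0;
            for (long j = 0; j < k; ++j)
                sum += std::exp(out[i*k + j]);

            for (long j = 0; j < k; ++j)
            {
                const double p = std::exp(out[i*k + j]) / sum;   // softmax probability
                if (j == (long)truth[i])
                {
                    loss += scale * -std::log(p);   // cross-entropy term for the true class
                    g[i*k + j] = scale * (p - 1);   // gradient: p - 1 for the true class
                }
                else
                {
                    g[i*k + j] = scale * p;         // gradient: p for every other class
                }
            }
        }
        std::cout << "average loss = " << loss << "\n";
    }

Since to_label() returns index_of_max() over the k outputs, a network wrapped in loss_multiclass_log produces unsigned long class indices in the range [0, output_tensor.k()).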