Moved log1pexp() and randomize_parameters() from core.h into utilities.h

Davis King 2016-06-11 23:08:51 -04:00
parent 6578c1b574
commit 172647f87c
5 changed files with 66 additions and 61 deletions

View File

@@ -112,23 +112,6 @@ namespace dlib
}
}
// ----------------------------------------------------------------------------------------
inline double log1pexp(double x)
{
using std::exp;
using namespace std; // Do this instead of using std::log1p because some compilers
// error out otherwise (e.g. gcc 4.9 in Cygwin)
if (x <= -37)
return exp(x);
else if (-37 < x && x <= 18)
return log1p(exp(x));
else if (18 < x && x <= 33.3)
return x + exp(-x);
else
return x;
}
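
For readers unfamiliar with the branch thresholds above (this sketch is not part of the commit): the naive expression log(1+exp(x)) overflows as soon as exp(x) exceeds the double range (x above roughly 709), even though the true result is simply x to machine precision once x > 33.3. A minimal standalone C++ illustration of the failure mode:

#include <cmath>
#include <cstdio>

// Naive evaluation: exp(x) overflows to inf for x > ~709, so the whole
// expression becomes inf even though log(1+exp(x)) is ~x in that regime.
double log1pexp_naive(double x) { return std::log(1 + std::exp(x)); }

int main()
{
    std::printf("%f\n", log1pexp_naive(1000.0)); // prints inf
    std::printf("%f\n", 1000.0);                 // what log1pexp(1000.0) returns (the x branch)
}
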
// ----------------------------------------------------------------------------------------
// Tell us if T is one of the special layer types (i.e. add_layer, repeat, add_tag_layer, or
@@ -442,24 +425,6 @@ namespace dlib
{
return item;
}
// ----------------------------------------------------------------------------------------
inline void randomize_parameters (
tensor& params,
unsigned long num_inputs_and_outputs,
dlib::rand& rnd
)
{
for (auto& val : params)
{
// Draw a random number to initialize the layer according to formula (16)
// from Understanding the difficulty of training deep feedforward neural
// networks by Xavier Glorot and Yoshua Bengio.
val = 2*rnd.get_random_float()-1;
val *= std::sqrt(6.0/(num_inputs_and_outputs));
}
}
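
For reference, the loop above implements formula (16) of Glorot & Bengio, "Understanding the difficulty of training deep feedforward neural networks" (2010), which for a layer with $n_j$ inputs and $n_{j+1}$ outputs draws each weight uniformly from

$$ W \sim U\left[ -\sqrt{\frac{6}{n_j + n_{j+1}}},\ \sqrt{\frac{6}{n_j + n_{j+1}}} \right] $$

With n = num_inputs_and_outputs = n_j + n_{j+1}, the code realizes exactly this: 2*rnd.get_random_float()-1 is uniform on [-1,1], and multiplying by sqrt(6/n) rescales it to the interval above.
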
// ----------------------------------------------------------------------------------------
template <typename T>

View File

@@ -16,23 +16,6 @@ namespace dlib
// ----------------------------------------------------------------------------------------
void randomize_parameters (
tensor& params,
unsigned long num_inputs_and_outputs,
dlib::rand& rnd
);
/*!
ensures
- This function assigns random values to params based on the given random
number generator. In particular, it uses the parameter initialization method
of formula 16 from the paper "Understanding the difficulty of training deep
feedforward neural networks" by Xavier Glorot and Yoshua Bengio.
- It is assumed that the total number of inputs and outputs from the layer is
num_inputs_and_outputs. That is, you should set num_inputs_and_outputs to
the sum of the dimensionalities of the vectors going into and out of the
layer that uses params as its parameters.
!*/
template <
typename... T
>
@@ -58,15 +41,6 @@
a non-std::tuple object is found.
!*/
double log1pexp(
double x
);
/*!
ensures
- returns log(1+exp(x))
(but computed using a numerically accurate method)
!*/
// ----------------------------------------------------------------------------------------
template <typename T>

View File

@@ -12,6 +12,7 @@
#include "../string.h" #include "../string.h"
#include "tensor_tools.h" #include "tensor_tools.h"
#include "../vectorstream.h" #include "../vectorstream.h"
#include "utilities.h"
namespace dlib

View File

@@ -9,6 +9,41 @@
namespace dlib
{
// ----------------------------------------------------------------------------------------
inline double log1pexp(double x)
{
using std::exp;
using namespace std; // Do this instead of using std::log1p because some compilers
// error out otherwise (e.g. gcc 4.9 in Cygwin)
if (x <= -37)
return exp(x);
else if (-37 < x && x <= 18)
return log1p(exp(x));
else if (18 < x && x <= 33.3)
return x + exp(-x);
else
return x;
}
// ----------------------------------------------------------------------------------------
inline void randomize_parameters (
tensor& params,
unsigned long num_inputs_and_outputs,
dlib::rand& rnd
)
{
for (auto& val : params)
{
// Draw a random number to initialize the layer according to formula (16)
// from Understanding the difficulty of training deep feedforward neural
// networks by Xavier Glorot and Yoshua Bengio.
val = 2*rnd.get_random_float()-1;
val *= std::sqrt(6.0/(num_inputs_and_outputs));
}
}
// ----------------------------------------------------------------------------------------
namespace impl

View File

@@ -8,6 +8,36 @@
namespace dlib
{
// ----------------------------------------------------------------------------------------
double log1pexp(
double x
);
/*!
ensures
- returns log(1+exp(x))
(but computed using a numerically accurate method)
!*/
// ----------------------------------------------------------------------------------------
void randomize_parameters (
tensor& params,
unsigned long num_inputs_and_outputs,
dlib::rand& rnd
);
/*!
ensures
- This function assigns random values to params based on the given random
number generator. In particular, it uses the parameter initialization method
of formula 16 from the paper "Understanding the difficulty of training deep
feedforward neural networks" by Xavier Glorot and Yoshua Bengio.
- It is assumed that the total number of inputs and outputs from the layer is
num_inputs_and_outputs. That is, you should set num_inputs_and_outputs to
the sum of the dimensionalities of the vectors going into and out of the
layer that uses params as its parameters.
!*/
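
A hypothetical usage sketch (the tensor shape and fan-in/fan-out numbers below are made up for illustration; only randomize_parameters() and log1pexp() come from this header):

#include <dlib/dnn.h>
#include <dlib/rand.h>

int main()
{
    // A made-up 50x100 weight matrix for a fully connected layer.
    dlib::resizable_tensor params;
    params.set_size(1, 1, 50, 100);

    dlib::rand rnd;
    // Per the spec above, pass the sum of the layer's input and output
    // dimensionalities: 100 inputs + 50 outputs.
    dlib::randomize_parameters(params, 100 + 50, rnd);

    // log1pexp() is a numerically stable softplus, e.g. log(1+exp(0)) == log(2).
    double softplus0 = dlib::log1pexp(0.0);
    (void)softplus0;
}
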
// ----------------------------------------------------------------------------------------
template <typename net_type>