From 4d623597ab5d3fec224e2988390a705283758185 Mon Sep 17 00:00:00 2001 From: Davis King Date: Sat, 1 Oct 2016 18:32:01 -0400 Subject: [PATCH] Added find_upper_quantile() and count_steps_without_decrease_robust(). --- dlib/statistics/running_gradient.h | 68 +++++++++++++++++++++ dlib/statistics/running_gradient_abstract.h | 54 ++++++++++++++-- 2 files changed, 118 insertions(+), 4 deletions(-) diff --git a/dlib/statistics/running_gradient.h b/dlib/statistics/running_gradient.h index e235da4cf..3a4983b79 100644 --- a/dlib/statistics/running_gradient.h +++ b/dlib/statistics/running_gradient.h @@ -8,6 +8,7 @@ #include "../serialize.h" #include #include "../matrix.h" +#include namespace dlib @@ -215,6 +216,26 @@ namespace dlib return g.probability_gradient_greater_than(thresh); } +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + double find_upper_quantile ( + T container, + double quantile + ) + { + DLIB_CASSERT(0 <= quantile && quantile <= 1.0); + DLIB_CASSERT(container.size() > 0); + + size_t idx_upper = std::round((container.size()-1)*(1-quantile)); + + std::nth_element(container.begin(), container.begin()+idx_upper, container.end()); + auto upper_q = *(container.begin()+idx_upper); + return upper_q; + } + // ---------------------------------------------------------------------------------------- template < @@ -252,6 +273,53 @@ namespace dlib return count; } +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + size_t count_steps_without_decrease_robust ( + const T& container, + double probability_of_decrease = 0.51, + double quantile_discard = 0.10 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 <= quantile_discard && quantile_discard <= 1); + DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1, + "\t size_t count_steps_without_decrease_robust()" + << "\n\t 
probability_of_decrease: "<< probability_of_decrease + ); + + if (container.size() == 0) + return 0; + + const auto quantile_thresh = find_upper_quantile(container, quantile_discard); + + running_gradient g; + size_t count = 0; + size_t j = 0; + for (auto i = container.rbegin(); i != container.rend(); ++i) + { + ++j; + // ignore values that are too large + if (*i <= quantile_thresh) + g.add(*i); + + if (g.current_n() > 2) + { + // Note that this only looks backwards because we are looping over the + // container backwards. So here we are really checking if the gradient isn't + // decreasing. + double prob_decreasing = g.probability_gradient_greater_than(0); + // If we aren't confident things are decreasing. + if (prob_decreasing < probability_of_decrease) + count = j; + } + } + return count; + } + // ---------------------------------------------------------------------------------------- template < diff --git a/dlib/statistics/running_gradient_abstract.h b/dlib/statistics/running_gradient_abstract.h index e5f007fb1..c8fac8fe6 100644 --- a/dlib/statistics/running_gradient_abstract.h +++ b/dlib/statistics/running_gradient_abstract.h @@ -141,7 +141,7 @@ namespace dlib ); /*! requires - - container muse be a container of double values that can be enumerated with a + - container must be a container of double values that can be enumerated with a range based for loop. - The container must contain more than 2 elements. ensures @@ -158,7 +158,7 @@ namespace dlib ); /*! requires - - container muse be a container of double values that can be enumerated with a + - container must be a container of double values that can be enumerated with a range based for loop. - The container must contain more than 2 elements. ensures @@ -177,7 +177,7 @@ namespace dlib ); /*! requires - - container muse be a container of double values that can be enumerated with + - container must be a container of double values that can be enumerated with .rbegin() and .rend(). 
- 0.5 < probability_of_decrease < 1 ensures @@ -195,6 +195,30 @@ namespace dlib - The max possible output from this function is container.size(). !*/ + template < + typename T + > + size_t count_steps_without_decrease_robust ( + const T& container, + double probability_of_decrease = 0.51, + double quantile_discard = 0.10 + ); + /*! + requires + - container must be a container of double values that can be enumerated with + .begin() and .end() as well as .rbegin() and .rend(). + - 0.5 < probability_of_decrease < 1 + - 0 <= quantile_discard <= 1 + ensures + - This function behaves just like + count_steps_without_decrease(container,probability_of_decrease) except that + it ignores values in container that are in the upper quantile_discard + quantile. So for example, if quantile_discard is 0.1 then the 10% + largest values in container are ignored. + !*/ + +// ---------------------------------------------------------------------------------------- + template < typename T > @@ -204,7 +228,7 @@ namespace dlib ); /*! requires - - container muse be a container of double values that can be enumerated with + - container must be a container of double values that can be enumerated with .rbegin() and .rend(). - 0.5 < probability_of_increase < 1 ensures @@ -221,6 +245,28 @@ namespace dlib increasing. !*/ +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + double find_upper_quantile ( + T container, + double quantile + ); + /*! + requires + - container must be a container of double values that provides .size() and + random access iterators via .begin() and .end(). + - 0 <= quantile <= 1 + - container.size() > 0 + ensures + - Finds and returns the value such that roughly a fraction quantile (i.e. + 100*quantile percent) of the values in container are greater than it. For + example, 0.5 would find the median value in container while 0.1 would find + the value that lower bounds the 10% largest values in container.
+ !*/ + // ---------------------------------------------------------------------------------------- }