From 18695b7b4b1b73730ed001b690cebd5ff7c96437 Mon Sep 17 00:00:00 2001 From: Davis King Date: Wed, 23 Dec 2015 08:23:46 -0500 Subject: [PATCH] Made the default input layer automatically normalize unsigned char pixel values to the range [0,1]. --- dlib/dnn/input.h | 13 +++++++++++-- dlib/dnn/input_abstract.h | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/dlib/dnn/input.h b/dlib/dnn/input.h index a4bfa479c..7603aa0af 100644 --- a/dlib/dnn/input.h +++ b/dlib/dnn/input.h @@ -58,6 +58,8 @@ namespace dlib // initialize data to the right size to contain the stuff in the iterator range. data.set_size(std::distance(ibegin,iend), pixel_traits<T>::num, nr, nc); + typedef typename pixel_traits<T>::basic_pixel_type bptype; + const size_t offset = nr*nc; auto ptr = data.host(); for (auto i = ibegin; i != iend; ++i) @@ -70,7 +72,10 @@ namespace dlib auto p = ptr++; for (long j = 0; j < temp.size(); ++j) { - *p = temp(j); + if (is_same_type<bptype,unsigned char>::value) + *p = temp(j)/256.0; + else + *p = temp(j); p += offset; } } @@ -130,6 +135,7 @@ namespace dlib // initialize data to the right size to contain the stuff in the iterator range. data.set_size(std::distance(ibegin,iend), pixel_traits<T>::num, nr, nc); + typedef typename pixel_traits<T>::basic_pixel_type bptype; const size_t offset = nr*nc; auto ptr = data.host(); @@ -143,7 +149,10 @@ namespace dlib auto p = ptr++; for (long j = 0; j < temp.size(); ++j) { - *p = temp(j); + if (is_same_type<bptype,unsigned char>::value) + *p = temp(j)/256.0; + else + *p = temp(j); p += offset; } } diff --git a/dlib/dnn/input_abstract.h b/dlib/dnn/input_abstract.h index ceab0de9f..68e8b1f67 100644 --- a/dlib/dnn/input_abstract.h +++ b/dlib/dnn/input_abstract.h @@ -135,6 +135,10 @@ namespace dlib For example, a matrix<float,3,3> would turn into a tensor with 3 rows, 3 columns, and k()==1. Or a matrix<rgb_pixel,4,5> would turn into a tensor with 4 rows, 5 columns, and k()==3 (since rgb_pixels have 3 channels). 
+ - If the input data contains pixels of type unsigned char, rgb_pixel, or + other pixel types with a basic_pixel_type of unsigned char then each + value written to the output tensor is first divided by 256.0 so that the + resulting outputs are all in the range [0,1]. !*/ };