From 6539ea677922fb127418d87ebe6ffe3337a8259b Mon Sep 17 00:00:00 2001 From: Davis King Date: Sat, 7 Nov 2015 19:59:31 -0500 Subject: [PATCH] Added prototypes for batch normalization's gradients. --- dlib/dnn/cuda_dlib.h | 47 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/dlib/dnn/cuda_dlib.h b/dlib/dnn/cuda_dlib.h index 61430f5fa..3d9867750 100644 --- a/dlib/dnn/cuda_dlib.h +++ b/dlib/dnn/cuda_dlib.h @@ -52,7 +52,6 @@ namespace dlib // ----------------------------------------------------------------------------------- - // TODO, add versions of batch_normalize() that output the gradients. void batch_normalize ( resizable_tensor& dest, resizable_tensor& means, @@ -63,6 +62,7 @@ namespace dlib ); /*! requires + - src.num_samples() > 1 - gamma.num_samples() == 1 - beta.num_samples() == 1 - gamma.nr() == beta.nr() == src.nr() @@ -80,6 +80,39 @@ namespace dlib - #vars == the variance values of the contents of src. !*/ + void batch_normalize_gradient ( + const tensor& gradient_input, + const tensor& means, + const tensor& vars, + const tensor& src, + const tensor& gamma, + tensor& src_grad, + tensor& gamma_grad, + tensor& beta_grad + ); + /*! 
+ requires + - vars and means should be the output of a call to + batch_normalize(dest,means,vars,src,gamma,beta) + - have_same_dimensions(gradient_input, src) == true + - have_same_dimensions(src, src_grad) == true + - src.num_samples() > 1 + - gamma.num_samples() == 1 + - have_same_dimensions(gamma, gamma_grad) == true + - have_same_dimensions(gamma, beta_grad) == true + - gamma.nr() == src.nr() + - gamma.nc() == src.nc() + - gamma.k() == src.k() + - have_same_dimensions(means, gamma) == true + - have_same_dimensions(vars, gamma) == true + ensures + - Let f(src,gamma,beta) == dot(gradient_input, dest output of + batch_normalize(dest,means,vars,src,gamma,beta)) + - Adds the gradient of f() with respect to src to #src_grad + - Adds the gradient of f() with respect to gamma to #gamma_grad + - Adds the gradient of f() with respect to beta to #beta_grad + !*/ + void batch_normalize_conv ( resizable_tensor& dest, resizable_tensor& means, resizable_tensor& vars, const tensor& src, const tensor& gamma, const tensor& beta ); /*! requires + - src.num_samples() > 1 - gamma.num_samples()==gamma.nr()==gamma.nc() == 1 - beta.num_samples() ==beta.nr() ==gamma.nc() == 1 - gamma.k() == beta.k() == src.k() ensures - have_same_dimensions(#dest, src) == true - #means == the mean values of the contents of src. - #dest == the batch normalized version of src. - #vars == the variance values of the contents of src. !*/ + void batch_normalize_conv_gradient ( + const tensor& gradient_input, + const tensor& means, + const tensor& vars, + const tensor& src, + const tensor& gamma, + tensor& src_grad, + tensor& gamma_grad, + tensor& beta_grad + ); + // ----------------------------------------------------------------------------------- class dropout