From fa0e559f7ce2086df58842196cfe23748f0120f6 Mon Sep 17 00:00:00 2001
From: Davis King <davis@dlib.net>
Date: Sat, 17 Oct 2015 11:52:28 -0400
Subject: [PATCH] cleaned up the cuda bindings a bit

---
 dlib/CMakeLists.txt     |   6 +-
 dlib/dnn/cublas_api.cpp |  56 ++++++++
 dlib/dnn/cublas_api.h   |  18 +--
 dlib/dnn/cudnn_api.cpp  | 294 ++++++++++++++++++++++++++++++++++++++++
 dlib/dnn/cudnn_api.h    |  28 +---
 dlib/dnn/tensor.h       |   7 +-
 6 files changed, 369 insertions(+), 40 deletions(-)
 create mode 100644 dlib/dnn/cublas_api.cpp
 create mode 100644 dlib/dnn/cudnn_api.cpp

diff --git a/dlib/CMakeLists.txt b/dlib/CMakeLists.txt
index f3bc7be1d..27438ce5b 100644
--- a/dlib/CMakeLists.txt
+++ b/dlib/CMakeLists.txt
@@ -434,7 +434,11 @@ if (NOT TARGET dlib)
 
          if (CUDA_FOUND AND cudnn AND cudnn_include AND COMPILER_CAN_DO_CPP_11)
             message(STATUS "Found cuDNN: " ${cudnn})
-            set(source_files ${source_files} dnn/cuda.cu )
+            set(source_files ${source_files} 
+               dnn/cuda.cu 
+               dnn/cudnn_api.cpp
+               dnn/cublas_api.cpp
+               )
             set(dlib_needed_libraries ${dlib_needed_libraries} ${CUDA_CUBLAS_LIBRARIES} ${cudnn})
             include_directories(${cudnn_include})
          else()
diff --git a/dlib/dnn/cublas_api.cpp b/dlib/dnn/cublas_api.cpp
new file mode 100644
index 000000000..2a86afe57
--- /dev/null
+++ b/dlib/dnn/cublas_api.cpp
@@ -0,0 +1,56 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_DNN_CuBLAS_CPP_
+#define DLIB_DNN_CuBLAS_CPP_
+
+#ifdef DLIB_USE_CUDA
+
+#include "cublas_api.h"
+
+#include <cublas_v2.h>
+
+namespace dlib
+{
+    namespace cuda 
+    {
+
+    // -----------------------------------------------------------------------------------
+
+        cublas_context::
+        cublas_context()
+        {
+            // TODO
+        }
+
+        cublas_context::
+        ~cublas_context()
+        {
+            // TODO
+        }
+
+    // -----------------------------------------------------------------------------------
+
+        void gemm (
+            cublas_context& context,
+            float beta,
+            tensor& dest,
+            float alpha,
+            const tensor& lhs,
+            bool trans_lhs,
+            const tensor& rhs,
+            bool trans_rhs
+        )
+        {
+        }
+
+    // ------------------------------------------------------------------------------------
+
+    }  
+}
+
+#endif // DLIB_USE_CUDA
+
+#endif // DLIB_DNN_CuBLAS_CPP_
+
+
+
diff --git a/dlib/dnn/cublas_api.h b/dlib/dnn/cublas_api.h
index 703fdd863..f44ce50dc 100644
--- a/dlib/dnn/cublas_api.h
+++ b/dlib/dnn/cublas_api.h
@@ -6,6 +6,7 @@
 #ifdef DLIB_USE_CUDA
 
 #include "tensor.h"
+#include "../error.h"
 
 namespace dlib
 {
@@ -28,18 +29,11 @@ namespace dlib
             cublas_context(const cublas_context&) = delete;
             cublas_context& operator=(const cublas_context&) = delete;
             // but is movable
-            cublas_context(const cublas_context&&) = default;
-            cublas_context& operator=(const cublas_context&&) = default;
+            cublas_context(cublas_context&&) = default;
+            cublas_context& operator=(cublas_context&&) = default;
 
-            cublas_context()
-            {
-                // TODO
-            }
-
-            ~cublas_context()
-            {
-                // TODO
-            }
+            cublas_context();
+            ~cublas_context();
 
             const void* get_handle (
             ) const { return handle; }
@@ -56,7 +50,7 @@ namespace dlib
             float beta,
             tensor& dest,
             float alpha,
-            const tensor& lhs 
+            const tensor& lhs,
             bool trans_lhs,
             const tensor& rhs,
             bool trans_rhs
diff --git a/dlib/dnn/cudnn_api.cpp b/dlib/dnn/cudnn_api.cpp
new file mode 100644
index 000000000..6d6b7be58
--- /dev/null
+++ b/dlib/dnn/cudnn_api.cpp
@@ -0,0 +1,294 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_DNN_CuDNN_CPP_
+#define DLIB_DNN_CuDNN_CPP_
+
+#ifdef DLIB_USE_CUDA
+
+#include "cudnn_api.h"
+#include "tensor.h"
+#include <cudnn.h>
+
+namespace dlib
+{
+    namespace cuda 
+    {
+
+        // TODO, make into a macro that prints more information like the line number, etc.
+        static void check(cudnnStatus_t s)
+        {
+            switch(s)
+            {
+                case CUDNN_STATUS_SUCCESS: return;
+                case CUDNN_STATUS_NOT_INITIALIZED: 
+                    throw cudnn_error("CUDA Runtime API initialization failed.");
+                case CUDNN_STATUS_ALLOC_FAILED: 
+                    throw cudnn_error("CUDA Resources could not be allocated.");
+                default:
+                    throw cudnn_error("A call to cuDNN failed.");
+            }
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        cudnn_context::cudnn_context() : handle(nullptr)
+        {
+            cudnnHandle_t h;
+            check(cudnnCreate(&h));
+            handle = h;
+        }
+
+        cudnn_context::~cudnn_context()
+        {
+            if (handle)
+            {
+                cudnnDestroy((cudnnHandle_t)handle);
+                handle = nullptr;
+            }
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        tensor_descriptor::tensor_descriptor() : handle(nullptr)
+        {
+            cudnnTensorDescriptor_t h;
+            check(cudnnCreateTensorDescriptor(&h));
+            handle = h;
+        }
+
+        tensor_descriptor::~tensor_descriptor()
+        {
+            if (handle)
+            {
+                cudnnDestroyTensorDescriptor((cudnnTensorDescriptor_t)handle);
+                handle = nullptr;
+            }
+        }
+
+        void tensor_descriptor::
+        set_size(
+            int n, 
+            int nr, 
+            int nc, 
+            int k
+        )
+        {
+            check(cudnnSetTensor4dDescriptor((cudnnTensorDescriptor_t)handle,
+                                       CUDNN_TENSOR_NHWC,
+                                       CUDNN_DATA_FLOAT,
+                                       n,
+                                       k,
+                                       nr,
+                                       nc));
+        }
+
+        void tensor_descriptor::
+        get_size (
+            int& n, 
+            int& nr, 
+            int& nc, 
+            int& k
+        ) const
+        {
+            int nStride, cStride, hStride, wStride;
+            cudnnDataType_t datatype;
+            check(cudnnGetTensor4dDescriptor((cudnnTensorDescriptor_t)handle,
+                                       &datatype,
+                                       &n,
+                                       &k,
+                                       &nr,
+                                       &nc,
+                                       &nStride,
+                                       &cStride,
+                                       &hStride,
+                                       &wStride));
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        void add(
+            cudnn_context& context, 
+            float beta,
+            tensor& dest,
+            float alpha,
+            const tensor& src
+        )
+        {
+        }
+
+        void set_tensor (
+            cudnn_context& context,
+            tensor& t,
+            float value
+        )
+        {
+        }
+
+        void scale_tensor (
+            cudnn_context& context,
+            tensor& t,
+            float value
+        )
+        {
+        }
+
+    // ------------------------------------------------------------------------------------
+    // ------------------------------------------------------------------------------------
+
+        conv::conv(
+            cudnn_context& context,
+            const tensor& data,
+            const tensor& filters,
+            int stride_y,
+            int stride_x
+        )
+        {
+        }
+
+        void conv::operator() (
+            resizable_tensor& output,
+            const tensor& data,
+            const tensor& filters
+        )
+        {
+        }
+
+        void conv::get_gradient_for_data (
+            const tensor& gradient_input, 
+            const tensor& filters,
+            tensor& data_gradient
+        )
+        {
+        }
+
+        void conv::
+        get_gradient_for_filters (
+            const tensor& gradient_input, 
+            const tensor& data,
+            tensor& filters_gradient
+        )
+        {
+        }
+
+    // ------------------------------------------------------------------------------------
+    // ------------------------------------------------------------------------------------
+
+        void soft_max (
+            cudnn_context& context,
+            resizable_tensor& dest,
+            const tensor& src
+        )
+        {
+        }
+
+        void soft_max_gradient (
+            cudnn_context& context,
+            tensor& grad,
+            const tensor& src,
+            const tensor& gradient_input
+        )
+        {
+        }
+
+    // ------------------------------------------------------------------------------------
+    // ------------------------------------------------------------------------------------
+
+        max_pool::max_pool (
+            cudnn_context& context,
+            int window_height,
+            int window_width,
+            int stride_y,
+            int stride_x
+        )
+        {
+        }
+
+        max_pool::~max_pool(
+        )
+        {
+        }
+
+        void max_pool::
+        operator() (
+            resizable_tensor& dest,
+            const tensor& src
+        )
+        {
+        }
+
+        void max_pool::get_gradient(
+            const tensor& gradient_input, 
+            const tensor& src,
+            tensor& grad 
+        )
+        {
+        }
+
+    // ------------------------------------------------------------------------------------
+    // ------------------------------------------------------------------------------------
+
+        void sigmoid (
+            cudnn_context& context,
+            resizable_tensor& dest,
+            const tensor& src
+        )
+        {
+        }
+
+        void sigmoid_gradient (
+            cudnn_context& context,
+            tensor& grad,
+            const tensor& src,
+            const tensor& gradient_input
+        )
+        {
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        void relu (
+            cudnn_context& context,
+            resizable_tensor& dest,
+            const tensor& src
+        )
+        {
+        }
+
+        void relu_gradient (
+            cudnn_context& context,
+            tensor& grad,
+            const tensor& src,
+            const tensor& gradient_input
+        )
+        {
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        void tanh (
+            cudnn_context& context,
+            resizable_tensor& dest,
+            const tensor& src
+        )
+        {
+        }
+
+        void tanh_gradient (
+            cudnn_context& context,
+            tensor& grad,
+            const tensor& src,
+            const tensor& gradient_input
+        )
+        {
+        }
+
+    // ------------------------------------------------------------------------------------
+
+    } 
+}
+
+#endif // DLIB_USE_CUDA
+
+#endif // DLIB_DNN_CuDNN_CPP_
+
+
diff --git a/dlib/dnn/cudnn_api.h b/dlib/dnn/cudnn_api.h
index 88cf0d4a0..39f9540bf 100644
--- a/dlib/dnn/cudnn_api.h
+++ b/dlib/dnn/cudnn_api.h
@@ -5,6 +5,7 @@
 
 #ifdef DLIB_USE_CUDA
 
+#include "../error.h"
 
 namespace dlib
 {
@@ -33,17 +34,8 @@ namespace dlib
             cudnn_context(cudnn_context&&) = default;
             cudnn_context& operator=(cudnn_context&&) = default;
 
-            cudnn_context()
-            {
-                // TODO
-                // cudnnCreate()
-            }
-
-            ~cudnn_context()
-            {
-                // TODO
-                // cudnnDestroy()
-            }
+            cudnn_context();
+            ~cudnn_context();
 
             const void* get_handle (
             ) const { return handle; }
@@ -70,18 +62,8 @@ namespace dlib
             tensor_descriptor(tensor_descriptor&&) = default;
             tensor_descriptor& operator=(tensor_descriptor&&) = default;
 
-            tensor_descriptor()
-            {
-                //  cudnnCreateTensorDescriptor()
-
-                // Also call cudnnSetTensor4dDescriptor() somewhere.  Probably in a set()
-                // method.
-            }
-
-            ~tensor_descriptor()
-            {
-                // cudnnDestroyTensorDescriptor()
-            }
+            tensor_descriptor();
+            ~tensor_descriptor();
 
             void set_size(
                 int n, 
diff --git a/dlib/dnn/tensor.h b/dlib/dnn/tensor.h
index 90073c5aa..a2147257e 100644
--- a/dlib/dnn/tensor.h
+++ b/dlib/dnn/tensor.h
@@ -6,7 +6,7 @@
 #include <memory>
 #include <cstring>
 #include "../matrix.h"
-#include "cudnn.h"
+#include "cudnn_api.h"
 
 namespace dlib
 {
@@ -338,7 +338,7 @@ namespace dlib
 
 // ----------------------------------------------------------------------------------------
 
-    const matrix_op<op_pointer_to_mat<float> > mat (
+    inline const matrix_op<op_pointer_to_mat<float> > mat (
         const tensor& t,
         long nr,
         long nc
@@ -360,7 +360,7 @@ namespace dlib
         return matrix_op<op>(op(t.host(),nr,nc));
     }
 
-    const matrix_op<op_pointer_to_mat<float> > mat (
+    inline const matrix_op<op_pointer_to_mat<float> > mat (
         const tensor& t
     )
     {
@@ -486,7 +486,6 @@ namespace dlib
             data.set_size(m_n*m_nr*m_nc*m_k);
 #ifdef DLIB_USE_CUDA
             cudnn_descriptor.set_size(m_n,m_nr,m_nc,m_k);
-
 #endif
         }
     };