diff --git a/dlib/dnn/gpu_data.cpp b/dlib/dnn/gpu_data.cpp
index 2f0b105db..089f8480b 100644
--- a/dlib/dnn/gpu_data.cpp
+++ b/dlib/dnn/gpu_data.cpp
@@ -136,6 +136,8 @@ namespace dlib
             try
             {
+                CHECK_CUDA(cudaGetDevice(&the_device_id));
+
                 void* data;
                 CHECK_CUDA(cudaMallocHost(&data, new_size*sizeof(float)));
                 // Note that we don't throw exceptions since the free calls are invariably
                 // called in destructors.  They also shouldn't fail anyway unless someone
diff --git a/dlib/dnn/gpu_data.h b/dlib/dnn/gpu_data.h
index 398fb59b0..15db5ab3d 100644
--- a/dlib/dnn/gpu_data.h
+++ b/dlib/dnn/gpu_data.h
@@ -40,7 +40,7 @@ namespace dlib
     public:
 
         gpu_data(
-        ) : data_size(0), host_current(true), device_current(true),have_active_transfer(false),device_in_use(false)
+        ) : data_size(0), host_current(true), device_current(true),have_active_transfer(false),device_in_use(false), the_device_id(0)
         {
         }
 
@@ -52,6 +52,7 @@ namespace dlib
         gpu_data(gpu_data&& item) : gpu_data() { swap(item); }
         gpu_data& operator=(gpu_data&& item) { swap(item); return *this; }
 
+        int device_id() const { return the_device_id; }
 
 #ifdef DLIB_USE_CUDA
         void async_copy_to_device() const;
@@ -153,6 +154,7 @@ namespace dlib
             std::swap(data_host, item.data_host);
             std::swap(data_device, item.data_device);
             std::swap(cuda_stream, item.cuda_stream);
+            std::swap(the_device_id, item.the_device_id);
         }
 
     private:
@@ -177,6 +179,7 @@ namespace dlib
         std::shared_ptr<float> data_host;
         std::shared_ptr<float> data_device;
         std::shared_ptr<void> cuda_stream;
+        int the_device_id;
     };
 
     inline void serialize(const gpu_data& item, std::ostream& out)
diff --git a/dlib/dnn/gpu_data_abstract.h b/dlib/dnn/gpu_data_abstract.h
index 349eb6d1c..09b274094 100644
--- a/dlib/dnn/gpu_data_abstract.h
+++ b/dlib/dnn/gpu_data_abstract.h
@@ -45,6 +45,7 @@ namespace dlib
                 - #device() == nullptr
                 - #host_ready() == true
                 - #device_ready() == true
+                - #device_id() == 0
         !*/
 
         // This object is not copyable, however, it is movable.
@@ -53,6 +54,14 @@ namespace dlib
         gpu_data(gpu_data&& item);
         gpu_data& operator=(gpu_data&& item);
 
+        int device_id(
+        ) const;
+        /*!
+            ensures
+                - returns the ID of the CUDA device that allocated this memory.  I.e. the
+                  number returned by cudaGetDevice() when the memory was allocated.
+                - If CUDA is not being used then this function always returns 0.
+        !*/
 
         void async_copy_to_device(
         );
diff --git a/dlib/dnn/tensor.h b/dlib/dnn/tensor.h
index b077ef45f..13e3deec9 100644
--- a/dlib/dnn/tensor.h
+++ b/dlib/dnn/tensor.h
@@ -52,6 +52,8 @@ namespace dlib
         virtual float* device() = 0;
         virtual float* device_write_only() = 0;
 
+        int device_id() const { return data().device_id(); }
+
         tensor& operator= (float val)
         {
 #ifdef DLIB_USE_CUDA
diff --git a/dlib/dnn/tensor_abstract.h b/dlib/dnn/tensor_abstract.h
index 56a32dc9d..8d59c322f 100644
--- a/dlib/dnn/tensor_abstract.h
+++ b/dlib/dnn/tensor_abstract.h
@@ -187,6 +187,15 @@ namespace dlib
               every memory location in the returned memory block.
         !*/
 
+        int device_id(
+        ) const;
+        /*!
+            ensures
+                - returns the ID of the CUDA device that allocated this memory.  I.e. the
+                  number returned by cudaGetDevice() when the memory was allocated.
+                - If CUDA is not being used then this function always returns 0.
+        !*/
+
         tensor& operator= (
            float val
        );
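
The patch above records, at allocation time, which CUDA device owns each gpu_data buffer and exposes it through gpu_data::device_id() and tensor::device_id().  Below is a minimal sketch of how the new accessor might be used to guard a multi-GPU code path.  The helper require_same_device() is hypothetical and not part of this patch, and the sketch assumes dlib is built with DLIB_USE_CUDA; without CUDA, device_id() always returns 0, so the check passes trivially.

    #include <dlib/dnn.h>
    #include <sstream>
    #include <stdexcept>

    // Hypothetical helper (not part of this patch): refuse to mix tensors
    // whose memory was allocated on different CUDA devices.
    void require_same_device(const dlib::tensor& a, const dlib::tensor& b)
    {
        if (a.device_id() != b.device_id())
        {
            std::ostringstream err;
            err << "tensors allocated on different CUDA devices: "
                << a.device_id() << " vs. " << b.device_id();
            throw std::invalid_argument(err.str());
        }
    }

    int main()
    {
        dlib::resizable_tensor a, b;
        a.set_size(2, 3, 4, 4);
        b.set_size(2, 3, 4, 4);

        // Both tensors were allocated while the same device was current, so
        // their device IDs match and this check passes.
        require_same_device(a, b);
    }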