optimized code a little

Davis King 2015-10-17 22:08:23 -04:00
parent 3ba21975f7
commit 681abe787e
2 changed files with 4 additions and 3 deletions


@@ -49,9 +49,9 @@ namespace dlib
     void gpu_data::
     copy_to_host() const
     {
-        wait_for_transfer_to_finish();
         if (!host_current)
         {
+            wait_for_transfer_to_finish();
             std::cout << "cudaMemcpy to host" << std::endl;
             CHECK_CUDA(cudaMemcpy(data_host.get(), data_device.get(), data_size*sizeof(float), cudaMemcpyDeviceToHost));
             host_current = true;
@@ -78,9 +78,9 @@ namespace dlib
         size_t new_size
     )
     {
-        wait_for_transfer_to_finish();
         if (new_size == 0)
         {
+            wait_for_transfer_to_finish();
             data_size = 0;
             host_current = true;
             device_current = true;
@@ -89,6 +89,7 @@ namespace dlib
         }
         else if (new_size != data_size)
         {
+            wait_for_transfer_to_finish();
             data_size = new_size;
             host_current = true;
             device_current = true;
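
The pattern in all three hunks is the same: gpu_data::copy_to_host() and gpu_data::set_size() used to call wait_for_transfer_to_finish() unconditionally on entry, so a caller blocked on an in-flight host-to-device upload even when it had nothing to copy or resize. Moving the wait inside the branches defers that synchronization until the host copy is actually stale or the buffer really changes size. Below is a minimal self-contained sketch of the idea, with std::future standing in for the CUDA stream; SimpleGpuData is an illustrative name, not dlib's API.

    #include <future>
    #include <iostream>

    // Toy stand-in for dlib's gpu_data: a std::future plays the role of the
    // CUDA stream that wait_for_transfer_to_finish() synchronizes on.
    struct SimpleGpuData
    {
        mutable std::future<void> active_transfer; // pending async upload, if any
        mutable bool host_current = true;

        void wait_for_transfer_to_finish() const
        {
            if (active_transfer.valid())
                active_transfer.get(); // block until the async copy completes
        }

        void copy_to_host() const
        {
            // Before this commit the wait happened here, stalling the caller
            // even when the host copy was already up to date.
            if (!host_current)
            {
                // After this commit the wait only runs when device data
                // actually has to be copied back.
                wait_for_transfer_to_finish();
                std::cout << "cudaMemcpy to host" << std::endl;
                host_current = true;
            }
        }
    };

    int main()
    {
        SimpleGpuData d;
        d.active_transfer = std::async(std::launch::async, []{ /* upload */ });
        d.copy_to_host(); // host copy is current, so this returns immediately
    }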


@@ -21,7 +21,7 @@ namespace dlib
         - if (data_device) then
             - data_device == a pointer to size() floats in device memory.
-        - if (there might be an active transfer between host and device) then
+        - if (there might be an active async transfer from host to device) then
             - have_active_transfer == true
         - We use the host_current and device_current bools to keep track of which
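
The header edit narrows the documented invariant to match: the only asynchronous operation gpu_data performs is the host-to-device upload, so have_active_transfer only ever guards that direction, and a device-to-host read must drain the pending upload before it starts. A hedged sketch of that pairing, assuming the wait is implemented as a stream synchronize (the diff above does not show this, and the names here are illustrative, not dlib's members):

    #include <cuda_runtime.h>

    void upload_then_read_back(float* host, float* device, size_t n, cudaStream_t stream)
    {
        // host -> device: asynchronous, returns before the copy completes
        cudaMemcpyAsync(device, host, n*sizeof(float), cudaMemcpyHostToDevice, stream);
        bool have_active_transfer = true;

        // device -> host: must not race with the pending upload, so wait
        // first (the wait_for_transfer_to_finish() analogue), then issue a
        // plain synchronous cudaMemcpy as copy_to_host() does.
        if (have_active_transfer)
            cudaStreamSynchronize(stream);
        cudaMemcpy(host, device, n*sizeof(float), cudaMemcpyDeviceToHost);
    }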