Apparently fix #1513 by avoiding the cudaStreamSynchronize call (#1514)

* Problem: see #1513 (https://github.com/davisking/dlib/issues/1513)
Candidate solution: busy-loop until cudaStreamQuery returns cudaSuccess

* Make the suggested fix a Windows-only thing
This commit is contained in:
Juha Reunanen 2018-11-11 17:33:15 +02:00 committed by Davis E. King
parent d2dcdd66ad
commit d7e6f1d726
1 changed file with 24 additions and 0 deletions

View File

@@ -118,6 +118,25 @@ namespace dlib
} }
} }
#ifdef WIN32
// Behaves like cudaStreamSynchronize, but polls the stream instead of
// blocking in the driver, because cudaStreamSynchronize makes training
// freeze on some Windows machines for reasons that are not understood.
// (see https://github.com/davisking/dlib/issues/1513)
// NOTE: this is a deliberate busy-wait; it will occupy a CPU core until
// all work queued on the stream has completed.
void synchronize_stream(cudaStream_t stream)
{
    for (;;)
    {
        const cudaError_t status = cudaStreamQuery(stream);
        if (status == cudaSuccess)
            return; // all work on the stream has completed
        if (status != cudaErrorNotReady)
            CHECK_CUDA(status); // a real error occurred: report it
        // status == cudaErrorNotReady: stream still busy, keep polling
    }
}
#endif // WIN32
void gpu_data::
async_copy_to_device() const
{
@@ -127,7 +146,12 @@ namespace dlib
{
// Wait for any possible CUDA kernels that might be using our memory block to
// complete before we overwrite the memory.
#ifdef WIN32
synchronize_stream(0);
#else
CHECK_CUDA(cudaStreamSynchronize(0));
#endif
device_in_use = false;
}
CHECK_CUDA(cudaMemcpyAsync(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice, (cudaStream_t)cuda_stream.get()));