diff --git a/dlib/cuda/gpu_data.cpp b/dlib/cuda/gpu_data.cpp index 6e7cec6be..3c999835d 100644 --- a/dlib/cuda/gpu_data.cpp +++ b/dlib/cuda/gpu_data.cpp @@ -118,6 +118,25 @@ namespace dlib } } +#ifdef WIN32 + // This should be pretty much the same as cudaStreamSynchronize, which for some + // reason makes training freeze on some Windows machines. + // (see https://github.com/davisking/dlib/issues/1513) + void synchronize_stream(cudaStream_t stream) + { + while (true) + { + cudaError_t err = cudaStreamQuery(stream); + switch (err) + { + case cudaSuccess: return; // now we are synchronized + case cudaErrorNotReady: break; // continue waiting + default: CHECK_CUDA(err); // unexpected error: throw + } + } + } +#endif // WIN32 + void gpu_data:: async_copy_to_device() const { @@ -127,7 +146,12 @@ namespace dlib { // Wait for any possible CUDA kernels that might be using our memory block to // complete before we overwrite the memory. +#ifdef WIN32 + synchronize_stream(0); +#else CHECK_CUDA(cudaStreamSynchronize(0)); +#endif + device_in_use = false; } CHECK_CUDA(cudaMemcpyAsync(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice, (cudaStream_t)cuda_stream.get()));