mirror of https://github.com/davisking/dlib.git
* Problem: see #1513 (https://github.com/davisking/dlib/issues/1513) Candidate solution: busy-loop until cudaStreamQuery returns cudaSuccess * Make the suggested fix a Windows-only thing
This commit is contained in:
parent
d2dcdd66ad
commit
d7e6f1d726
|
@ -118,6 +118,25 @@ namespace dlib
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
// This should be pretty much the same as cudaStreamSynchronize, which for some
|
||||||
|
// reason makes training freeze on some Windows machines.
|
||||||
|
// (see https://github.com/davisking/dlib/issues/1513)
|
||||||
|
void synchronize_stream(cudaStream_t stream)
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaStreamQuery(stream);
|
||||||
|
switch (err)
|
||||||
|
{
|
||||||
|
case cudaSuccess: return; // now we are synchronized
|
||||||
|
case cudaErrorNotReady: break; // continue waiting
|
||||||
|
default: CHECK_CUDA(err); // unexpected error: throw
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // WIN32
|
||||||
|
|
||||||
void gpu_data::
|
void gpu_data::
|
||||||
async_copy_to_device() const
|
async_copy_to_device() const
|
||||||
{
|
{
|
||||||
|
@ -127,7 +146,12 @@ namespace dlib
|
||||||
{
|
{
|
||||||
// Wait for any possible CUDA kernels that might be using our memory block to
|
// Wait for any possible CUDA kernels that might be using our memory block to
|
||||||
// complete before we overwrite the memory.
|
// complete before we overwrite the memory.
|
||||||
|
#ifdef WIN32
|
||||||
|
synchronize_stream(0);
|
||||||
|
#else
|
||||||
CHECK_CUDA(cudaStreamSynchronize(0));
|
CHECK_CUDA(cudaStreamSynchronize(0));
|
||||||
|
#endif
|
||||||
|
|
||||||
device_in_use = false;
|
device_in_use = false;
|
||||||
}
|
}
|
||||||
CHECK_CUDA(cudaMemcpyAsync(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice, (cudaStream_t)cuda_stream.get()));
|
CHECK_CUDA(cudaMemcpyAsync(data_device.get(), data_host.get(), data_size*sizeof(float), cudaMemcpyHostToDevice, (cudaStream_t)cuda_stream.get()));
|
||||||
|
|
Loading…
Reference in New Issue