optimized code a little

Davis King 2015-10-17 22:08:23 -04:00
parent 3ba21975f7
commit 681abe787e
2 changed files with 4 additions and 3 deletions


@@ -49,9 +49,9 @@ namespace dlib
     void gpu_data::
     copy_to_host() const
     {
-        wait_for_transfer_to_finish();
         if (!host_current)
         {
+            wait_for_transfer_to_finish();
             std::cout << "cudaMemcpy to host" << std::endl;
             CHECK_CUDA(cudaMemcpy(data_host.get(), data_device.get(), data_size*sizeof(float), cudaMemcpyDeviceToHost));
             host_current = true;
@@ -78,9 +78,9 @@ namespace dlib
         size_t new_size
     )
     {
-        wait_for_transfer_to_finish();
         if (new_size == 0)
         {
+            wait_for_transfer_to_finish();
             data_size = 0;
             host_current = true;
             device_current = true;
@@ -89,6 +89,7 @@ namespace dlib
         }
         else if (new_size != data_size)
         {
+            wait_for_transfer_to_finish();
             data_size = new_size;
             host_current = true;
             device_current = true;
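
The pattern in all three hunks is the same: gpu_data::copy_to_host() and gpu_data::set_size() used to call wait_for_transfer_to_finish() unconditionally on entry, so a caller blocked on an in-flight host-to-device upload even when it had nothing to copy or resize. Moving the wait inside the branches defers that synchronization until the host copy is actually stale or the buffer really changes size. Below is a minimal self-contained sketch of the idea, with std::future standing in for the CUDA stream; SimpleGpuData is an illustrative name, not dlib's API.

    #include <future>
    #include <iostream>

    // Toy stand-in for dlib's gpu_data: a std::future plays the role of the
    // CUDA stream that wait_for_transfer_to_finish() synchronizes on.
    struct SimpleGpuData
    {
        mutable std::future<void> active_transfer; // pending async upload, if any
        mutable bool host_current = true;

        void wait_for_transfer_to_finish() const
        {
            if (active_transfer.valid())
                active_transfer.get(); // block until the async copy completes
        }

        void copy_to_host() const
        {
            // Before this commit the wait happened here, stalling the caller
            // even when the host copy was already up to date.
            if (!host_current)
            {
                // After this commit the wait only runs when device data
                // actually has to be copied back.
                wait_for_transfer_to_finish();
                std::cout << "cudaMemcpy to host" << std::endl;
                host_current = true;
            }
        }
    };

    int main()
    {
        SimpleGpuData d;
        d.active_transfer = std::async(std::launch::async, []{ /* upload */ });
        d.copy_to_host(); // host copy is current, so this returns immediately
    }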


@@ -21,7 +21,7 @@ namespace dlib
         - if (data_device) then
             - data_device == a pointer to size() floats in device memory.
-        - if (there might be an active transfer between host and device) then
+        - if (there might be an active async transfer from host to device) then
             - have_active_transfer == true
         - We use the host_current and device_current bools to keep track of which
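
The header edit narrows the documented invariant to match: the only asynchronous operation gpu_data performs is the host-to-device upload, so have_active_transfer only ever guards that direction, and a device-to-host read must drain the pending upload before it starts. A hedged sketch of that pairing, assuming the wait is implemented as a stream synchronize (the diff above does not show this, and the names here are illustrative, not dlib's members):

    #include <cuda_runtime.h>

    void upload_then_read_back(float* host, float* device, size_t n, cudaStream_t stream)
    {
        // host -> device: asynchronous, returns before the copy completes
        cudaMemcpyAsync(device, host, n*sizeof(float), cudaMemcpyHostToDevice, stream);
        bool have_active_transfer = true;

        // device -> host: must not race with the pending upload, so wait
        // first (the wait_for_transfer_to_finish() analogue), then issue a
        // plain synchronous cudaMemcpy as copy_to_host() does.
        if (have_active_transfer)
            cudaStreamSynchronize(stream);
        cudaMemcpy(host, device, n*sizeof(float), cudaMemcpyDeviceToHost);
    }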