From ccd8b64f1bb551b73ae3c80152ca105cc3ffbb7b Mon Sep 17 00:00:00 2001
From: Juha Reunanen
Date: Sun, 7 Apr 2019 21:27:03 +0300
Subject: [PATCH] Semantic-segmentation loss calculation: fix buffer usage on multi-GPU training (#1717)

* Semantic-segmentation loss calculation: fix buffer usage on multi-GPU training

* Review fix: make the work buffer live longer
---
 dlib/cuda/cuda_dlib.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/dlib/cuda/cuda_dlib.h b/dlib/cuda/cuda_dlib.h
index d9c27b262..796fa18a8 100644
--- a/dlib/cuda/cuda_dlib.h
+++ b/dlib/cuda/cuda_dlib.h
@@ -423,7 +423,6 @@ namespace dlib
         compute_loss_multiclass_log_per_pixel(
         )
         {
-            work = device_global_buffer();
         }
 
         template <
@@ -439,6 +438,10 @@ namespace dlib
             const size_t bytes_per_plane = subnetwork_output.nr()*subnetwork_output.nc()*sizeof(uint16_t);
             // Allocate a cuda buffer to store all the truth images and also one float
             // for the scalar loss output.
+            if (!work)
+            {
+                work = device_global_buffer();
+            }
             cuda_data_void_ptr buf = work->get(subnetwork_output.num_samples()*bytes_per_plane + sizeof(float));
 
             cuda_data_void_ptr loss_buf = buf;
@@ -467,7 +470,7 @@ namespace dlib
             double& loss
         );
 
-        std::shared_ptr<resizable_cuda_buffer> work;
+        mutable std::shared_ptr<resizable_cuda_buffer> work;
     };
 
 // ------------------------------------------------------------------------------------
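
Note: the patch defers the device_global_buffer() call from the constructor to the point of use, so the work buffer is obtained for whichever CUDA device is current when the loss is actually computed, and the member becomes mutable so the deferred initialization can happen from a const call path. Below is a minimal, self-contained sketch of that lazy-initialization pattern; device_buffer, allocate_on_current_device(), and loss_worker are placeholder names for illustration, not dlib's actual types.

```cpp
#include <memory>

// Stand-in for a scratch buffer that lives on the currently selected GPU.
struct device_buffer { /* device memory handle */ };

// Stand-in for dlib's device_global_buffer(): hands out a buffer tied to the
// device that is current at the time of the call.
std::shared_ptr<device_buffer> allocate_on_current_device()
{
    return std::make_shared<device_buffer>();
}

class loss_worker
{
public:
    loss_worker() = default;   // No allocation here: the "wrong" GPU may be
                               // the current device at construction time.

    void do_work() const
    {
        // Lazily allocate on first use, i.e. on the device that actually runs
        // the computation.  The member is mutable so this const method can
        // perform the deferred initialization.
        if (!work)
            work = allocate_on_current_device();
        // ... use *work as scratch space ...
    }

private:
    mutable std::shared_ptr<device_buffer> work;
};

int main()
{
    loss_worker w;
    w.do_work();   // buffer is created here, not in the constructor
}
```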