Clarify training procedure.

Summary of content from @Strateus and @melgor at https://groups.google.com/forum/#!topic/cmu-openface/XgxfN8Xy9nA
2015-11-23 16:12:59 -05:00 · 2015-11-23 16:12:59 -05:00 · df89d749d1
parent bcf69ec2bb
commit df89d749d1
1 changed files with 73 additions and 4 deletions
--- a/training/train.lua
+++ b/training/train.lua
@ -12,6 +12,75 @@
 -- See the License for the specific language governing permissions and
 -- limitations under the License.

+
+-- This code samples images and trains a triplet network with the
+-- following steps, which are referenced inline.
+--
+-- [Step 1]
+-- Sample at most opt.peoplePerBatch * opt.imagesPerPerson
+-- images by choosing random people and images from the
+-- training set.
+--
+-- [Step 2]
+-- Compute the embeddings of all of these images by doing forward
+-- passs with the current state of a network.
+-- This is done offline and the network is not modified.
+-- Since not all of the images will fit in GPU memory, this is
+-- split into minibatches.
+--
+-- [Step 3]
+-- Select the semi-hard triplets as described in the FaceNet paper.
+--
+-- [Step 4]
+-- Google is able to do a single forward and backward pass to process
+-- all the triplets and update the network's parameters at once since
+-- they use a distributed system.
+-- With a memory-limited GPU, OpenFace uses smaller mini-batches and
+-- does many forward and backward passes to iteratively update the
+-- network's parameters.
+--
+--
+--
+-- Some other useful references for models with shared weights are:
+--
+--  1. Weinberger, K. Q., & Saul, L. K. (2009).
+--     Distance metric learning for large margin
+--     nearest neighbor classification.
+--     The Journal of Machine Learning Research, 10, 207-244.
+--
+--     http://machinelearning.wustl.edu/mlpapers/paper_files/jmlr10_weinberger09a.pdf
+--
+--
+--     Citation from the FaceNet paper on their motivation for
+--     using the triplet loss.
+--
+--
+--  2. Chopra, S., Hadsell, R., & LeCun, Y. (2005, June).
+--     Learning a similarity metric discriminatively, with application
+--     to face verification.
+--     In Computer Vision and Pattern Recognition, 2005. CVPR 2005.
+--     IEEE Computer Society Conference on (Vol. 1, pp. 539-546). IEEE.
+--
+--     http://yann.lecun.com/exdb/publis/pdf/chopra-05.pdf
+--
+--
+--     The idea is to just look at pairs of images at a time
+--     rather than triplets, which they train with two networks
+--     in parallel with shared weights.
+--
+--  3. Hoffer, E., & Ailon, N. (2014).
+--     Deep metric learning using Triplet network.
+--     arXiv preprint arXiv:1412.6622.
+--
+--     http://arxiv.org/abs/1412.6622
+--
+--
+--     Not used in OpenFace or FaceNet, but another view of triplet
+--     networks that provides slightly more details about training using
+--     three networks with shared weights.
+--     The code uses Torch and is available on GitHub at
+--     https://github.com/eladhoffer/TripletNet
+
 require 'optim'
 require 'fbnn'
 require 'image'
@ -46,15 +115,14 @@ function train()
   while batchNumber < opt.epochSize do
      -- queue jobs to data-workers
      donkeys:addjob(
-         -- the job callback (runs in data-worker thread)
         function()
+            -- [Step 1]: Sample people/images from the dataset.
            local inputs, numPerClass = trainLoader:samplePeople(opt.peoplePerBatch,
                                                                 opt.imagesPerPerson)
            inputs = inputs:float()
            numPerClass = numPerClass:float()
            return sendTensor(inputs), sendTensor(numPerClass)
         end,
-         -- the end callback (runs in the main thread)
         trainBatch
      )
      if i % 5 == 0 then
@ -114,8 +182,7 @@ function trainBatch(inputsThread, numPerClassThread)
   receiveTensor(inputsThread, inputsCPU)
   receiveTensor(numPerClassThread, numPerClass)

-   -- inputs:resize(inputsCPU:size()):copy(inputsCPU)
-
+   -- [Step 2]: Compute embeddings.
   local numImages = inputsCPU:size(1)
   local embeddings = torch.Tensor(numImages, 128)
   local singleNet = model.modules[1]
@ -133,6 +200,7 @@ function trainBatch(inputsThread, numPerClassThread)
   end
   assert(beginIdx - 1 == numImages)

+   -- [Step 3]: Select semi-hard triplets.
   local numTrips = numImages - opt.peoplePerBatch
   local as = torch.Tensor(numTrips, inputs:size(2),
                           inputs:size(3), inputs:size(4))
@ -194,6 +262,7 @@ function trainBatch(inputsThread, numPerClassThread)
   print(('  + (nRandomNegs, nTrips) = (%d, %d)'):format(nRandomNegs, numTrips))


+   -- [Step 4]: Upate network parameters.
   local beginIdx = 1
   local asCuda = torch.CudaTensor()
   local psCuda = torch.CudaTensor()