-- openface/training/OpenFaceOptim.lua

-- Modified from https://github.com/facebook/fbnn/blob/master/fbnn/Optim.lua.

local pl = require('pl.import_into')()

-- Declare the OpenFaceOptim class through torch.class (no parent class name
-- is passed). The second return value, `parent`, is not referenced anywhere
-- else in this file.
local OpenFaceOptim, parent = torch.class('OpenFaceOptim')


-- Recursively duplicates a value.
--   * tables are rebuilt entry by entry, copying both keys and values;
--   * userdata is duplicated through its own 'clone' method, which is how
--     Torch tensors are copied;
--   * every other value is returned unchanged.
-- Tables are not tracked while copying, so a table reachable twice is copied
-- twice.
local function deepcopy(x)
    local kind = type(x)
    if kind == "table" then
        local copy = {}
        for key, value in pairs(x) do
            copy[deepcopy(key)] = deepcopy(value)
        end
        return copy
    elseif kind == "userdata" then
        return x:clone()
    end
    return x
end

-- Collects the trainable tensors of a single module.
--
-- The result is a two-slot table {weight_set, bias_set}:
--   weight_set = {module.weight, module.gradWeight, is_bias = false}
--   bias_set   = {module.bias,   module.gradBias,   is_bias = true}
-- A slot is nil when the module lacks the corresponding 'weight' or 'bias'
-- field, so the result can be empty or contain a hole in slot 1.
function OpenFaceOptim.weight_bias_parameters(module)
    local weight_set, bias_set
    if module.weight then
        weight_set = {module.weight, module.gradWeight, is_bias = false}
    end
    if module.bias then
        bias_set = {module.bias, module.gradBias, is_bias = true}
    end
    return {weight_set, bias_set}
end

-- Builds the per-module optimizer state for `model`.
--
-- Exactly one of `optState` and `checkpoint_data` must be given:
--   model            network object; must provide an `apply(fn)` method that
--                    calls fn on each module.
--   optState         optimizer settings; deep-copied once per parameter set
--                    (weights, biases) of every module.
--   checkpoint_data  table whose `optim_state` field maps each module of
--                    `model` to its previously built state table.
--
-- Raises (via assert) when the arguments are inconsistent, when a module
-- exposes only one of weight/bias, or when the checkpoint's modules do not
-- match the modules of `model`.
function OpenFaceOptim:__init(model, optState, checkpoint_data)
    assert(model, 'OpenFaceOptim: model is required')
    assert(checkpoint_data or optState,
           'OpenFaceOptim: either optState or checkpoint_data is required')
    assert(not (checkpoint_data and optState),
           'OpenFaceOptim: optState and checkpoint_data are mutually exclusive')

    self.model = model
    self.modulesToOptState = {}
    -- The caller's un-cloned optState is kept on the instance.
    -- NOTE(review): the original comment said this is updated in
    -- setParameters, but no such method is visible in this file.
    self.originalOptState = optState

    if checkpoint_data then
        -- Restore: the saved state must cover exactly the modules of `model`.
        local state = checkpoint_data.optim_state
        assert(state, 'OpenFaceOptim: checkpoint_data.optim_state is missing')
        local modules = {}
        self.model:apply(function(m) table.insert(modules, m) end)
        assert(pl.tablex.compare_no_order(modules, pl.tablex.keys(state)),
               'OpenFaceOptim: checkpoint modules do not match the model')
        self.modulesToOptState = state
        return
    end

    -- Each module has some set of parameters and grad parameters. Since
    -- they may be allocated discontinuously, we need separate optState for
    -- each parameter tensor. self.modulesToOptState maps each module to
    -- a lua table of optState clones.
    self.model:apply(function(module)
        local statePerSet = {}
        self.modulesToOptState[module] = statePerSet

        local params = self.weight_bias_parameters(module)
        -- expects either an empty table or 2 element table, one for weights
        -- and one for biases
        local numParamSets = pl.tablex.size(params)
        assert(numParamSets == 0 or numParamSets == 2,
               'OpenFaceOptim: module must have both weight and bias, or neither')

        for i, paramSet in ipairs(params) do
            local state = deepcopy(optState)
            if paramSet.is_bias then
                -- never regularize biases
                state.weightDecay = 0.0
            end
            statePerSet[i] = state
        end
    end)
end

-- Finds the GPU device shared by the CUDA tensors stored directly on `mod`.
--
-- Every field whose torch typename is 'torch.CudaTensor' is asked for its
-- device; a device of 0 is ignored. All remaining tensors must agree on a
-- single device, otherwise an assertion fails.
--
-- Returns that device id, or nil when no field reports a non-zero device.
local function get_device_for_module(mod)
    local device = nil
    for _, field in pairs(mod) do
        if torch.typename(field) == 'torch.CudaTensor' then
            local field_device = field:getDevice()
            if field_device ~= 0 then
                -- all tensors of one module must live on the same device
                assert(device == nil or device == field_device)
                device = field_device
            end
        end
    end
    return device
end

-- Calls `f` and returns its results. When get_device_for_module reports a
-- device for `mod`, the call goes through cutorch.withDevice for that device;
-- otherwise `f` is called directly.
local function on_device_for_module(mod, f)
    local device = get_device_for_module(mod)
    if device == nil then
        return f()
    end
    return cutorch.withDevice(device, f)
end

-- Runs one forward/backward pass and applies `optimMethod` to every
-- parameter tensor of the model.
--
--   optimMethod  function called as optimMethod(feval, param, state) once per
--                parameter set; feval returns the loss and the gradient
--                tensor for that parameter.
--   inputs       value passed to model:forward and model:backward.
--   criterion    object whose forward/backward are called with the model
--                output only (no target argument).
--
-- Returns the criterion value and the model output.
function OpenFaceOptim:optimizeTriplet(optimMethod, inputs, criterion)
    assert(optimMethod, 'optimizeTriplet: optimMethod is required')
    assert(inputs, 'optimizeTriplet: inputs are required')
    assert(criterion, 'optimizeTriplet: criterion is required')
    assert(self.modulesToOptState, 'optimizeTriplet: optimizer state is missing')

    self.model:zeroGradParameters()
    local output = self.model:forward(inputs)

    local err = criterion:forward(output)

    local df_do = criterion:backward(output)
    self.model:backward(inputs, df_do)

    -- Re-pointed at the current gradient tensor before every optimMethod
    -- call, so the closure below can hand it to the optimizer together with
    -- the already computed loss.
    local curGrad
    local function fEvalMod(_)
        return err, curGrad
    end

    for curMod, opt in pairs(self.modulesToOptState) do
        on_device_for_module(curMod, function()
            local curModParams = self.weight_bias_parameters(curMod)
            -- expects either an empty table or 2 element table, one for
            -- weights and one for biases
            local numParamSets = pl.tablex.size(curModParams)
            assert(numParamSets == 0 or numParamSets == 2,
                   'optimizeTriplet: module must have both weight and bias, or neither')

            for i, paramSet in ipairs(curModParams) do
                -- Each set is a {param, gradParam} pair. Index it directly
                -- rather than through table.unpack, which is absent on
                -- Lua 5.1 / LuaJIT builds without 5.2 compatibility.
                local curParam = paramSet[1]
                curGrad = paramSet[2]
                assert(curParam and curGrad,
                       'optimizeTriplet: parameter set lacks a param or gradParam tensor')
                optimMethod(fEvalMod, curParam, opt[i])
            end
        end)
    end

    return err, output
end
Initial commit of training code from development repo. 2015-09-24 07:49:45 +08:00			`-- Modified from https://github.com/facebook/fbnn/blob/master/fbnn/Optim.lua.`

			`local pl = require('pl.import_into')()`

Training: Remove fbnn's Optim dependency. 2015-10-26 01:05:00 +08:00			`local OpenFaceOptim, parent = torch.class('OpenFaceOptim')`


			`-- deepcopy routine that assumes the presence of a 'clone' method in user`
			`-- data should be used to deeply copy. This matches the behavior of Torch`
			`-- tensors.`
			`local function deepcopy(x)`
			`local typename = type(x)`
			`if typename == "userdata" then`
			`return x:clone()`
			`end`
			`if typename == "table" then`
			`local retval = { }`
			`for k,v in pairs(x) do`
			`retval[deepcopy(k)] = deepcopy(v)`
			`end`
			`return retval`
			`end`
			`return x`
			`end`

			`-- Returns weight parameters and bias parameters and associated grad parameters`
			`-- for this module. Annotates the return values with flag marking parameter set`
			`-- as bias parameters set`
			`function OpenFaceOptim.weight_bias_parameters(module)`
			`local weight_params, bias_params`
			`if module.weight then`
			`weight_params = {module.weight, module.gradWeight}`
			`weight_params.is_bias = false`
			`end`
			`if module.bias then`
			`bias_params = {module.bias, module.gradBias}`
			`bias_params.is_bias = true`
			`end`
			`return {weight_params, bias_params}`
			`end`
Initial commit of training code from development repo. 2015-09-24 07:49:45 +08:00
Refactor: facenet->openface. 2015-10-09 22:48:06 +08:00			`function OpenFaceOptim:__init(model, optState, checkpoint_data)`
Training: Remove fbnn's Optim dependency. 2015-10-26 01:05:00 +08:00			`assert(model)`
			`assert(checkpoint_data or optState)`
			`assert(not (checkpoint_data and optState))`

			`self.model = model`
			`self.modulesToOptState = {}`
			`-- Keep this around so we update it in setParameters`
			`self.originalOptState = optState`

			`-- Each module has some set of parameters and grad parameters. Since`
			`-- they may be allocated discontinuously, we need separate optState for`
			`-- each parameter tensor. self.modulesToOptState maps each module to`
			`-- a lua table of optState clones.`
			`if not checkpoint_data then`
			`self.model:apply(function(module)`
			`self.modulesToOptState[module] = { }`
			`local params = self.weight_bias_parameters(module)`
			`-- expects either an empty table or 2 element table, one for weights`
			`-- and one for biases`
			`assert(pl.tablex.size(params) == 0 or pl.tablex.size(params) == 2)`
			`for i, _ in ipairs(params) do`
			`self.modulesToOptState[module][i] = deepcopy(optState)`
			`if params[i] and params[i].is_bias then`
			`-- never regularize biases`
			`self.modulesToOptState[module][i].weightDecay = 0.0`
			`end`
			`end`
			`assert(module)`
			`assert(self.modulesToOptState[module])`
			`end)`
			`else`
			`local state = checkpoint_data.optim_state`
			`local modules = {}`
			`self.model:apply(function(m) table.insert(modules, m) end)`
			`assert(pl.tablex.compare_no_order(modules, pl.tablex.keys(state)))`
			`self.modulesToOptState = state`
			`end`
Initial commit of training code from development repo. 2015-09-24 07:49:45 +08:00			`end`

			`local function get_device_for_module(mod)`
			`local dev_id = nil`
			`for name, val in pairs(mod) do`
			`if torch.typename(val) == 'torch.CudaTensor' then`
			`local this_dev = val:getDevice()`
			`if this_dev ~= 0 then`
			`-- _make sure the tensors are allocated consistently`
			`assert(dev_id == nil or dev_id == this_dev)`
			`dev_id = this_dev`
			`end`
			`end`
			`end`
			`return dev_id -- _may still be zero if none are allocated.`
			`end`

			`local function on_device_for_module(mod, f)`
			`local this_dev = get_device_for_module(mod)`
			`if this_dev ~= nil then`
			`return cutorch.withDevice(this_dev, f)`
			`end`
			`return f()`
			`end`

Refactor: facenet->openface. 2015-10-09 22:48:06 +08:00			`function OpenFaceOptim:optimizeTriplet(optimMethod, inputs, criterion)`
Initial commit of training code from development repo. 2015-09-24 07:49:45 +08:00			`assert(optimMethod)`
			`assert(inputs)`
			`assert(criterion)`
			`assert(self.modulesToOptState)`

			`self.model:zeroGradParameters()`
			`local output = self.model:forward(inputs)`

			`local err = criterion:forward(output)`

			`local df_do = criterion:backward(output)`
			`self.model:backward(inputs, df_do)`

			`-- We'll set these in the loop that iterates over each module. Get them`
			`-- out here to be captured.`
			`local curGrad`
			`local curParam`
			`local function fEvalMod(x)`
			`return err, curGrad`
			`end`

			`for curMod, opt in pairs(self.modulesToOptState) do`
			`on_device_for_module(curMod, function()`
			`local curModParams = self.weight_bias_parameters(curMod)`
			`-- expects either an empty table or 2 element table, one for weights`
			`-- and one for biases`
			`assert(pl.tablex.size(curModParams) == 0 or`
			`pl.tablex.size(curModParams) == 2)`
			`if curModParams then`
			`for i, tensor in ipairs(curModParams) do`
			`if curModParams[i] then`
			`-- expect param, gradParam pair`
			`curParam, curGrad = table.unpack(curModParams[i])`
			`assert(curParam and curGrad)`
			`optimMethod(fEvalMod, curParam, opt[i])`
			`end`
			`end`
			`end`
			`end)`
			`end`

			`return err, output`
			`end`