-- 2015-08-09: Originally from https://github.com/facebook/fbnn/blob/master/fbnn/Optim.lua.
-- 2015-08-09: [Brandon Amos] Initial optimizeTriplet implementation.
-- 2016-01-04: [Bartosz Ludwiczuk] Substantial improvements to optimizeTriplet at
--             https://github.com/melgor/Triplet-Learning

local pl = require('pl.import_into')()

local OpenFaceOptim, _ = torch.class('OpenFaceOptim')

-- deepcopy routine that assumes that a 'clone' method, when present in
-- userdata, should be used to deeply copy it. This matches the behavior of
-- Torch tensors.
local function deepcopy(x)
   local typename = type(x)
   if typename == "userdata" then
      return x:clone()
   end
   if typename == "table" then
      local retval = { }
      for k, v in pairs(x) do
         retval[deepcopy(k)] = deepcopy(v)
      end
      return retval
   end
   return x
end

-- Returns the weight parameters and bias parameters, along with their
-- associated grad parameters, for this module. Each returned entry is
-- annotated with an is_bias flag marking whether it is the bias parameter set.
function OpenFaceOptim.weight_bias_parameters(module)
   local weight_params, bias_params
   if module.weight then
      weight_params = {module.weight, module.gradWeight}
      weight_params.is_bias = false
   end
   if module.bias then
      bias_params = {module.bias, module.gradBias}
      bias_params.is_bias = true
   end
   return {weight_params, bias_params}
end
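-- Illustration (a sketch, not part of the original file): for a module that
-- carries both weights and biases, such as nn.Linear, weight_bias_parameters
-- returns two {param, gradParam} pairs tagged with is_bias:
--
--   local m = nn.Linear(10, 2)
--   local params = OpenFaceOptim.weight_bias_parameters(m)
--   -- params[1] = {m.weight, m.gradWeight}, params[1].is_bias == false
--   -- params[2] = {m.bias,   m.gradBias},   params[2].is_bias == true
--
-- Parameter-free modules such as nn.ReLU produce a table with no entries,
-- which the callers below skip.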
function OpenFaceOptim:__init(model, optState, checkpoint_data)
   assert(model)
   assert(checkpoint_data or optState)
   assert(not (checkpoint_data and optState))

   self.model = model
   self.modulesToOptState = {}
   -- Keep this around so we can update it in setParameters.
   self.originalOptState = optState

   -- Each module has some set of parameters and grad parameters. Since
   -- they may be allocated discontinuously, we need a separate optState for
   -- each parameter tensor. self.modulesToOptState maps each module to
   -- a Lua table of optState clones.
   if not checkpoint_data then
      self.model:apply(function(module)
         self.modulesToOptState[module] = { }
         local params = self.weight_bias_parameters(module)
         -- The table is either empty (parameter-free module) or holds a
         -- weight entry and a bias entry.
         if pl.tablex.size(params) == 0 or pl.tablex.size(params) == 2 then
            for i, _ in ipairs(params) do
               self.modulesToOptState[module][i] = deepcopy(optState)
               if params[i] and params[i].is_bias then
                  -- never regularize biases
                  self.modulesToOptState[module][i].weightDecay = 0.0
               end
            end
            assert(module)
            assert(self.modulesToOptState[module])
         end
      end)
   else
      local state = checkpoint_data.optim_state
      local modules = {}
      self.model:apply(function(m) table.insert(modules, m) end)
      assert(pl.tablex.compare_no_order(modules, pl.tablex.keys(state)))
      self.modulesToOptState = state
   end

   return self
end

local function get_device_for_module(mod)
   local dev_id = nil
   for _, val in pairs(mod) do
      if torch.typename(val) == 'torch.CudaTensor' then
         local this_dev = val:getDevice()
         if this_dev ~= 0 then
            -- Make sure the tensors are allocated consistently.
            assert(dev_id == nil or dev_id == this_dev)
            dev_id = this_dev
         end
      end
   end
   return dev_id -- may still be nil if none are allocated.
end

local function on_device_for_module(mod, f)
   local this_dev = get_device_for_module(mod)
   if this_dev ~= nil then
      return cutorch.withDevice(this_dev, f)
   end
   return f()
end

function OpenFaceOptim:optimizeTriplet(optimMethod, inputs, output, criterion, mapper) --, averageUse)
   assert(optimMethod)
   assert(inputs)
   assert(criterion)
   assert(self.modulesToOptState)

   self.model:zeroGradParameters()
   local numImages = inputs:size(1)
   local err = criterion:forward(output)
   local df_do = criterion:backward(output)

   -- Map the triplet gradients back to the indices of the input images.
   local gradient_all = torch.Tensor(numImages, output[1]:size(2)):type(inputs:type())
   gradient_all:zero()
   -- Accumulate the gradient for each example over every triplet it appears in.
   for i = 1, #mapper do
      gradient_all[mapper[i][1]]:add(df_do[1][i])
      gradient_all[mapper[i][2]]:add(df_do[2][i])
      gradient_all[mapper[i][3]]:add(df_do[3][i])
   end

   -- Averaging the gradient per example may not be the right idea, so it is
   -- turned off for now:
   -- for i = 1, numImages do
   --    if averageUse[i] ~= 0 then gradient_all[i]:div(averageUse[i]) end
   -- end
   -- print(('Gradient Average: %f: '):format(torch.abs(gradient_all):sum()))

   self.model:backward(inputs, gradient_all)

   -- We'll set these in the loop that iterates over each module. Get them
   -- out here to be captured.
   local curGrad
   local curParam
   local function fEvalMod(_)
      return err, curGrad
   end

   for curMod, opt in pairs(self.modulesToOptState) do
      on_device_for_module(curMod, function()
         local curModParams = self.weight_bias_parameters(curMod)
         if pl.tablex.size(curModParams) == 0 or pl.tablex.size(curModParams) == 2 then
            if curModParams then
               for i, _ in ipairs(curModParams) do
                  if curModParams[i] then
                     -- expect a {param, gradParam} pair
                     curParam, curGrad = table.unpack(curModParams[i])
                     assert(curParam and curGrad)
                     optimMethod(fEvalMod, curParam, opt[i])
                  end
               end
            end
         end
      end)
   end

   return err, output
end

return OpenFaceOptim
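-- Example usage (a hedged sketch with assumed names; it does not reproduce how
-- OpenFace's training loop actually drives this class). Assumes `nn` and
-- `optim` are loaded, `model` maps a batch of images to embeddings,
-- `tripletCrit` is a triplet criterion such as nn.TripletEmbeddingCriterion,
-- `output` packs the anchor, positive, and negative embeddings of the mined
-- triplets, and `mapper[i]` holds the input indices
-- {anchor, positive, negative} of triplet i:
--
--   local optimState = {learningRate = 0.001, momentum = 0.9, weightDecay = 2e-4}
--   local optimizer = OpenFaceOptim(model, optimState)
--
--   local embeddings = model:forward(inputs)           -- numImages x embSize
--   local output = {embeddings:index(1, anchorIdx),
--                   embeddings:index(1, posIdx),
--                   embeddings:index(1, negIdx)}
--   local err = optimizer:optimizeTriplet(optim.sgd, inputs, output,
--                                         tripletCrit, mapper)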