Initial resnet model definition.
This commit is contained in:
parent 2c6ddfba2d
commit 0574a72641

@@ -0,0 +1,173 @@
-- Model: resnet1.def.lua
-- Description: ResNet model for face recognition with OpenFace, v1.
-- Input size: 3x96x96
-- Number of Parameters from net:getParameters() with embSize=128: TODO
-- Components: Mostly `nn`
-- Devices: CPU and CUDA
--
-- Brandon Amos <http://bamos.github.io>
-- 2016-06-19
--
-- Copyright 2016 Carnegie Mellon University
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-- Modified from:
-- https://github.com/facebook/fb.resnet.torch/blob/master/models/resnet.lua
-- Portions from here are BSD licensed.

imgDim = 96

local nn = require 'nn'

local Convolution = nn.SpatialConvolutionMM
local Avg = nn.SpatialAveragePooling
local ReLU = nn.ReLU
local Max = nn.SpatialMaxPooling
local SBatchNorm = nn.SpatialBatchNormalization

function createModel()
   local depth = 18
   local shortcutType = 'B'
   local iChannels

   -- The shortcut layer is either identity or 1x1 convolution
   local function shortcut(nInputPlane, nOutputPlane, stride)
      local useConv = shortcutType == 'C' or
         (shortcutType == 'B' and nInputPlane ~= nOutputPlane)
      if useConv then
         -- 1x1 convolution
         return nn.Sequential()
            :add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
            :add(SBatchNorm(nOutputPlane))
      elseif nInputPlane ~= nOutputPlane then
         -- Strided, zero-padded identity shortcut
         return nn.Sequential()
            :add(nn.SpatialAveragePooling(1, 1, stride, stride))
            :add(nn.Concat(2)
               :add(nn.Identity())
               :add(nn.MulConstant(0)))
      else
         return nn.Identity()
      end
   end
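
   -- Note on the zero-padded branch: concatenating the identity with a zeroed
   -- copy of itself along the channel dimension (dim 2) doubles the channel
   -- count, so it only matches nOutputPlane when nOutputPlane == 2 *
   -- nInputPlane, as in the standard ResNet stage transitions (64->128,
   -- 128->256, 256->512). With the default shortcutType 'B' above, this
   -- branch is never taken; it is only reached with type-'A' shortcuts.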

   -- The basic residual layer block for 18 and 34 layer network, and the
   -- CIFAR networks
   local function basicblock(n, stride)
      local nInputPlane = iChannels
      iChannels = n

      local s = nn.Sequential()
      s:add(Convolution(nInputPlane,n,3,3,stride,stride,1,1))
      s:add(SBatchNorm(n))
      s:add(ReLU(true))
      s:add(Convolution(n,n,3,3,1,1,1,1))
      s:add(SBatchNorm(n))

      return nn.Sequential()
         :add(nn.ConcatTable()
            :add(s)
            :add(shortcut(nInputPlane, n, stride)))
         :add(nn.CAddTable(true))
         :add(ReLU(true))
   end
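
   -- The residual connection is expressed with nn.ConcatTable, which runs the
   -- main branch and the shortcut on the same input, followed by
   -- nn.CAddTable(true), which sums the two outputs in place.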

   -- The bottleneck residual layer for 50, 101, and 152 layer networks
   local function bottleneck(n, stride)
      local nInputPlane = iChannels
      iChannels = n * 4

      local s = nn.Sequential()
      s:add(Convolution(nInputPlane,n,1,1,1,1,0,0))
      s:add(SBatchNorm(n))
      s:add(ReLU(true))
      s:add(Convolution(n,n,3,3,stride,stride,1,1))
      s:add(SBatchNorm(n))
      s:add(ReLU(true))
      s:add(Convolution(n,n*4,1,1,1,1,0,0))
      s:add(SBatchNorm(n * 4))

      return nn.Sequential()
         :add(nn.ConcatTable()
            :add(s)
            :add(shortcut(nInputPlane, n * 4, stride)))
         :add(nn.CAddTable(true))
         :add(ReLU(true))
   end

   -- Creates count residual blocks with specified number of features
   local function layer(block, features, count, stride)
      local s = nn.Sequential()
      for i=1,count do
         s:add(block(features, i == 1 and stride or 1))
      end
      return s
   end
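
   -- Only the first block in a stage applies the stride (downsampling); the
   -- remaining count-1 blocks keep the spatial resolution.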

   -- Configurations for ResNet:
   --  num. residual blocks, num features, residual block function
   local cfg = {
      [18]  = {{2, 2, 2, 2}, 4608, basicblock},
      -- [34]  = {{3, 4, 6, 3}, 512, basicblock},
      -- [50]  = {{3, 4, 6, 3}, 2048, bottleneck},
      -- [101] = {{3, 4, 23, 3}, 2048, bottleneck},
      -- [152] = {{3, 8, 36, 3}, 2048, bottleneck},
   }
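
   -- Where 4608 comes from: for the 3x96x96 input, the spatial size shrinks
   -- 96 -> 48 (7x7/2 conv) -> 24 (3x3/2 max pool) -> 24 -> 12 -> 6 -> 3
   -- through the four residual stages, so the flattened feature size for
   -- depth 18 is 512 channels * 3 * 3 = 4608.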

   assert(cfg[depth], 'Invalid depth: ' .. tostring(depth))
   local def, nLinear, block = table.unpack(cfg[depth])
   iChannels = 64

   -- The ResNet ImageNet model
   local model = nn.Sequential()
   model:add(Convolution(3,64,7,7,2,2,3,3))
   model:add(SBatchNorm(64))
   model:add(ReLU(true))
   model:add(Max(3,3,2,2,1,1))
   model:add(layer(block, 64, def[1]))
   model:add(layer(block, 128, def[2], 2))
   model:add(layer(block, 256, def[3], 2))
   model:add(layer(block, 512, def[4], 2))
   -- model:add(nn.Reshape(nLinear))
   model:add(nn.View(nLinear))
   -- `opt.embSize` is a global set by the OpenFace training scripts.
   model:add(nn.Linear(nLinear, opt.embSize))
   model:add(nn.Normalize(2))

   local function ConvInit(name)
      for k,v in pairs(model:findModules(name)) do
         local n = v.kW*v.kH*v.nOutputPlane
         v.weight:normal(0,math.sqrt(2/n))
         -- Guard the cudnn check: `cudnn` is an undefined global on
         -- CPU-only setups, and this file only requires `nn`.
         if cudnn and cudnn.version >= 4000 then
            v.bias = nil
            v.gradBias = nil
         else
            v.bias:zero()
         end
      end
   end
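
   -- ConvInit is the He et al. ("MSR") initialization: zero-mean Gaussian
   -- weights with variance 2/n, where n = kW*kH*nOutputPlane is the fan-out.
   -- With cudnn v4+ the convolution bias can be dropped entirely, since it is
   -- redundant before batch normalization; otherwise it is zeroed.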

   local function BNInit(name)
      for k,v in pairs(model:findModules(name)) do
         v.weight:fill(1)
         v.bias:zero()
      end
   end

   ConvInit('nn.SpatialConvolutionMM')
   BNInit('nn.SpatialBatchNormalization')
   for k,v in pairs(model:findModules('nn.Linear')) do
      v.bias:zero()
   end

   return model
end
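
Not part of the commit, but a minimal smoke-test sketch for the definition above; it assumes the file is saved as resnet1.def.lua and that embSize is 128 (the value referenced in the header comment), since `opt` is a global normally populated by OpenFace's training scripts:

-- Hypothetical smoke test for resnet1.def.lua.
require 'nn'

opt = {embSize = 128}   -- stand-in for the global set by the training scripts
dofile 'resnet1.def.lua'

local model = createModel()
local input = torch.randn(2, 3, imgDim, imgDim)   -- two 3x96x96 images
local emb = model:forward(input)
print(emb:size())      -- 2x128
print(emb[1]:norm())   -- ~1.0, enforced by nn.Normalize(2)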

@@ -7,7 +7,6 @@ local pl = require('pl.import_into')()
 
 local OpenFaceOptim, _ = torch.class('OpenFaceOptim')
 
-
 -- deepcopy routine that assumes the presence of a 'clone' method in user
 -- data should be used to deeply copy. This matches the behavior of Torch
 -- tensors.

@@ -60,18 +59,17 @@ function OpenFaceOptim:__init(model, optState, checkpoint_data)
       self.model:apply(function(module)
          self.modulesToOptState[module] = { }
          local params = self.weight_bias_parameters(module)
          -- expects either an empty table or 2 element table, one for weights
          -- and one for biases
-         if pl.tablex.size(params) == 0 or pl.tablex.size(params) == 2 then
-            for i, _ in ipairs(params) do
-               self.modulesToOptState[module][i] = deepcopy(optState)
-               if params[i] and params[i].is_bias then
-                  -- never regularize biases
-                  self.modulesToOptState[module][i].weightDecay = 0.0
-               end
-            end
-            assert(module)
-            assert(self.modulesToOptState[module])
-         end
+         assert(pl.tablex.size(params) == 0 or pl.tablex.size(params) == 2)
+         for i, _ in ipairs(params) do
+            self.modulesToOptState[module][i] = deepcopy(optState)
+            if params[i] and params[i].is_bias then
+               -- never regularize biases
+               self.modulesToOptState[module][i].weightDecay = 0.0
+            end
+         end
+         assert(module)
+         assert(self.modulesToOptState[module])
       end)
    else
       local state = checkpoint_data.optim_state
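
The change in this hunk replaces a silent `if` guard with an `assert`: a module whose `weight_bias_parameters` returns something other than zero or two entries now aborts construction instead of being skipped without optimizer state. A hypothetical illustration of the difference (not part of the commit):

local pl = require('pl.import_into')()

local params = {1}   -- malformed: size 1 rather than 0 or 2
-- old behavior: the guard is false, the body is skipped, and the module
-- silently ends up without optimizer state
if pl.tablex.size(params) == 0 or pl.tablex.size(params) == 2 then
   print('configured')
end
-- new behavior: execution stops here with a stack trace
assert(pl.tablex.size(params) == 0 or pl.tablex.size(params) == 2)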

@@ -145,19 +143,19 @@ function OpenFaceOptim:optimizeTriplet(optimMethod, inputs, output,
    for curMod, opt in pairs(self.modulesToOptState) do
       on_device_for_module(curMod, function()
          local curModParams = self.weight_bias_parameters(curMod)
          -- expects either an empty table or 2 element table, one for weights
          -- and one for biases
-         if pl.tablex.size(curModParams) == 0 or
-            pl.tablex.size(curModParams) == 2
-         then
-            if curModParams then
-               for i, _ in ipairs(curModParams) do
-                  if curModParams[i] then
-                     -- expect param, gradParam pair
-                     curParam, curGrad = table.unpack(curModParams[i])
-                     assert(curParam and curGrad)
-                     optimMethod(fEvalMod, curParam, opt[i])
-                  end
-               end
-            end
-         end
+         assert(pl.tablex.size(curModParams) == 0 or
+                pl.tablex.size(curModParams) == 2)
+         if curModParams then
+            for i, _ in ipairs(curModParams) do
+               if curModParams[i] then
+                  -- expect param, gradParam pair
+                  curParam, curGrad = table.unpack(curModParams[i])
+                  assert(curParam and curGrad)
+                  optimMethod(fEvalMod, curParam, opt[i])
+               end
+            end
+         end
       end)
    end
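
The same fail-fast rewrite is applied here in optimizeTriplet: the two-line size check becomes an up-front assert, and the parameter loop is unindented accordingly.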