Add classification demo.

2015-10-11 16:58:58 -04:00 · 2015-10-11 16:58:58 -04:00 · 118b7c0d7a
parent d15a2b7c71
commit 118b7c0d7a
5 changed files with 199 additions and 5 deletions
--- a/.gitignore
+++ b/.gitignore
@ -15,4 +15,6 @@ evaluation/*/*.pdf
 demos/web/bower_components
 demos/web/unknown*.npy

-models/openface/*.t7
+models/openface/*.t7
+models/openface/*.pkl
+celeb-classifier*
--- a/README.md
+++ b/README.md
@ -142,11 +142,11 @@ are aware of on the standard
 benchmark.
 We had to fallback to using the deep funneled versions for
 152 of 13233 images because dlib failed to detect a face or landmarks.
+We obtain a mean accuracy of 0.8483 &plusmn; 0.0172 with an AUC of 0.92.

 ![](images/nn4.v1.lfw.roc.png)

-This can be generated with the following commands from the root
-`openface`
+This can be generated with the following commands from the root `openface`
 directory, assuming you have downloaded and placed the raw and
 deep funneled LFW data from [here](http://vis-www.cs.umass.edu/lfw/)
 in `./data/lfw/raw` and `./data/lfw/deepfunneled`.
@ -188,6 +188,45 @@ These can be generated with the following commands from the root
 4. Generate t-SNE visualization with `./util/tsne.py <feature-directory> --names <name 1> ... <name n>`
   This creates `tsne.pdf` in `<feature-directory>`.

+# Training a Classifier
+OpenFace's core provides a feature extraction method to
+obtain a low-dimensional representation of any face.
+[demos/classifier.py](demos/classifier.py) shows a demo of
+how these representations can be used to create a face classifier.
+
+The demo classifier is trained on about 6000 total images of the following
+people, who are the people with the most images in our dataset:
+
+ America Ferrera
+ Amy Adams
+ Anne Hathaway
+ Ben Stiller
+ Bradley Cooper
+ David Boreanaz
+ Emily Deschanel
+ Eva Longoria
+ Jon Hamm
+ Steve Carell
+
+This demo uses [scikit-learn](http://scikit-learn.org) to perform
+a grid search over SVM parameters.
+Our trained model obtains 87% accuracy on this set of data.
+[models/get-models.sh](models/get-models.sh)
+will automatically download this classifier and place
+it in `models/openface/celeb-classifier.nn4.v1.pkl`.
+
+As an example, consider the following small set of images
+the model has no knowledge of.
+For an unknown person, a prediction still needs to be made, but
+the confidence score is usually lower.
+
+| Person | Image | Prediction | Confidence |
+|---|---|---|---|
+| Lennon 1 | <img src='images/examples/lennon-1.jpg' width='200px'></img> | DavidBoreanaz | 0.28 |
+| Lennon 2 | <img src='images/examples/lennon-2.jpg' width='200px'></img> | DavidBoreanaz | 0.56 |
+| Carell | <img src='images/examples/carell.jpg' width='200px'></img> | SteveCarell | 0.78 |
+| Adams | <img src='images/examples/adams.jpg' width='200px'></img> | AmyAdams | 0.87 |
+
 # Model Definitions
 Model definitions should be kept in [models/openface](models/openface),
 where we have provided definitions of the [nn1](models/openface/nn1.def.lua)
@ -225,8 +264,10 @@ face detection and alignment.
 These only run on the CPU and take from 100-200ms to over
 a second.
 The neural network uses a fixed-size input and has
-a more consistent runtime, almost 400ms on our 3.70 GHz CPU
-and 20-40 ms on our Tesla K40 GPU.
+a more consistent runtime,
+86.97 ms &plusmn; 67.82 ms on our 3.70 GHz CPU and
+32.45 ms &plusmn; 12.89 ms on our Tesla K40 GPU,
+obtained with [util/profile-network.lua](util/profile-network.lua).

 # Usage
 ## Existing Models
--- a/demos/classifier.py
+++ b/demos/classifier.py
@ -0,0 +1,151 @@
+#!/usr/bin/env python2
+#
+# Example to classify faces.
+# Brandon Amos
+# 2015/10/11
+#
+# Copyright 2015 Carnegie Mellon University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import cv2
+import itertools
+import os
+import pickle
+
+from operator import itemgetter
+
+import numpy as np
+np.set_printoptions(precision=2)
+import pandas as pd
+
+import sys
+fileDir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(fileDir, ".."))
+
+import openface
+import openface.helper
+from openface.data import iterImgs
+
+from sklearn.preprocessing import LabelEncoder
+from sklearn.decomposition import PCA
+from sklearn.grid_search import GridSearchCV
+from sklearn.manifold import TSNE
+from sklearn.svm import SVC
+
+modelDir = os.path.join(fileDir, '..', 'models')
+dlibModelDir = os.path.join(modelDir, 'dlib')
+openfaceModelDir = os.path.join(modelDir, 'openface')
+
+def getRep(imgPath):
+    """Return the network representation of the largest face in an image.
+
+    Relies on the module-level `args`, `align` and `net` objects that the
+    `__main__` section creates before this function is called.
+
+    Raises an Exception if the image cannot be read, no face is found,
+    or the face cannot be aligned.
+    """
+    image = cv2.imread(imgPath)
+    if image is None:
+        raise Exception("Unable to load image: {}".format(imgPath))
+    if args.verbose:
+        print("  + Original size: {}".format(image.shape))
+
+    # Only the largest detected face is used.
+    faceBox = align.getLargestFaceBoundingBox(image)
+    if faceBox is None:
+        raise Exception("Unable to find a face: {}".format(imgPath))
+
+    aligned = align.alignImg("affine", args.imgDim, image, faceBox)
+    if aligned is None:
+        raise Exception("Unable to align image: {}".format(imgPath))
+
+    return net.forwardImage(aligned)
+
+def train(args):
+    """Train an SVM face classifier from precomputed embeddings.
+
+    Reads 'labels.csv' and 'reps.csv' from `args.workDir`. The class label
+    of each row is the name of the directory containing the image path
+    stored in the second column of 'labels.csv'. SVM parameters are chosen
+    by a grid search with 5-fold cross-validation, and the fitted
+    (LabelEncoder, GridSearchCV) pair is pickled to 'classifier.pkl' in
+    `args.workDir`.
+    """
+    print("Loading embeddings.")
+    fname = "{}/labels.csv".format(args.workDir)
+    paths = pd.read_csv(fname, header=None).values[:, 1]
+    # The label is the directory that holds the image. Build a real list
+    # (not a chained map()) so it can be consumed by both fit() and
+    # transform() regardless of the Python version.
+    labels = [os.path.basename(os.path.dirname(path)) for path in paths]
+    fname = "{}/reps.csv".format(args.workDir)
+    embeddings = pd.read_csv(fname, header=None).values
+    le = LabelEncoder().fit(labels)
+    labelsNum = le.transform(labels)
+
+    param_grid = [
+        {'C': [1, 10, 100, 1000],
+         'kernel': ['linear']},
+        {'C': [1, 10, 100, 1000],
+         'gamma': [0.001, 0.0001],
+         'kernel': ['rbf']},
+    ]
+    # probability=True is required so that infer() can call predict_proba.
+    svm = GridSearchCV(
+        SVC(probability=True),
+        param_grid, verbose=4, cv=5, n_jobs=16
+    ).fit(embeddings, labelsNum)
+    print("Best estimator: {}".format(svm.best_estimator_))
+    print("Best score on left out data: {:.2f}".format(svm.best_score_))
+
+    # Pickle output is binary data, so the file must be opened in binary mode.
+    with open("{}/classifier.pkl".format(args.workDir), 'wb') as f:
+        pickle.dump((le, svm), f)
+
+def infer(args):
+    """Predict who is in `args.img` with the classifier in `args.workDir`.
+
+    Loads the (LabelEncoder, classifier) pair from 'classifier.pkl',
+    computes the face representation of the image with getRep(), and
+    prints the most probable person together with its probability.
+    """
+    # Pickle data is binary, so the file must be opened in binary mode.
+    # NOTE: unpickling can execute arbitrary code; only load classifier
+    # files that come from a trusted source.
+    with open("{}/classifier.pkl".format(args.workDir), 'rb') as f:
+        (le, svm) = pickle.load(f)
+    # scikit-learn expects a 2-D (n_samples, n_features) array, so present
+    # the single representation as one row.
+    rep = np.asarray(getRep(args.img)).reshape(1, -1)
+    predictions = svm.predict_proba(rep)[0]
+    maxI = np.argmax(predictions)
+    # inverse_transform works on array-likes; wrap the index and unwrap
+    # the single decoded label.
+    person = le.inverse_transform([maxI])[0]
+    confidence = predictions[maxI]
+    print("Predict {} with {:.2f} confidence.".format(person, confidence))
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the shared options plus a 'train' or
+    # 'infer' sub-command, set up the face aligner and network, and dispatch.
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('workDir', type=str,
+                        help="The input work directory containing 'reps.csv' and 'labels.csv'. Obtained from aligning a directory with 'align-dlib' and getting the representations with 'batch-represent'.")
+    parser.add_argument('--dlibFaceMean', type=str,
+                        help="Path to dlib's face mean.",
+                        default=os.path.join(dlibModelDir, "mean.csv"))
+    parser.add_argument('--dlibFacePredictor', type=str,
+                        help="Path to dlib's face predictor.",
+                        default=os.path.join(dlibModelDir,
+                                             "shape_predictor_68_face_landmarks.dat"))
+    parser.add_argument('--dlibRoot', type=str,
+                        default=os.path.expanduser("~/src/dlib-18.16/python_examples"),
+                        help="dlib directory with the dlib.so Python library.")
+    parser.add_argument('--networkModel', type=str,
+                        help="Path to Torch network model.",
+                        default=os.path.join(openfaceModelDir, 'nn4.v1.t7'))
+    parser.add_argument('--imgDim', type=int,
+                        help="Default image dimension.", default=96)
+    parser.add_argument('--cuda', action='store_true')
+    parser.add_argument('--verbose', action='store_true')
+
+    subparsers = parser.add_subparsers(dest='mode', help="Mode")
+    trainParser = subparsers.add_parser('train',
+                                        help="Train a new classifier.")
+    inferParser = subparsers.add_parser('infer',
+                                        help='Predict who an image contains from a trained classifier.')
+    inferParser.add_argument('img', type=str,
+                             help="Input image.")
+
+    args = parser.parse_args()
+
+    # dlib is imported only after parsing so that --dlibRoot can point at
+    # the directory holding the dlib Python library.
+    sys.path.append(args.dlibRoot)
+    import dlib
+    from openface.alignment import NaiveDlib # Depends on dlib.
+
+    # `args`, `align` and `net` are module-level names read by getRep().
+    # They are created for both modes, although train() does not use
+    # `align` or `net`.
+    align = NaiveDlib(args.dlibFaceMean, args.dlibFacePredictor)
+    net = openface.TorchWrap(args.networkModel, imgDim=args.imgDim, cuda=args.cuda)
+
+    if args.mode == 'train':
+        train(args)
+    elif args.mode == 'infer':
+        infer(args)
--- a/images/examples/adams.jpg
+++ b/images/examples/adams.jpg
--- a/images/examples/carell.jpg
+++ b/images/examples/carell.jpg