Merge branch 'experimental/align'

commit 0e99d0c499
@@ -13,6 +13,8 @@ images/examples-aligned
 evaluation/*/*.csv
 evaluation/*/*.pdf
+evaluation/attic/*/*.csv
+evaluation/attic/*/*.pdf

 demos/web/bower_components
 demos/web/unknown*.npy
@@ -50,17 +50,19 @@ openfaceModelDir = os.path.join(modelDir, 'openface')


 def getRep(imgPath):
-    img = cv2.imread(imgPath)
-    if img is None:
+    bgrImg = cv2.imread(imgPath)
+    if bgrImg is None:
         raise Exception("Unable to load image: {}".format(imgPath))
-    if args.verbose:
-        print(" + Original size: {}".format(img.shape))
+    rgbImg = cv2.cvtColor(bgrImg, cv2.COLOR_BGR2RGB)

-    bb = align.getLargestFaceBoundingBox(img)
+    if args.verbose:
+        print(" + Original size: {}".format(rgbImg.shape))
+
+    bb = align.getLargestFaceBoundingBox(rgbImg)
     if bb is None:
         raise Exception("Unable to find a face: {}".format(imgPath))

-    alignedFace = align.alignImg("affine", args.imgDim, img, bb)
+    alignedFace = align.alignImg("affine", args.imgDim, bgrImg, bb)
     if alignedFace is None:
         raise Exception("Unable to align image: {}".format(imgPath))
@@ -111,9 +113,6 @@ def infer(args):

 if __name__ == '__main__':
     parser = argparse.ArgumentParser()

-    parser.add_argument('--dlibFaceMean', type=str,
-                        help="Path to dlib's face predictor.",
-                        default=os.path.join(dlibModelDir, "mean.csv"))
     parser.add_argument('--dlibFacePredictor', type=str,
                         help="Path to dlib's face predictor.",
                         default=os.path.join(dlibModelDir,
@@ -158,11 +157,11 @@ network and classification models:

 Use `--networkModel` to set a non-standard Torch network model.""")

-    sys.path.append(args.dlibRoot)
+    sys.path = [args.dlibRoot] + sys.path
     import dlib
     from openface.alignment import NaiveDlib  # Depends on dlib.

-    align = NaiveDlib(args.dlibFaceMean, args.dlibFacePredictor)
+    align = NaiveDlib(args.dlibFacePredictor)
     net = openface.TorchWrap(
         args.networkModel, imgDim=args.imgDim, cuda=args.cuda)
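A note on the `sys.path` change that recurs throughout this commit: `sys.path.append` searches the bundled dlib build last, so any dlib already installed elsewhere on the path wins; prepending makes the build under `--dlibRoot` shadow it. A minimal sketch of the difference, with an illustrative path that is not from this repo:

```python
import sys

# Appended: only found if no other dlib is on sys.path ahead of it.
sys.path.append("/opt/dlib-18.16/python_examples")  # hypothetical path

# Prepended: always found first, shadowing any system-wide dlib.
sys.path = ["/opt/dlib-18.16/python_examples"] + sys.path
import dlib
```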
@@ -44,8 +44,6 @@ openfaceModelDir = os.path.join(modelDir, 'openface')

 parser = argparse.ArgumentParser()

 parser.add_argument('imgs', type=str, nargs='+', help="Input images.")
-parser.add_argument('--dlibFaceMean', type=str, help="Path to dlib's face predictor.",
-                    default=os.path.join(dlibModelDir, "mean.csv"))
 parser.add_argument('--dlibFacePredictor', type=str, help="Path to dlib's face predictor.",
                     default=os.path.join(dlibModelDir, "shape_predictor_68_face_landmarks.dat"))
 parser.add_argument('--dlibRoot', type=str,
@@ -61,7 +59,7 @@ parser.add_argument('--verbose', action='store_true')

 args = parser.parse_args()

-sys.path.append(args.dlibRoot)
+sys.path = [args.dlibRoot] + sys.path
 import dlib

 from openface.alignment import NaiveDlib  # Depends on dlib.
@@ -70,7 +68,7 @@ if args.verbose:
         time.time() - start))

 start = time.time()
-align = NaiveDlib(args.dlibFaceMean, args.dlibFacePredictor)
+align = NaiveDlib(args.dlibFacePredictor)
 net = openface.TorchWrap(args.networkModel, imgDim=args.imgDim, cuda=args.cuda)
 if args.verbose:
     print("Loading the dlib and OpenFace models took {} seconds.".format(
@@ -80,21 +78,23 @@ if args.verbose:

 def getRep(imgPath):
     if args.verbose:
         print("Processing {}.".format(imgPath))
-    img = cv2.imread(imgPath)
-    if img is None:
+    bgrImg = cv2.imread(imgPath)
+    if bgrImg is None:
         raise Exception("Unable to load image: {}".format(imgPath))
+    rgbImg = cv2.cvtColor(bgrImg, cv2.COLOR_BGR2RGB)
+
     if args.verbose:
-        print(" + Original size: {}".format(img.shape))
+        print(" + Original size: {}".format(rgbImg.shape))

     start = time.time()
-    bb = align.getLargestFaceBoundingBox(img)
+    bb = align.getLargestFaceBoundingBox(rgbImg)
     if bb is None:
         raise Exception("Unable to find a face: {}".format(imgPath))
     if args.verbose:
         print(" + Face detection took {} seconds.".format(time.time() - start))

     start = time.time()
-    alignedFace = align.alignImg("affine", args.imgDim, img, bb)
+    alignedFace = align.alignImg("affine", args.imgDim, rgbImg, bb)
     if alignedFace is None:
         raise Exception("Unable to align image: {}".format(imgPath))
     if args.verbose:
@@ -112,4 +112,4 @@ def getRep(imgPath):

 for (img1, img2) in itertools.combinations(args.imgs, 2):
     d = getRep(img1) - getRep(img2)
     print("Comparing {} with {}.".format(img1, img2))
-    print(" + Squared l2 distance between representations: {}".format(np.dot(d, d)))
+    print(" + Squared l2 distance between representations: {:0.3f}".format(np.dot(d, d)))
@@ -25,8 +25,6 @@ import random

 import cv2

-from skimage import io
-
 import openface
 from openface.alignment import NaiveDlib
 from openface.data import iterImgs
@@ -54,8 +54,6 @@ dlibModelDir = os.path.join(modelDir, 'dlib')
 openfaceModelDir = os.path.join(modelDir, 'openface')

 parser = argparse.ArgumentParser()
-parser.add_argument('--dlibFaceMean', type=str, help="Path to dlib's face predictor.",
-                    default=os.path.join(dlibModelDir, "mean.csv"))
 parser.add_argument('--dlibFacePredictor', type=str, help="Path to dlib's face predictor.",
                     default=os.path.join(dlibModelDir, "shape_predictor_68_face_landmarks.dat"))
 parser.add_argument('--dlibRoot', type=str,
@@ -72,11 +70,11 @@ parser.add_argument('--unknown', type=bool, default=False,

 args = parser.parse_args()

-sys.path.append(args.dlibRoot)
+sys.path = [args.dlibRoot] + sys.path
 import dlib
 from openface.alignment import NaiveDlib  # Depends on dlib.

-align = NaiveDlib(args.dlibFaceMean, args.dlibFacePredictor)
+align = NaiveDlib(args.dlibFacePredictor)
 net = openface.TorchWrap(args.networkModel, imgDim=args.imgDim, cuda=args.cuda)
@@ -5,8 +5,8 @@ on the standard
 [LFW](http://vis-www.cs.umass.edu/lfw/results.html)
 benchmark.
 We had to fallback to using the deep funneled versions for
-152 of 13233 images because dlib failed to detect a face or landmarks.
-We obtain a mean accuracy of 0.8483 ± 0.0172 with an AUC of 0.923.
+58 of 13233 images because dlib failed to detect a face or landmarks.
+We obtain a mean accuracy of 0.8138 ± 0.0149 with an AUC of 0.893.
 For comparison, training with Google-scale data results in an
 accuracy of .9963 ± 0.009.
@@ -13,18 +13,18 @@ Eric Clapton were generated with
 |---|---|---|---|
 | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/lennon-1.jpg' width='200px'></img> | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/lennon-2.jpg' width='200px'></img> | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/clapton-1.jpg' width='200px'></img> | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/clapton-2.jpg' width='200px'></img> |

-The following table shows that a distance threshold of `0.5` would
+The following table shows that a distance threshold of `0.8` would
 distinguish these two people.
 In practice, further experimentation should be done on the distance threshold.
 On our LFW experiments, the mean threshold across multiple
-experiments is 0.71 ± 0.027,
+experiments is about 0.68,
 see [accuracies.txt](https://github.com/cmusatyalab/openface/blob/master/evaluation/lfw.nn4.v1.epoch-177/accuracies.txt).

 | Image 1 | Image 2 | Distance |
 |---|---|---|
-| Lennon 1 | Lennon 2 | 0.310 |
-| Lennon 1 | Clapton 1 | 1.241 |
-| Lennon 1 | Clapton 2 | 1.056 |
-| Lennon 2 | Clapton 1 | 1.386 |
-| Lennon 2 | Clapton 2 | 1.073 |
-| Clapton 1 | Clapton 2 | 0.259 |
+| Lennon 1 | Lennon 2 | 0.298 |
+| Lennon 1 | Clapton 1 | 0.985 |
+| Lennon 1 | Clapton 2 | 0.855 |
+| Lennon 2 | Clapton 1 | 0.904 |
+| Lennon 2 | Clapton 2 | 0.955 |
+| Clapton 1 | Clapton 2 | 0.712 |
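To make the threshold concrete: verification reduces to comparing the squared L2 distance between two representations against the cutoff. A minimal sketch built on the `getRep` helper from the compare demo earlier in this diff; the `0.8` cutoff follows the updated text above and is a starting point, not a tuned value:

```python
import numpy as np

THRESHOLD = 0.8  # from the discussion above; re-tune per application

def samePerson(imgPath1, imgPath2):
    # getRep is the helper shown earlier in this diff.
    d = getRep(imgPath1) - getRep(imgPath2)
    return np.dot(d, d) < THRESHOLD
```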
@@ -88,7 +88,7 @@ Run the classifier on your images with:

 | Person | Image | Prediction | Confidence |
 |---|---|---|---|
-| Carell | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/carell.jpg' width='200px'></img> | SteveCarell | 0.78 |
-| Adams | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/adams.jpg' width='200px'></img> | AmyAdams | 0.87 |
-| Lennon 1 (Unknown) | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/lennon-1.jpg' width='200px'></img> | DavidBoreanaz | 0.28 |
-| Lennon 2 (Unknown) | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/lennon-2.jpg' width='200px'></img> | DavidBoreanaz | 0.56 |
+| Carell | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/carell.jpg' width='200px'></img> | SteveCarell | 0.96 |
+| Adams | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/adams.jpg' width='200px'></img> | AmyAdams | 0.98 |
+| Lennon 1 (Unknown) | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/lennon-1.jpg' width='200px'></img> | DavidBoreanaz | 0.27 |
+| Lennon 2 (Unknown) | <img src='https://raw.githubusercontent.com/cmusatyalab/openface/master/images/examples/lennon-2.jpg' width='200px'></img> | DavidBoreanaz | 0.43 |
@@ -0,0 +1,13 @@
+fold, threshold, accuracy
+0, 0.71, 0.86
+1, 0.71, 0.83
+2, 0.77, 0.82
+3, 0.70, 0.84
+4, 0.68, 0.82
+5, 0.71, 0.85
+6, 0.69, 0.85
+7, 0.71, 0.85
+8, 0.71, 0.86
+9, 0.67, 0.88
+
+avg, 0.8483 +/- 0.0172
@@ -1,13 +1,13 @@
 fold, threshold, accuracy
-0, 0.71, 0.86
-1, 0.71, 0.83
-2, 0.77, 0.82
-3, 0.70, 0.84
-4, 0.68, 0.82
-5, 0.71, 0.85
-6, 0.69, 0.85
-7, 0.71, 0.85
-8, 0.71, 0.86
-9, 0.67, 0.88
+0, 0.68, 0.82
+1, 0.68, 0.81
+2, 0.68, 0.79
+3, 0.68, 0.80
+4, 0.68, 0.81
+5, 0.68, 0.83
+6, 0.68, 0.83
+7, 0.68, 0.82
+8, 0.68, 0.81
+9, 0.60, 0.80

-avg, 0.8483 +/- 0.0172
+avg, 0.8138 +/- 0.0149
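As a sanity check on the `avg` line, recomputing from the per-fold accuracies above comes out close to the reported figure; the fold values in the file are rounded to two decimals, so the exact average comes from the unrounded accuracies:

```python
import numpy as np

folds = [0.82, 0.81, 0.79, 0.80, 0.81, 0.83, 0.83, 0.82, 0.81, 0.80]
print("{:0.4f} +/- {:0.4f}".format(np.mean(folds), np.std(folds)))
# 0.8120 +/- 0.0125 from the rounded folds, vs. the reported
# 0.8138 +/- 0.0149 computed from unrounded values.
```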
Binary file not shown. (Before: 57 KiB, after: 59 KiB)
@@ -20,170 +20,92 @@ import os
 import random
 import sys

-from skimage import io
-
 from .. import helper
 from .. import data

+TEMPLATE = np.float32([
+    (0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
+    (0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
+    (0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
+    (0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
+    (0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
+    (0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
+    (0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
+    (0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
+    (0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
+    (0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
+    (0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
+    (0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
+    (0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
+    (0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
+    (0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
+    (0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
+    (0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
+    (0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
+    (0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
+    (0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
+    (0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
+    (0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
+    (0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
+    (0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
+    (0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
+    (0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
+    (0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
+    (0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
+    (0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
+    (0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
+    (0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
+    (0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
+    (0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
+    (0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])
+
+TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0)
+MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)
+

 class NaiveDlib:
+    # https://github.com/cmusatyalab/openface/blob/master/images/dlib-landmark-mean.png
+    INNER_EYES_AND_BOTTOM_LIP = np.array([39, 42, 57])
+    OUTER_EYES_AND_NOSE = np.array([36, 45, 33])

-    def __init__(self, faceMean, facePredictor):
+    def __init__(self, facePredictor):
         """Initialize the dlib-based alignment."""
         self.detector = dlib.get_frontal_face_detector()
-        self.normMeanAlignPoints = loadMeanPoints(faceMean)
         self.predictor = dlib.shape_predictor(facePredictor)

-    def getAllFaceBoundingBoxes(self, img):
-        return self.detector(img, 1)
+    def getAllFaceBoundingBoxes(self, rgbImg):
+        try:
+            return self.detector(rgbImg, 1)
+        except Exception as e:
+            print("Warning: {}".format(e))
+            # In rare cases, exceptions are thrown.
+            return []

-    def getLargestFaceBoundingBox(self, img):
-        faces = self.detector(img, 1)
+    def getLargestFaceBoundingBox(self, rgbImg):
+        faces = self.getAllFaceBoundingBoxes(rgbImg)
         if len(faces) > 0:
             return max(faces, key=lambda rect: rect.width() * rect.height())

-    def align(self, img, bb):
-        points = self.predictor(img, bb)
+    def align(self, rgbImg, bb):
+        points = self.predictor(rgbImg, bb)
         return list(map(lambda p: (p.x, p.y), points.parts()))

-    def alignImg(self, method, size, img, bb=None,
-                 outputPrefix=None, outputDebug=False,
-                 expandBox=False, alignPoints=None):
-        if outputPrefix:
-            helper.mkdirP(os.path.dirname(outputPrefix))
-
-        def getName(tag=None):
-            if tag is None:
-                return "{}.png".format(outputPrefix)
-            else:
-                return "{}-{}.png".format(outputPrefix, tag)
-
+    def alignImg(self, method, size, rgbImg, bb=None,
+                 landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP):
         if bb is None:
-            try:
-                bb = self.getLargestFaceBoundingBox(img)
-            except Exception as e:
-                print("Warning: {}".format(e))
-                # In rare cases, exceptions are thrown.
-                return
+            bb = self.getLargestFaceBoundingBox(rgbImg)
             if bb is None:
                 # Most failed detection attempts return here.
                 return

-        if alignPoints is None:
-            alignPoints = self.align(img, bb)
-        meanAlignPoints = transformPoints(self.normMeanAlignPoints, bb, True)
+        if landmarks is None:
+            landmarks = self.align(rgbImg, bb)

-        (xs, ys) = zip(*meanAlignPoints)
-        tightBb = dlib.rectangle(left=min(xs), right=max(xs),
-                                 top=min(ys), bottom=max(ys))
+        npLandmarks = np.float32(landmarks)

-        if method != 'tightcrop':
-            npAlignPoints = np.float32(alignPoints)
-            npMeanAlignPoints = np.float32(meanAlignPoints)
-
-        if method == 'tightcrop':
-            warpedImg = img
-        elif method == 'affine':
-            ss = np.array([39, 42, 57])  # Eyes and bottom lip.
-            npAlignPointsSS = npAlignPoints[ss]
-            npMeanAlignPointsSS = npMeanAlignPoints[ss]
-            H = cv2.getAffineTransform(npAlignPointsSS, npMeanAlignPointsSS)
-            warpedImg = cv2.warpAffine(img, H, np.shape(img)[0:2])
-        elif method == 'perspective':
-            ss = np.array([39, 42, 48, 54])  # Eyes and corners of mouth.
-            npAlignPointsSS = npAlignPoints[ss]
-            npMeanAlignPointsSS = npMeanAlignPoints[ss]
-            H = cv2.getPerspectiveTransform(
-                npAlignPointsSS, npMeanAlignPointsSS)
-            warpedImg = cv2.warpPerspective(img, H, np.shape(img)[0:2])
-        elif method == 'homography':
-            (H, mask) = cv2.findHomography(npAlignPoints, npMeanAlignPoints,
-                                           method=cv2.LMEDS)
-            warpedImg = cv2.warpPerspective(img, H, np.shape(img)[0:2])
+        if method == 'affine':
+            H = cv2.getAffineTransform(npLandmarks[landmarkIndices],
+                                       size*MINMAX_TEMPLATE[landmarkIndices])
+            thumbnail = cv2.warpAffine(rgbImg, H, (size, size))
         else:
-            print("Error: method '{}' is unimplemented.".format(method))
-            sys.exit(-1)
+            raise Exception('Unrecognized method: {}'.format(method))

-        if method == 'tightcrop':
-            wAlignPoints = alignPoints
-        else:
-            wBb = self.getLargestFaceBoundingBox(warpedImg)
-            if wBb is None:
-                return
-            wAlignPoints = self.align(warpedImg, wBb)
-            wMeanAlignPoints = transformPoints(
-                self.normMeanAlignPoints, wBb, True)
-
-        if outputDebug:
-            annotatedImg = annotate(img, bb, alignPoints, meanAlignPoints)
-            io.imsave(getName("orig"), img)
-            io.imsave(getName("annotated"), annotatedImg)
-
-            if method != 'tightcrop':
-                wAnnotatedImg = annotate(warpedImg, wBb,
-                                         wAlignPoints, wMeanAlignPoints)
-                io.imsave(getName("warped"), warpedImg)
-                io.imsave(getName("warped-annotated"), wAnnotatedImg)
-
-        if len(warpedImg.shape) != 3:
-            print(" + Warning: Result does not have 3 dimensions.")
-            return None
-
-        (xs, ys) = zip(*wAlignPoints)
-        xRange = max(xs) - min(xs)
-        yRange = max(ys) - min(ys)
-        if expandBox:
-            (l, r, t, b) = (min(xs) - 0.20 * xRange, max(xs) + 0.20 * xRange,
-                            min(ys) - 0.65 * yRange, max(ys) + 0.20 * yRange)
-        else:
-            (l, r, t, b) = (min(xs), max(xs), min(ys), max(ys))
-        (w, h, _) = warpedImg.shape
-        if 0 <= l <= w and 0 <= r <= w and 0 <= b <= h and 0 <= t <= h:
-            cwImg = cv2.resize(warpedImg[t:b, l:r], (size, size))
-            h, edges = np.histogram(cwImg.ravel(), 16, [0, 256])
-            s = sum(h)
-            if any(h > 0.65 * s):
-                print("Warning: Image is likely a single color.")
-                return
-        else:
-            print("Warning: Unable to align and crop to the "
-                  "face's bounding box.")
-            return
-
-        if outputDebug:
-            io.imsave(getName(), cwImg)
-        return cwImg
-
-
-def transformPoints(points, bb, toImgCoords):
-    if toImgCoords:
-        def scale(p):
-            (x, y) = p
-            return (int((x * bb.width()) + bb.left()),
-                    int((y * bb.height()) + bb.top()))
-    else:
-        def scale(p):
-            (x, y) = p
-            return (float(x - bb.left()) / bb.width(),
-                    float(y - bb.top()) / bb.height())
-    return list(map(scale, points))
-
-
-def loadMeanPoints(modelFname):
-    def parse(line):
-        (x, y) = line.strip().split(",")
-        return (float(x), float(y))
-    with open(modelFname, 'r') as f:
-        return [parse(line) for line in f]
-
-
-def annotate(img, box, points=None, meanPoints=None):
-    a = np.copy(img)
-    bl = (box.left(), box.bottom())
-    tr = (box.right(), box.top())
-    cv2.rectangle(a, bl, tr, color=(153, 255, 204), thickness=3)
-    for p in points:
-        cv2.circle(a, center=p, radius=3, color=(102, 204, 255), thickness=-1)
-    for p in meanPoints:
-        cv2.circle(a, center=p, radius=3, color=(0, 0, 0), thickness=-1)
-    return a
+        return thumbnail
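The core of the new `alignImg` is a three-point affine warp: the detected landmarks selected by `landmarkIndices` are mapped onto the matching rows of `MINMAX_TEMPLATE`, scaled to the output resolution. A self-contained sketch of that transform with illustrative landmark coordinates (the real ones come from `self.align`):

```python
import cv2
import numpy as np

size = 96
rgbImg = np.zeros((240, 320, 3), dtype=np.uint8)  # stand-in for a real image

# Three detected landmark positions (illustrative values):
srcPoints = np.float32([[120.0, 150.0], [180.0, 148.0], [152.0, 210.0]])

# Matching template rows lie in [0, 1]^2; scaling by `size` gives their
# target pixel positions in the output thumbnail:
dstPoints = size * np.float32([[0.3, 0.4], [0.7, 0.4], [0.5, 0.75]])

H = cv2.getAffineTransform(srcPoints, dstPoints)     # 2x3 affine matrix
thumbnail = cv2.warpAffine(rgbImg, H, (size, size))  # size x size output
```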
@ -14,8 +14,7 @@
|
|||
|
||||
import os
|
||||
|
||||
from skimage import io
|
||||
|
||||
import cv2
|
||||
|
||||
class Image:
|
||||
|
||||
|
@@ -25,17 +24,20 @@ class Image:
         self.path = path
         self.rgb = None

-    def getRGB(self, cache=False):
-        if self.rgb is not None:
-            return self.rgb
-        try:
-            rgb = io.imread(self.path)
-        except:
-            rgb = None
-        if cache:
-            self.rgb = rgb
-        return rgb
+    def getBGR(self):
+        try:
+            bgr = cv2.imread(self.path)
+        except:
+            bgr = None
+        return bgr
+
+    def getRGB(self):
+        bgr = self.getBGR()
+        if bgr is not None:
+            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+        else:
+            rgb = None
+        return rgb

     def __repr__(self):
         return "({}, {})".format(self.cls, self.name)
@@ -24,8 +24,6 @@ import cv2
 import random
 import shutil

-from skimage import io
-
 modelDir = os.path.join(fileDir, '..', 'models')
 dlibModelDir = os.path.join(modelDir, 'dlib')
 openfaceModelDir = os.path.join(modelDir, 'openface')
@@ -84,7 +82,14 @@ def alignMain(args):
     # Shuffle so multiple versions can be run at once.
     random.shuffle(imgs)

-    align = NaiveDlib(args.dlibFaceMean, args.dlibFacePredictor)
+    if args.landmarks == 'outerEyesAndNose':
+        landmarkIndices = NaiveDlib.OUTER_EYES_AND_NOSE
+    elif args.landmarks == 'innerEyesAndBottomLip':
+        landmarkIndices = NaiveDlib.INNER_EYES_AND_BOTTOM_LIP
+    else:
+        raise Exception("Landmarks unrecognized: {}".format(args.landmarks))
+
+    align = NaiveDlib(args.dlibFacePredictor)

     nFallbacks = 0
     for imgObject in imgs:
@@ -94,11 +99,10 @@ def alignMain(args):
         imgName = outputPrefix + ".png"

         if not os.path.isfile(imgName):
-            rgb = imgObject.getRGB(cache=False)
-            out = align.alignImg(args.method, args.size, rgb,
-                                 outputPrefix=outputPrefix,
-                                 outputDebug=args.outputDebugImages)
-            if args.fallbackLfw and out is None:
+            rgb = imgObject.getRGB()
+            outRgb = align.alignImg('affine', args.size, rgb,
+                                    landmarkIndices=landmarkIndices)
+            if args.fallbackLfw and outRgb is None:
                 nFallbacks += 1
                 deepFunneled = "{}/{}.jpg".format(os.path.join(args.fallbackLfw,
                                                                imgObject.cls),
@@ -107,9 +111,12 @@ def alignMain(args):
                                                                           imgObject.cls),
                                                              imgObject.name))

-        if out is not None:
-            io.imsave(imgName, out)
-    print('nFallbacks:', nFallbacks)
+            if outRgb is not None:
+                outBgr = cv2.cvtColor(outRgb, cv2.COLOR_RGB2BGR)
+                cv2.imwrite(imgName, outBgr)
+
+    if args.fallbackLfw:
+        print('nFallbacks:', nFallbacks)

 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
@@ -131,22 +138,19 @@ if __name__ == '__main__':
                                  default=0)  # <= 0 ===> all imgs
     alignmentParser = subparsers.add_parser(
         'align', help='Align a directory of images.')
-    alignmentParser.add_argument('method', type=str,
-                                 choices=['tightcrop', 'affine',
-                                          'perspective', 'homography'],
-                                 help="Alignment method.")
+    alignmentParser.add_argument('landmarks', type=str,
+                                 choices=['outerEyesAndNose', 'innerEyesAndBottomLip'],
+                                 help='The landmarks to align to.')
     alignmentParser.add_argument(
         'outputDir', type=str, help="Output directory of aligned images.")
-    alignmentParser.add_argument('--outputDebugImages', action='store_true',
-                                 help='Output annotated images for debugging and presenting.')
     alignmentParser.add_argument('--size', type=int, help="Default image size.",
-                                 default=152)
+                                 default=96)
     alignmentParser.add_argument('--fallbackLfw', type=str,
                                  help="If alignment doesn't work, fallback to copying the deep funneled version from this directory..")

     args = parser.parse_args()

-    sys.path.append(args.dlibRoot)
+    sys.path = [args.dlibRoot] + sys.path
     import openface
     import openface.helper
     from openface.data import iterImgs
@@ -0,0 +1,79 @@
+#!/usr/bin/env python2
+#
+# Copyright 2015 Carnegie Mellon University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+fileDir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(fileDir, ".."))
+
+import argparse
+import cv2
+
+modelDir = os.path.join(fileDir, '..', 'models')
+dlibModelDir = os.path.join(modelDir, 'dlib')
+openfaceModelDir = os.path.join(modelDir, 'openface')
+
+
+def main(args):
+    align = NaiveDlib(args.dlibFacePredictor)
+
+    bgrImg = cv2.imread(args.img)
+    if bgrImg is None:
+        raise Exception("Unable to load image: {}".format(args.img))
+    rgbImg = cv2.cvtColor(bgrImg, cv2.COLOR_BGR2RGB)
+
+    bb = align.getLargestFaceBoundingBox(rgbImg)
+    if bb is None:
+        raise Exception("Unable to find a face: {}".format(args.img))
+
+    landmarks = align.align(rgbImg, bb)
+    if landmarks is None:
+        raise Exception("Unable to align image: {}".format(args.img))
+    alignedFace = align.alignImg("affine", args.size, rgbImg, bb, landmarks)
+
+    bl = (bb.left(), bb.bottom())
+    tr = (bb.right(), bb.top())
+    cv2.rectangle(bgrImg, bl, tr, color=(153, 255, 204), thickness=3)
+    for landmark in landmarks:
+        cv2.circle(bgrImg, center=landmark, radius=3, color=(102, 204, 255), thickness=-1)
+    print("Saving image to 'annotated.png'")
+    cv2.imwrite("annotated.png", bgrImg)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('img', type=str, help="Input image.")
+    parser.add_argument('--dlibFacePredictor', type=str, help="Path to dlib's face predictor.",
+                        default=os.path.join(dlibModelDir, "shape_predictor_68_face_landmarks.dat"))
+    parser.add_argument('--dlibRoot', type=str,
+                        default=os.path.expanduser(
+                            "~/src/dlib-18.16/python_examples"),
+                        help="dlib directory with the dlib.so Python library.")
+
+    parser.add_argument('landmarks', type=str,
+                        choices=['outerEyesAndNose', 'innerEyesAndBottomLip'],
+                        help='The landmarks to align to.')
+    parser.add_argument('--size', type=int, help="Default image size.",
+                        default=96)
+    args = parser.parse_args()
+
+    sys.path = [args.dlibRoot] + sys.path
+    import openface
+    import openface.helper
+    from openface.data import iterImgs
+    from openface.alignment import NaiveDlib
+
+    main(args)