Source code for bob.ip.mtcnn.FaceDetector

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>

import numpy
import os
import bob.core

logger = bob.core.log.setup("bob.ip.mtcnn")
bob.core.log.set_verbosity_level(logger, 3)
# Silence caffe's GLOG output; this must be set before importing caffe
os.environ['GLOG_minloglevel'] = '2'
import caffe
import bob.ip.base
import bob.io.image

from bob.ip.facedetect import BoundingBox
from .legacy import detect_face
from .utils import rectangle2bounding_box2


class FaceDetector(object):
    """
    Detects faces and 5 landmarks using the MTCNN
    (https://github.com/kpzhang93/MTCNN_face_detection_alignment) from the paper:

    Zhang, Kaipeng, et al. "Joint face detection and alignment using multitask
    cascaded convolutional networks." IEEE Signal Processing Letters 23.10
    (2016): 1499-1503.
    """

    def __init__(self):
        """
        Load the caffe models
        """
        caffe_base_path = FaceDetector.get_mtcnn_model_path()

        # Default values from the original example
        self.minsize = 20
        self.threshold = [0.6, 0.7, 0.7]
        self.factor = 0.709
        self.fastresize = False

        # Loading the three cascade stages (P-Net, R-Net, O-Net)
        caffe.set_mode_cpu()
        self.p_net = caffe.Net(
            os.path.join(caffe_base_path, "det1.prototxt"),
            os.path.join(caffe_base_path, "det1.caffemodel"), caffe.TEST)

        self.r_net = caffe.Net(
            os.path.join(caffe_base_path, "det2.prototxt"),
            os.path.join(caffe_base_path, "det2.caffemodel"), caffe.TEST)

        self.o_net = caffe.Net(
            os.path.join(caffe_base_path, "det3.prototxt"),
            os.path.join(caffe_base_path, "det3.caffemodel"), caffe.TEST)

    def _convert_list_to_landmarks(self, points):
        """
        Convert each row of 10 landmark coordinates ([x1..x5, y1..y5]) into a
        dictionary of named (y, x) points
        """
        landmarks = []
        possible_landmarks = ['reye', 'leye', 'nose', 'mouthleft', 'mouthright']
        for i in range(points.shape[0]):
            landmark = dict()
            for offset, p in enumerate(possible_landmarks):
                # points holds the 5 x coordinates first, then the 5 y
                # coordinates; store each landmark as (y, x)
                landmark[p] = (int(points[i][offset + 5]),
                               int(points[i][offset]))
            landmarks.append(landmark)

        return landmarks
    def detect_all_faces(self, image, return_bob_bb=True):
        """
        Detect all the faces, with their respective landmarks, if any, in a
        COLOR image

        Parameters
        ----------
        image : numpy.array
            The color image [c, h, w]
        return_bob_bb : bool, optional
            If True, the faces are wrapped using
            :any:`bob.ip.facedetect.BoundingBox`.

        Returns
        -------
        object
            Returns two lists; the first one contains the bounding boxes of
            the detected faces and the second one contains their landmarks.
            The CNN returns 5 facial landmarks (leye, reye, nose, mouthleft,
            mouthright). If there is no face, ``None`` is returned for both.

        Raises
        ------
        ValueError
            When image.ndim is not 3.
        """
        assert image is not None

        if len(image.shape) != 3:
            raise ValueError("Only color images are supported")

        bb, landmarks = detect_face(
            bob.io.image.to_matplotlib(image), self.minsize, self.p_net,
            self.r_net, self.o_net, self.threshold, self.fastresize,
            self.factor)

        # If there is no face, return None
        if len(bb) == 0:
            return None, None

        if return_bob_bb:
            bb = rectangle2bounding_box2(bb)

        return bb, self._convert_list_to_landmarks(landmarks)
    def detect_single_face(self, image):
        """
        Returns the biggest face in a COLOR image, if any.

        Parameters
        ----------
        image : numpy.array
            numpy array with a color image [c, h, w]

        Returns
        -------
            The face bounding box and its respective 5 landmarks (leye, reye,
            nose, mouthleft, mouthright). If there is no face, ``None`` is
            returned for both.
        """
        faces, landmarks = self.detect_all_faces(image)

        # Return None if no face was detected
        if faces is None:
            return None, None

        # Pick the face with the largest bounding-box area
        index = numpy.argmax([(f.bottomright[0] - f.topleft[0]) *
                              (f.bottomright[1] - f.topleft[1])
                              for f in faces])
        return faces[index], landmarks[index]
    def detect_crop_align(self, image, final_image_size=(160, 160)):
        """
        Detects the biggest face and crops it based on the eye locations,
        using :py:class:`bob.ip.base.FaceEyesNorm`.

        The final eye positions were inspired by:
        https://gitlab.idiap.ch/bob/bob.bio.caffe_face/blob/master/bob/bio/caffe_face/config/preprocessor/vgg_preprocessor.py

        **Parameters**

        image: numpy array with a color image [c, h, w]
        final_image_size: Image dimensions [w, h]

        **Returns**

        The cropped image. If there is no face, ``None`` is returned
        """
        face, landmark = self.detect_single_face(image)

        if face is None:
            return None

        CROPPED_IMAGE_WIDTH = final_image_size[0]
        CROPPED_IMAGE_HEIGHT = final_image_size[1]

        # Final eye positions w.r.t. the cropped image size
        RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT / 3.44,
                         CROPPED_IMAGE_WIDTH / 3.02)
        LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT / 3.44,
                        CROPPED_IMAGE_WIDTH / 1.49)

        extractor = bob.ip.base.FaceEyesNorm(
            (CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH),
            RIGHT_EYE_POS, LEFT_EYE_POS)
        return extractor(image, landmark['reye'], landmark['leye'])
    def detect_crop(self, image, final_image_size=(182, 182), margin=44):
        """
        Detects the biggest face and crops it

        **Parameters**

        image: numpy array with a color image [c, h, w]
        final_image_size: Image dimensions [w, h]
        margin: Margin, in pixels, added around the detected bounding box

        **Returns**

        The cropped image. If there is no face, ``None`` is returned
        """
        face, landmark = self.detect_single_face(image)

        if face is None:
            return None

        # Expand the bounding box by half the margin on each side, clamping
        # to the image borders
        top = numpy.uint(numpy.maximum(face.top - margin / 2, 0))
        left = numpy.uint(numpy.maximum(face.left - margin / 2, 0))
        bottom = numpy.uint(numpy.minimum(
            face.bottom + margin / 2, image.shape[1]))
        right = numpy.uint(numpy.minimum(
            face.right + margin / 2, image.shape[2]))

        cropped = image[:, top:bottom, left:right]
        dst = numpy.zeros(shape=(3, final_image_size[0], final_image_size[1]))
        bob.ip.base.scale(cropped, dst)

        return dst
    @staticmethod
    def get_mtcnn_model_path():
        import pkg_resources
        return pkg_resources.resource_filename(__name__, 'data')
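
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal example of driving
# the detector above. The file name "face.png" is a hypothetical placeholder
# for any color image loadable by bob.io.base.
if __name__ == "__main__":
    import bob.io.base

    # Bob loads color images as [c, h, w] numpy arrays
    image = bob.io.base.load("face.png")  # hypothetical input path

    detector = FaceDetector()

    # All faces: a list of bounding boxes and a list of landmark dictionaries
    boxes, landmarks = detector.detect_all_faces(image)
    if boxes is not None:
        for bb, lm in zip(boxes, landmarks):
            print(bb.topleft, bb.bottomright, lm['reye'], lm['leye'])

    # Biggest face only
    face, landmark = detector.detect_single_face(image)

    # Eye-based geometric normalization into a 160x160 crop
    normalized = detector.detect_crop_align(image, final_image_size=(160, 160))

    # Plain crop with a 44-pixel margin, rescaled to 182x182
    cropped = detector.detect_crop(image, final_image_size=(182, 182))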