Source code for bob.pad.face.preprocessor.FrameDifference

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri May 12 14:14:23 2017

@author: Olegs Nikisins
"""

#==============================================================================
# Import what is needed here:

from bob.bio.base.preprocessor import Preprocessor

import numpy as np

import bob.bio.video

import bob.ip.base

import bob.ip.color

import bob.ip.facedetect

import logging

#==============================================================================
# Main body:

logger = logging.getLogger(__name__)


class FrameDifference(Preprocessor):
    """
    This class is designed to compute frame differences for both facial and
    background regions. The constraint of minimal size of the face can be
    applied to input video selecting only the frames overcoming the threshold.
    This behavior is controlled by ``check_face_size_flag`` and ``min_face_size``
    arguments of the class.
    It is also possible to compute the frame differences for a limited number
    of frames specifying the ``number_of_frames`` parameter.

    **Parameters:**

    ``number_of_frames`` : :py:class:`int`
        The number of frames to extract the frame differences from.
        If ``None``, all frames of the input video are used. Default: ``None``.

    ``min_face_size`` : :py:class:`int`
        The minimal size of the face in pixels. Only valid when ``check_face_size_flag``
        is set to True. Default: 50.
    """

    def __init__(self,
                 number_of_frames=None,
                 min_face_size=50,
                 **kwargs):

        super(FrameDifference, self).__init__(
            number_of_frames=number_of_frames,
            min_face_size=min_face_size,
            **kwargs)

        self.number_of_frames = number_of_frames
        self.min_face_size = min_face_size

    #==========================================================================
[docs] def eval_face_differences(self, previous, current, annotations): """ Evaluates the normalized frame difference on the face region. If bounding_box is None or invalid, returns 0. **Parameters:** ``previous`` : 2D :py:class:`numpy.ndarray` Previous frame as a gray-scaled image ``current`` : 2D :py:class:`numpy.ndarray` The current frame as a gray-scaled image ``annotations`` : :py:class:`dict` A dictionary containing annotations of the face bounding box. Dictionary must be as follows ``{'topleft': (row, col), 'bottomright': (row, col)}``. **Returns:** ``face`` : :py:class:`float` A size normalized integral difference of facial regions in two input images. """ prev = previous[annotations['topleft'][0]:annotations['bottomright'][ 0], annotations['topleft'][1]:annotations['bottomright'][1]] curr = current[annotations['topleft'][0]:annotations['bottomright'][0], annotations['topleft'][1]:annotations['bottomright'][1]] face_diff = abs(curr.astype('int32') - prev.astype('int32')) face = face_diff.sum() face /= float(face_diff.size) return face
#==========================================================================
[docs] def eval_background_differences(self, previous, current, annotations, border=None): """ Evaluates the normalized frame difference on the background. If bounding_box is None or invalid, returns 0. **Parameters:** ``previous`` : 2D :py:class:`numpy.ndarray` Previous frame as a gray-scaled image ``current`` : 2D :py:class:`numpy.ndarray` The current frame as a gray-scaled image ``annotations`` : :py:class:`dict` A dictionary containing annotations of the face bounding box. Dictionary must be as follows ``{'topleft': (row, col), 'bottomright': (row, col)}``. ``border`` : :py:class:`int` The border size to consider. If set to ``None``, consider all image from the face location up to the end. Default: ``None``. **Returns:** ``bg`` : :py:class:`float` A size normalized integral difference of non-facial regions in two input images. """ height = annotations['bottomright'][0] - annotations['topleft'][0] width = annotations['bottomright'][1] - annotations['topleft'][1] full_diff = abs(current.astype('int32') - previous.astype('int32')) if border is None: full = full_diff.sum() full_size = full_diff.size else: y1 = annotations['topleft'][0] - border if y1 < 0: y1 = 0 x1 = annotations['topleft'][1] - border if x1 < 0: x1 = 0 y2 = y1 + height + (2 * border) if y2 > full_diff.shape[0]: y2 = full_diff.shape[0] x2 = x1 + width + (2 * border) if x2 > full_diff.shape[1]: x2 = full_diff.shape[1] full = full_diff[y1:y2, x1:x2].sum() full_size = full_diff[y1:y2, x1:x2].size face_diff = full_diff[annotations['topleft'][0]:( annotations['topleft'][0] + height), annotations['topleft'][1]:( annotations['topleft'][1] + width)] # calculates the differences in the face and background areas face = face_diff.sum() bg = full - face normalization = float(full_size - face_diff.size) if normalization < 1: # prevents zero division bg = 0.0 else: bg /= float(full_size - face_diff.size) return bg
#==========================================================================
[docs] def check_face_size(self, frame_container, annotations, min_face_size): """ Return the FrameContainer containing the frames with faces of the size overcoming the specified threshold. The annotations for the selected frames are also returned. **Parameters:** ``frame_container`` : FrameContainer Video data stored in the FrameContainer, see ``bob.bio.video.utils.FrameContainer`` for further details. ``annotations`` : :py:class:`dict` A dictionary containing the annotations for each frame in the video. Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame1_dict, ...}``. Where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}`` is the dictionary defining the coordinates of the face bounding box in frame N. ``min_face_size`` : :py:class:`int` The minimal size of the face in pixels. **Returns:** ``selected_frames`` : FrameContainer Selected frames stored in the FrameContainer. ``selected_annotations`` : :py:class:`dict` A dictionary containing the annotations for selected frames. Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame1_dict, ...}``. Where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}`` is the dictionary defining the coordinates of the face bounding box in frame N. """ selected_frames = bob.bio.video.FrameContainer( ) # initialize the FrameContainer selected_annotations = {} selected_frame_idx = 0 for idx in range(0, len(annotations)): # idx - frame index # annotations for particular frame frame_annotations = annotations[str(idx)] if not frame_annotations: continue # Estimate bottomright and topleft if they are not available: if 'topleft' not in frame_annotations: bbx = bob.ip.facedetect.bounding_box_from_annotation( **frame_annotations) frame_annotations['topleft'] = bbx.topleft frame_annotations['bottomright'] = bbx.bottomright # size of current face face_size = np.min( np.array(frame_annotations['bottomright']) - np.array(frame_annotations['topleft'])) if face_size >= min_face_size: # check if face size is above the threshold selected_frame = frame_container[idx][1] # get current frame selected_frames.add( selected_frame_idx, selected_frame) # add current frame to FrameContainer selected_annotations[str(selected_frame_idx)] = annotations[ str(idx)] selected_frame_idx = selected_frame_idx + 1 return selected_frames, selected_annotations
#==========================================================================
[docs] def comp_face_bg_diff(self, frames, annotations, number_of_frames=None): """ This function computes the frame differences for both facial and background regions. These parameters are computed for ``number_of_frames`` frames in the input FrameContainer. **Parameters:** ``frames`` : FrameContainer RGB video data stored in the FrameContainer, see ``bob.bio.video.utils.FrameContainer`` for further details. ``annotations`` : :py:class:`dict` A dictionary containing the annotations for each frame in the video. Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame1_dict, ...}``. Where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}`` is the dictionary defining the coordinates of the face bounding box in frame N. ``number_of_frames`` : :py:class:`int` The number of frames to use in processing. If ``None``, all frames of the input video are used. Default: ``None``. **Returns:** ``diff`` : 2D :py:class:`numpy.ndarray` An array of the size ``(number_of_frames - 1) x 2``. The first column contains frame differences of facial regions. The second column contains frame differences of non-facial/background regions. """ # Compute the number of frames to process: if number_of_frames is not None: number_of_frames = np.min([len(frames), number_of_frames]) else: number_of_frames = len(frames) previous = frames[0][1] # the first frame in the video if len(previous.shape) == 3: # if RGB convert to gray-scale previous = bob.ip.color.rgb_to_gray(previous) diff = [] for k in range(1, number_of_frames): current = frames[k][1] if len(current.shape) == 3: # if RGB convert to gray-scale current = bob.ip.color.rgb_to_gray(current) face_diff = self.eval_face_differences(previous, current, annotations[str(k)]) bg_diff = self.eval_background_differences( previous, current, annotations[str(k)], None) diff.append((face_diff, bg_diff)) # swap buffers: current <=> previous tmp = previous previous = current current = tmp if not diff: # if list is empty diff = [(np.NaN, np.NaN)] diff = np.vstack(diff) return diff
#==========================================================================
[docs] def select_annotated_frames(self, frames, annotations): """ Select only annotated frames in the input FrameContainer ``frames``. **Parameters:** ``frames`` : FrameContainer Video data stored in the FrameContainer, see ``bob.bio.video.utils.FrameContainer`` for further details. ``annotations`` : :py:class:`dict` A dictionary containing the annotations for each frame in the video. Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame1_dict, ...}``. Where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}`` is the dictionary defining the coordinates of the face bounding box in frame N. **Returns:** ``cleaned_frame_container`` : FrameContainer FrameContainer containing the annotated frames only. ``cleaned_annotations`` : :py:class:`dict` A dictionary containing the annotations for each frame in the output video. Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame1_dict, ...}``. Where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}`` is the dictionary defining the coordinates of the face bounding box in frame N. """ annotated_frames = np.sort([ np.int(item) for item in annotations.keys() ]) # annotated frame numbers available_frames = range( 0, len(frames)) # frame numbers in the input video valid_frames = list( set(annotated_frames).intersection( available_frames)) # valid and annotated frames cleaned_frame_container = bob.bio.video.FrameContainer( ) # initialize the FrameContainer cleaned_annotations = {} for idx, valid_frame_num in enumerate(valid_frames): # valid_frame_num - is the number of the original frame having annotations cleaned_annotations[str(idx)] = annotations[str( valid_frame_num)] # correct the frame numbers selected_frame = frames[valid_frame_num][1] # get current frame cleaned_frame_container.add( idx, selected_frame) # add current frame to FrameContainer return cleaned_frame_container, cleaned_annotations
#========================================================================== def __call__(self, frames, annotations): """ This method calls the ``comp_face_bg_diff`` function of this class computing the frame differences for both facial and background regions. The frame differences are computed for selected frames, which are returned by ``check_face_size`` function of this class. **Parameters:** ``frames`` : FrameContainer RGB video data stored in the FrameContainer, see ``bob.bio.video.utils.FrameContainer`` for further details. ``annotations`` : :py:class:`dict` A dictionary containing the annotations for each frame in the video. Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame1_dict, ...}``. Where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}`` is the dictionary defining the coordinates of the face bounding box in frame N. **Returns:** ``diff`` : 2D :py:class:`numpy.ndarray` An array of the size ``(number_of_frames - 1) x 2``. The first column contains frame differences of facial regions. The second column contains frame differences of non-facial/background regions. """ if len(frames) != len(annotations): # if some annotations are missing # Select only annotated frames: frames, annotations = self.select_annotated_frames( frames, annotations) selected_frames, selected_annotations = self.check_face_size( frames, annotations, self.min_face_size) if not len(selected_annotations): logger.warn("None of the annotations are valid.") return None diff = self.comp_face_bg_diff( frames=selected_frames, annotations=selected_annotations, number_of_frames=self.number_of_frames) return diff