Source code for bob.pad.base.algorithm.OneClassGMM

# -*- coding: utf-8 -*-
"""
Created on Mon Aug 28 16:47:47 2017
@author: Olegs Nikisins
"""

# ==============================================================================
# Import what is needed here:

from bob.bio.video.utils import FrameContainer
from bob.pad.base.algorithm import Algorithm
from bob.pad.base.utils import convert_frame_cont_to_array, mean_std_normalize, convert_and_prepare_features
from sklearn import mixture
import bob.io.base
import logging
import numpy as np

logger = logging.getLogger(__name__)

# ==============================================================================
# Main body :


class OneClassGMM(Algorithm):
    """
    This class is designed to train a OneClassGMM based PAD system. The OneClassGMM is trained
    using data of one class (real class) only. The procedure is the following:

    1. First, the training data is mean-std normalized using mean and std of the
       real class only.

    2. Second, the OneClassGMM with ``n_components`` Gaussians is trained using samples
       of the real class.

    3. The input features are next classified using pre-trained OneClassGMM machine.

    **Parameters:**

    ``n_components`` : :py:class:`int`
        Number of Gaussians in the OneClassGMM. Default: 1 .

    ``random_state`` : :py:class:`int`
        A seed for the random number generator used in the initialization of
        the OneClassGMM. Default: 3 .

    ``frame_level_scores_flag`` : :py:class:`bool`
        Return scores for each frame individually if True. Otherwise, return a
        single score per video. Default: False.
    """

    def __init__(self,
                 n_components=1,
                 random_state=3,
                 frame_level_scores_flag=False,
                 covariance_type='full',
                 reg_covar=1e-06,
                 normalize_features=False,
                 ):

        Algorithm.__init__(
            self,
            n_components=n_components,
            random_state=random_state,
            frame_level_scores_flag=frame_level_scores_flag,
            performs_projection=True,
            requires_projector_training=True)

        self.n_components = n_components
        self.random_state = random_state
        self.frame_level_scores_flag = frame_level_scores_flag
        self.covariance_type = covariance_type
        self.reg_covar = reg_covar
        self.normalize_features = normalize_features

        self.machine = None  # this argument will be updated with pretrained OneClassGMM machine
        self.features_mean = None  # this argument will be updated with features mean
        self.features_std = None  # this argument will be updated with features std

        # names of the arguments of the pretrained OneClassGMM machine to be saved/loaded to/from HDF5 file:
        self.gmm_param_keys = [
            "covariance_type", "covariances_", "lower_bound_", "means_",
            "n_components", "weights_", "converged_", "precisions_",
            "precisions_cholesky_"
        ]

    # ==========================================================================
[docs] def train_gmm(self, real): """ Train OneClassGMM classifier given real class. Prior to the training the data is mean-std normalized. **Parameters:** ``real`` : 2D :py:class:`numpy.ndarray` Training features for the real class. **Returns:** ``machine`` : object A trained OneClassGMM machine. ``features_mean`` : 1D :py:class:`numpy.ndarray` Mean of the features. ``features_std`` : 1D :py:class:`numpy.ndarray` Standart deviation of the features. """ # real is now mean-std normalized if self.normalize_features: features_norm, features_mean, features_std = mean_std_normalize(real, copy=False) else: features_norm = real features_mean = np.zeros(real.shape[1:], dtype=real.dtype) features_std = np.ones(real.shape[1:], dtype=real.dtype) if isinstance(self.n_components, (tuple, list)) or isinstance(self.covariance_type, (tuple, list)): # perform grid search on covariance_type and n_components n_components = self.n_components if isinstance(self.n_components, (tuple, list)) else [self.n_components] covariance_type = self.covariance_type if isinstance(self.covariance_type, (tuple, list)) else [self.covariance_type] logger.info("Performing grid search for GMM on covariance_type: %s and n_components: %s", self.covariance_type, self.n_components) bic = [] lowest_bic = np.infty for cv_type in covariance_type: for nc in n_components: logger.info("Testing for n_components: %s, covariance_type: %s", nc, cv_type) gmm = mixture.GaussianMixture( n_components=nc, covariance_type=cv_type, reg_covar=self.reg_covar, verbose=logger.level) try: gmm.fit(features_norm) except Exception: logger.warn("Failed to train current GMM", exc_info=True) continue bic.append(gmm.bic(features_norm)) if bic[-1] < lowest_bic: lowest_bic = bic[-1] logger.info("Best parameters so far: nc %s, cv_type: %s", nc, cv_type) machine = gmm else: machine = mixture.GaussianMixture( n_components=self.n_components, random_state=self.random_state, covariance_type=self.covariance_type, reg_covar=self.reg_covar, verbose=logger.level) machine.fit(features_norm) return machine, features_mean, features_std
# ==========================================================================
[docs] def save_gmm_machine_and_mean_std(self, projector_file, machine, features_mean, features_std): """ Saves the OneClassGMM machine, features mean and std to the hdf5 file. The absolute name of the file is specified in ``projector_file`` string. **Parameters:** ``projector_file`` : :py:class:`str` Absolute name of the file to save the data to, as returned by ``bob.pad.base`` framework. ``machine`` : object The OneClassGMM machine to be saved. As returned by sklearn.linear_model module. ``features_mean`` : 1D :py:class:`numpy.ndarray` Mean of the features. ``features_std`` : 1D :py:class:`numpy.ndarray` Standart deviation of the features. """ # open hdf5 file to save to with bob.io.base.HDF5File(projector_file, 'w') as f: for key in self.gmm_param_keys: data = getattr(machine, key) f.set(key, data) f.set("features_mean", features_mean) f.set("features_std", features_std)
# ==========================================================================
[docs] def train_projector(self, training_features, projector_file): """ Train OneClassGMM for feature projection and save it to file. The ``requires_projector_training = True`` flag must be set to True to enable this function. **Parameters:** ``training_features`` : [[FrameContainer], [FrameContainer]] A list containing two elements: [0] - a list of Frame Containers with feature vectors for the real class; [1] - a list of Frame Containers with feature vectors for the attack class. ``projector_file`` : :py:class:`str` The file to save the trained projector to, as returned by the ``bob.pad.base`` framework. """ del training_features[1] # training_features[0] - training features for the REAL class. real = convert_and_prepare_features(training_features[0], dtype=None) del training_features[0] # training_features[1] - training features for the ATTACK class. # attack = self.convert_and_prepare_features(training_features[1]) # output is array # Train the OneClassGMM machine and get normalizers: machine, features_mean, features_std = self.train_gmm(real=real) # Save the GNN machine and normalizers: self.save_gmm_machine_and_mean_std(projector_file, machine, features_mean, features_std) logger.info("Finished training the GMM.")
# ==========================================================================
[docs] def load_gmm_machine_and_mean_std(self, projector_file): """ Loads the machine, features mean and std from the hdf5 file. The absolute name of the file is specified in ``projector_file`` string. **Parameters:** ``projector_file`` : :py:class:`str` Absolute name of the file to load the trained projector from, as returned by ``bob.pad.base`` framework. **Returns:** ``machine`` : object The loaded OneClassGMM machine. As returned by sklearn.mixture module. ``features_mean`` : 1D :py:class:`numpy.ndarray` Mean of the features. ``features_std`` : 1D :py:class:`numpy.ndarray` Standart deviation of the features. """ # file to read the machine from with bob.io.base.HDF5File(projector_file, 'r') as f: # initialize the machine: machine = mixture.GaussianMixture() # set the params of the machine: for key in self.gmm_param_keys: data = f.read(key) setattr(machine, key, data) features_mean = f.read("features_mean") features_std = f.read("features_std") return machine, features_mean, features_std
# ==========================================================================
[docs] def load_projector(self, projector_file): """ Loads the machine, features mean and std from the hdf5 file. The absolute name of the file is specified in ``projector_file`` string. This function sets the arguments ``self.machine``, ``self.features_mean`` and ``self.features_std`` of this class with loaded machines. The function must be capable of reading the data saved with the :py:meth:`train_projector` method of this class. Please register `performs_projection = True` in the constructor to enable this function. **Parameters:** ``projector_file`` : :py:class:`str` The file to read the projector from, as returned by the ``bob.pad.base`` framework. In this class the names of the files to read the projectors from are modified, see ``load_machine`` and ``load_cascade_of_machines`` methods of this class for more details. """ machine, features_mean, features_std = self.load_gmm_machine_and_mean_std( projector_file) self.machine = machine self.features_mean = features_mean self.features_std = features_std
# ==========================================================================
[docs] def project(self, feature): """ This function computes a vector of scores for each sample in the input array of features. The following steps are applied: 1. First, the input data is mean-std normalized using mean and std of the real class only. 2. The input features are next classified using pre-trained OneClassGMM machine. Set ``performs_projection = True`` in the constructor to enable this function. It is assured that the :py:meth:`load_projector` was **called before** the ``project`` function is executed. **Parameters:** ``feature`` : FrameContainer or 2D :py:class:`numpy.ndarray` Two types of inputs are accepted. A Frame Container conteining the features of an individual, see ``bob.bio.video.utils.FrameContainer``. Or a 2D feature array of the size (N_samples x N_features). **Returns:** ``scores`` : 1D :py:class:`numpy.ndarray` Vector of scores. Scores for the real class are expected to be higher, than the scores of the negative / attack class. In this case scores are the weighted log probabilities. """ # 1. Convert input array to numpy array if necessary. if isinstance( feature, FrameContainer): # if FrameContainer convert to 2D numpy array features = convert_frame_cont_to_array(feature) else: features = feature features_norm, _, _ = mean_std_normalize( features, self.features_mean, self.features_std, copy=False) del features scores = self.machine.score_samples(features_norm) return scores
# ==========================================================================
[docs] def score(self, toscore): """ Returns a probability of a sample being a real class. **Parameters:** ``toscore`` : 1D :py:class:`numpy.ndarray` Vector with scores for each frame/sample defining the probability of the frame being a sample of the real class. **Returns:** ``score`` : [:py:class:`float`] If ``frame_level_scores_flag = False`` a single score is returned. One score per video. This score is placed into a list, because the ``score`` must be an iterable. Score is a probability of a sample being a real class. If ``frame_level_scores_flag = True`` a list of scores is returned. One score per frame/sample. """ if self.frame_level_scores_flag: score = list(toscore) else: score = [np.mean(toscore)] # compute a single score per video return score