# Source code for bob.pad.base.algorithm.OneClassGMM2

# -*- coding: utf-8 -*-
# @author: Amir Mohammadi

from bob.pad.base.algorithm import Algorithm
from bob.pad.base.utils import convert_and_prepare_features
from bob.bio.gmm.algorithm import GMM
import logging
import numpy as np
from collections.abc import Iterable
from multiprocessing import cpu_count
import joblib

logger = logging.getLogger(__name__)


def bic(trainer, machine, X):
    """Compute the Bayesian information criterion (BIC) for a trained GMM.

    Parameters
    ----------
    trainer : object
        A GMM trainer exposing ``compute_likelihood(machine)``, returning the
        average per-sample log-likelihood of the training data.
    machine : object
        A trained GMM machine exposing ``means``, ``variances`` and
        ``weights`` arrays.
    X : array of shape (n_samples, n_dimensions)
        The data the model was fitted on; only ``X.shape[0]`` is used.

    Returns
    -------
    bic : float
        The lower the better.
    """
    n_samples = X.shape[0]
    avg_log_likelihood = trainer.compute_likelihood(machine)
    # Count the free parameters: every mean entry, every variance entry, and
    # the mixture weights (they sum to one, hence the -1).
    n_free_parameters = (
        machine.means.size + machine.variances.size + len(machine.weights) - 1
    )
    return n_free_parameters * np.log(n_samples) - 2 * avg_log_likelihood * n_samples


class OneClassGMM2(Algorithm):
    """A one class GMM implementation based on Bob's GMM implementation which is more
    stable than scikit-learn's one.

    Parameters
    ----------
    number_of_gaussians : int or iterable of int
        Number of Gaussian components. If an iterable is given,
        :any:`train_projector` performs a grid search and keeps the value with
        the lowest BIC.
    kmeans_training_iterations : int
        Maximum number of iterations for K-Means initialization.
    gmm_training_iterations : int
        Maximum number of iterations for ML GMM training.
    training_threshold : float
        Threshold to end the ML training.
    variance_threshold : float
        Minimum value that a variance can reach.
    update_weights, update_means, update_variances : bool
        Which GMM parameters to update during training.
    n_threads : int
        Number of threads used for training.
    preprocessor : object or None
        An optional scikit-learn style transformer (e.g. PCA) fitted on the
        real samples and applied before GMM training/scoring.
    """

    def __init__(
        self,
        # parameters for the GMM
        number_of_gaussians,
        # parameters of UBM training
        kmeans_training_iterations=25,  # Maximum number of iterations for K-Means
        gmm_training_iterations=25,  # Maximum number of iterations for ML GMM Training
        training_threshold=5e-4,  # Threshold to end the ML training
        variance_threshold=5e-4,  # Minimum value that a variance can reach
        update_weights=True,
        update_means=True,
        update_variances=True,
        n_threads=cpu_count(),
        preprocessor=None,  # a scikit learn preprocessor, can be PCA for example
        **kwargs
    ):
        kwargs.setdefault("performs_projection", True)
        kwargs.setdefault("requires_projector_training", True)
        super().__init__(**kwargs)
        self.gmm_alg = GMM(
            number_of_gaussians=number_of_gaussians,
            kmeans_training_iterations=kmeans_training_iterations,
            gmm_training_iterations=gmm_training_iterations,
            training_threshold=training_threshold,
            variance_threshold=variance_threshold,
            update_weights=update_weights,
            update_means=update_means,
            update_variances=update_variances,
            n_threads=n_threads,
        )
        self.number_of_gaussians = number_of_gaussians
        self.preprocessor = preprocessor

    def train_projector(self, training_features, projector_file):
        """Train the one-class GMM on real (bona-fide) samples only.

        Parameters
        ----------
        training_features : list
            ``[real_features, attack_features]``; the attack features are
            discarded since this is a one-class model. The list is emptied
            in-place to free memory as early as possible.
        projector_file : str
            Path where the trained UBM is saved. If a preprocessor is used,
            it is saved alongside at ``projector_file + ".pkl"``.
        """
        # One-class model: drop attack features, train on bona-fide only.
        del training_features[1]
        real = convert_and_prepare_features(training_features[0], dtype="float64")
        del training_features[0]

        if self.preprocessor is not None:
            real = self.preprocessor.fit_transform(real)
            joblib.dump(self.preprocessor, projector_file + ".pkl")

        if isinstance(self.number_of_gaussians, Iterable):
            logger.info(
                "Performing grid search for GMM on number_of_gaussians: %s",
                self.number_of_gaussians,
            )
            # np.infty was removed in NumPy 2.0; np.inf is the portable name.
            lowest_bic = np.inf
            best_n_gaussians = None
            for nc in self.number_of_gaussians:
                logger.info("Testing for number_of_gaussians: %s", nc)
                self.gmm_alg.gaussians = nc
                self.gmm_alg.train_ubm(real)
                bic_ = bic(self.gmm_alg.ubm_trainer, self.gmm_alg.ubm, real)
                logger.info("BIC for number_of_gaussians: %s is %s", nc, bic_)
                if bic_ < lowest_bic:
                    gmm = self.gmm_alg.ubm
                    lowest_bic = bic_
                    best_n_gaussians = nc
                    logger.info("Best parameters so far: number_of_gaussians %s", nc)

            # Explicit raise instead of assert: asserts are stripped under -O.
            if best_n_gaussians is None:
                raise RuntimeError(
                    "Grid search failed to select a number_of_gaussians value."
                )
            self.gmm_alg.gaussians = best_n_gaussians
        else:
            self.gmm_alg.train_ubm(real)
            gmm = self.gmm_alg.ubm

        self.gmm_alg.ubm = gmm
        self.gmm_alg.save_ubm(projector_file)

    def load_projector(self, projector_file):
        """Load the trained UBM (and the fitted preprocessor, if any)."""
        self.gmm_alg.load_ubm(projector_file)
        if self.preprocessor is not None:
            self.preprocessor = joblib.load(projector_file + ".pkl")

    def project(self, feature):
        """Return per-frame log-likelihoods of ``feature`` under the GMM."""
        feature = convert_and_prepare_features([feature], dtype="float64")[0]
        if self.preprocessor is not None:
            feature = self.preprocessor.transform(feature)
        return self.gmm_alg.ubm(feature)

    def score(self, toscore):
        """The projection already is the score; wrap it in a list."""
        return [toscore]