#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""
Implementation of the Vanilla Biometrics pipeline using Dask :ref:`bob.bio.base.struct_bio_rec_sys`_

This file contains simple processing blocks meant to be used
for bob.bio experiments
"""

import logging
import tempfile

from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline

from bob.bio.base.pipelines.vanilla_biometrics.abstract_classes import BioAlgorithm
from bob.pipelines import SampleWrapper, wrap
from bob.pipelines.utils import isinstance_nested

from .score_writers import FourColumnsScoreWriter

logger = logging.getLogger(__name__)


class VanillaBiometricsPipeline(object):
    """
    Vanilla Biometrics Pipeline

    This is the backbone of most biometric recognition systems.
    It implements the following three subpipelines:

     - :py:meth:`VanillaBiometricsPipeline.train_background_model`: Initializes or trains the transformer.
        It will run :py:meth:`sklearn.base.BaseEstimator.fit`

     - :py:meth:`VanillaBiometricsPipeline.create_biometric_reference`: Creates biometric references.
        It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
        :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.enroll`

     - :py:meth:`VanillaBiometricsPipeline.compute_scores`: Computes scores.
        It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
        :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.score`


    Example
    -------
       >>> from bob.pipelines.transformers import Linearize
       >>> from sklearn.pipeline import make_pipeline
       >>> from bob.bio.base.pipelines.vanilla_biometrics import Distance, VanillaBiometricsPipeline
       >>> estimator_1 = Linearize()
       >>> transformer = make_pipeline(estimator_1)
       >>> biometric_algorithm = Distance()
       >>> pipeline = VanillaBiometricsPipeline(transformer, biometric_algorithm)
       >>> pipeline(samples_for_training_background_model, samplesets_for_enroll, samplesets_for_scoring)  # doctest: +SKIP


    To run this pipeline using Dask, use the function :py:func:`dask_vanilla_biometrics`.

    Example
    -------
      >>> from bob.bio.base.pipelines.vanilla_biometrics import dask_vanilla_biometrics
      >>> pipeline = VanillaBiometricsPipeline(transformer, biometric_algorithm)
      >>> pipeline = dask_vanilla_biometrics(pipeline)
      >>> pipeline(samples_for_training_background_model, samplesets_for_enroll, samplesets_for_scoring).compute()  # doctest: +SKIP


    Parameters
    ----------

      transformer: :py:class:`sklearn.pipeline.Pipeline` or :py:class:`sklearn.base.BaseEstimator`
        Transformer that will preprocess your data

      biometric_algorithm: :py:class:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm`
        Biometric algorithm object that implements the `enroll` and `score` methods

      score_writer: :any:`bob.bio.base.pipelines.vanilla_biometrics.ScoreWriter`
        Format to write scores. Defaults to :any:`bob.bio.base.pipelines.vanilla_biometrics.FourColumnsScoreWriter`

    """

    def __init__(
        self, transformer, biometric_algorithm, score_writer=None,
    ):
        self.transformer = transformer
        self.biometric_algorithm = biometric_algorithm
        self.score_writer = score_writer
        if self.score_writer is None:
            tempdir = tempfile.TemporaryDirectory()
            self.score_writer = FourColumnsScoreWriter(tempdir.name)

        check_valid_pipeline(self)
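
    # A construction sketch (the "./scores" path is illustrative, not part of
    # the API): pass an explicit score writer to keep scores in a chosen
    # directory instead of the temporary default, e.g.
    #
    #   writer = FourColumnsScoreWriter("./scores")
    #   pipeline = VanillaBiometricsPipeline(transformer, algorithm, score_writer=writer)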

    def __call__(
        self,
        background_model_samples,
        biometric_reference_samples,
        probe_samples,
        allow_scoring_with_all_biometric_references=True,
    ):
        logger.info(
            f" >> Vanilla Biometrics: Training background model with pipeline {self.transformer}"
        )

        # Train the background model (if there are no training samples, the
        # transformer is returned untouched and the pipeline is assumed to be
        # stateless)
        self.transformer = self.train_background_model(background_model_samples)

        logger.info(
            f" >> Creating biometric references with the biometric algorithm {self.biometric_algorithm}"
        )

        # Create biometric samples
        biometric_references = self.create_biometric_reference(
            biometric_reference_samples
        )

        logger.info(
            f" >> Computing scores with the biometric algorithm {self.biometric_algorithm}"
        )

        # Scores all probes
        scores, _ = self.compute_scores(
            probe_samples,
            biometric_references,
            allow_scoring_with_all_biometric_references,
        )

        return scores

    def train_background_model(self, background_model_samples):
        # background_model_samples is a list of Samples

        # We might have algorithms that have no data for training
        if len(background_model_samples) <= 0:
            logger.warning(
                "There's no data to train the background model. "
                "For the rest of the execution it will be assumed that the pipeline is stateless."
            )
            return self.transformer

        return self.transformer.fit(background_model_samples)
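
    # Usage sketch (``training_samples`` is a hypothetical list of Samples):
    # an empty list leaves the transformer untouched; otherwise ``fit`` is
    # called on the wrapped scikit-learn pipeline:
    #
    #   fitted_transformer = pipeline.train_background_model(training_samples)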

    def create_biometric_reference(self, biometric_reference_samples):
        biometric_reference_features = self.transformer.transform(
            biometric_reference_samples
        )

        biometric_references = self.biometric_algorithm.enroll_samples(
            biometric_reference_features
        )

        # models is a list of Samples
        return biometric_references

    def compute_scores(
        self,
        probe_samples,
        biometric_references,
        allow_scoring_with_all_biometric_references=True,
    ):
        # probes is a list of SampleSets
        probe_features = self.transformer.transform(probe_samples)

        scores = self.biometric_algorithm.score_samples(
            probe_features,
            biometric_references,
            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
        )

        # scores is a list of Samples
        return scores, probe_features
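
    # Usage sketch (``probes`` and ``references`` are hypothetical): the method
    # returns the scores along with the transformed probe features, so the
    # features can be reused without running the transformer twice:
    #
    #   scores, probe_features = pipeline.compute_scores(probes, references)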

    def write_scores(self, scores):
        if self.score_writer is None:
            raise ValueError("No score writer defined in the pipeline")
        return self.score_writer.write(scores)

    def post_process(self, score_paths, filename):
        if self.score_writer is None:
            raise ValueError("No score writer defined in the pipeline")
        return self.score_writer.post_process(score_paths, filename)
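
    # Typical score-writing flow (``"scores-dev"`` is an arbitrary output
    # name): write the raw scores, then post-process them into a single file
    # with the configured score writer:
    #
    #   score_paths = pipeline.write_scores(scores)
    #   final_path = pipeline.post_process(score_paths, "scores-dev")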


def check_valid_pipeline(vanilla_pipeline):
    """
    Applies sanity checks to the vanilla biometrics pipeline
    """

    ## CHECKING THE TRANSFORMER
    # Checking if it's a scikit-learn Pipeline or a single estimator
    if isinstance(vanilla_pipeline.transformer, Pipeline):

        # Checking if all steps are sample-wrapped; if not, wrap them
        for p in vanilla_pipeline.transformer:
            if not isinstance_nested(p, "estimator", SampleWrapper):
                wrap(["sample"], p)

    # In this case it can be a simple estimator:
    # check whether it is sample-wrapped and, if not, wrap it
    elif isinstance_nested(vanilla_pipeline.transformer, "estimator", BaseEstimator):
        if not isinstance_nested(vanilla_pipeline.transformer, "estimator", SampleWrapper):
            wrap(["sample"], vanilla_pipeline.transformer)

    else:
        raise ValueError(
            f"VanillaBiometricsPipeline.transformer should be an instance of either "
            f"`sklearn.pipeline.Pipeline` or `sklearn.base.BaseEstimator`, "
            f"not {vanilla_pipeline.transformer}"
        )

    ## Checking the biometric algorithm
    if not isinstance(vanilla_pipeline.biometric_algorithm, BioAlgorithm):
        raise ValueError(
            f"VanillaBiometricsPipeline.biometric_algorithm should be an instance of "
            f"`BioAlgorithm`, not {vanilla_pipeline.biometric_algorithm}"
        )

    return True
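

# Illustration of what ``check_valid_pipeline`` enforces (a sketch mirroring
# the docstring example above; ``Linearize`` and ``Distance`` are the same
# objects used there):
#
#   from sklearn.pipeline import make_pipeline
#   from bob.pipelines.transformers import Linearize
#   from bob.bio.base.pipelines.vanilla_biometrics import Distance
#
#   transformer = make_pipeline(Linearize())  # steps not yet sample-wrapped
#   pipeline = VanillaBiometricsPipeline(transformer, Distance())
#   # __init__ ran check_valid_pipeline, which wrapped each unwrapped step
#   # with wrap(["sample"], step) so it accepts bob.pipelines Samples.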