#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""
Implementation of the Vanilla Biometrics pipeline using Dask :ref:`bob.bio.base.struct_bio_rec_sys`_

This file contains simple processing blocks meant to be used
for bob.bio experiments
"""

import logging
import tempfile

from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline

from bob.bio.base.pipelines.vanilla_biometrics.abstract_classes import BioAlgorithm
from bob.pipelines import SampleWrapper, wrap
from bob.pipelines.utils import isinstance_nested

from .score_writers import FourColumnsScoreWriter

logger = logging.getLogger(__name__)


class VanillaBiometricsPipeline(object):
    """
    Vanilla Biometrics Pipeline

    This is the backbone of most biometric recognition systems.
    It implements the following three subpipelines:

     - :py:meth:`VanillaBiometricsPipeline.train_background_model`: Initializes or trains the transformer.
        It will run :py:meth:`sklearn.base.BaseEstimator.fit`

     - :py:meth:`VanillaBiometricsPipeline.create_biometric_reference`: Creates biometric references.
        It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
        :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.enroll`

     - :py:meth:`VanillaBiometricsPipeline.compute_scores`: Computes scores.
        It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
        :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.score`


    Example
    -------
       >>> from bob.pipelines.transformers import Linearize
       >>> from sklearn.pipeline import make_pipeline
       >>> from bob.bio.base.pipelines.vanilla_biometrics import Distance, VanillaBiometricsPipeline
       >>> estimator_1 = Linearize()
       >>> transformer = make_pipeline(estimator_1)
       >>> biometric_algorithm = Distance()
       >>> pipeline = VanillaBiometricsPipeline(transformer, biometric_algorithm)
       >>> pipeline(samples_for_training_background_model, samplesets_for_enroll, samplesets_for_scoring)  # doctest: +SKIP


    To run this pipeline using Dask, use the function :py:func:`dask_vanilla_biometrics`.

    Example
    -------
      >>> from bob.bio.base.pipelines.vanilla_biometrics import dask_vanilla_biometrics
      >>> pipeline = VanillaBiometricsPipeline(transformer, biometric_algorithm)
      >>> pipeline = dask_vanilla_biometrics(pipeline)
      >>> pipeline(samples_for_training_background_model, samplesets_for_enroll, samplesets_for_scoring).compute()  # doctest: +SKIP


    Parameters
    ----------

      transformer: :py:class:`sklearn.pipeline.Pipeline` or :py:class:`sklearn.base.BaseEstimator`
        Transformer that will preprocess your data

      biometric_algorithm: :py:class:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm`
        Biometric algorithm object that implements the `enroll` and `score` methods

      score_writer: :any:`bob.bio.base.pipelines.vanilla_biometrics.ScoreWriter`
        Format to write scores. Defaults to :any:`bob.bio.base.pipelines.vanilla_biometrics.FourColumnsScoreWriter`

    """

    def __init__(
        self, transformer, biometric_algorithm, score_writer=None,
    ):
        self.transformer = transformer
        self.biometric_algorithm = biometric_algorithm
        self.score_writer = score_writer
        if self.score_writer is None:
            tempdir = tempfile.TemporaryDirectory()
            self.score_writer = FourColumnsScoreWriter(tempdir.name)

        check_valid_pipeline(self)
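
    # A construction sketch (the "./scores" path is illustrative, not part of
    # the API): pass an explicit score writer to keep scores in a chosen
    # directory instead of the temporary default, e.g.
    #
    #   writer = FourColumnsScoreWriter("./scores")
    #   pipeline = VanillaBiometricsPipeline(transformer, algorithm, score_writer=writer)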

    def __call__(
        self,
        background_model_samples,
        biometric_reference_samples,
        probe_samples,
        allow_scoring_with_all_biometric_references=True,
    ):
        logger.info(
            f" >> Vanilla Biometrics: Training background model with pipeline {self.transformer}"
        )

        # Train the background model (if there are no training samples, the
        # transformer is returned untouched and the pipeline is assumed to be
        # stateless)
        self.transformer = self.train_background_model(background_model_samples)

        logger.info(
            f" >> Creating biometric references with the biometric algorithm {self.biometric_algorithm}"
        )

        # Create biometric samples
        biometric_references = self.create_biometric_reference(
            biometric_reference_samples
        )

        logger.info(
            f" >> Computing scores with the biometric algorithm {self.biometric_algorithm}"
        )

        # Scores all probes
        scores, _ = self.compute_scores(
            probe_samples,
            biometric_references,
            allow_scoring_with_all_biometric_references,
        )

        return scores

    def train_background_model(self, background_model_samples):
        # background_model_samples is a list of Samples

        # We might have algorithms that have no data for training
        if len(background_model_samples) <= 0:
            logger.warning(
                "There's no data to train the background model. "
                "For the rest of the execution it will be assumed that the pipeline is stateless."
            )
            return self.transformer

        return self.transformer.fit(background_model_samples)
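
    # Usage sketch (``training_samples`` is a hypothetical list of Samples):
    # an empty list leaves the transformer untouched; otherwise ``fit`` is
    # called on the wrapped scikit-learn pipeline:
    #
    #   fitted_transformer = pipeline.train_background_model(training_samples)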

    def create_biometric_reference(self, biometric_reference_samples):
        biometric_reference_features = self.transformer.transform(
            biometric_reference_samples
        )

        biometric_references = self.biometric_algorithm.enroll_samples(
            biometric_reference_features
        )

        # models is a list of Samples
        return biometric_references

    def compute_scores(
        self,
        probe_samples,
        biometric_references,
        allow_scoring_with_all_biometric_references=True,
    ):
        # probes is a list of SampleSets
        probe_features = self.transformer.transform(probe_samples)

        scores = self.biometric_algorithm.score_samples(
            probe_features,
            biometric_references,
            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
        )

        # scores is a list of Samples
        return scores, probe_features
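
    # Usage sketch (``probes`` and ``references`` are hypothetical): the method
    # returns the scores along with the transformed probe features, so the
    # features can be reused without running the transformer twice:
    #
    #   scores, probe_features = pipeline.compute_scores(probes, references)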

    def write_scores(self, scores):
        if self.score_writer is None:
            raise ValueError("No score writer defined in the pipeline")
        return self.score_writer.write(scores)

    def post_process(self, score_paths, filename):
        if self.score_writer is None:
            raise ValueError("No score writer defined in the pipeline")
        return self.score_writer.post_process(score_paths, filename)
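
    # Typical score-writing flow (``"scores-dev"`` is an arbitrary output
    # name): write the raw scores, then post-process them into a single file
    # with the configured score writer:
    #
    #   score_paths = pipeline.write_scores(scores)
    #   final_path = pipeline.post_process(score_paths, "scores-dev")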


def check_valid_pipeline(vanilla_pipeline):
    """
    Applies sanity checks to the vanilla biometrics pipeline
    """

    ## CHECKING THE TRANSFORMER
    # Checking if it's a scikit-learn Pipeline or a single estimator
    if isinstance(vanilla_pipeline.transformer, Pipeline):

        # Checking if all steps are sample-wrapped; if not, wrap them
        for p in vanilla_pipeline.transformer:
            if not isinstance_nested(p, "estimator", SampleWrapper):
                wrap(["sample"], p)

    # In this case it can be a simple estimator:
    # check whether it is sample-wrapped and, if not, wrap it
    elif isinstance_nested(vanilla_pipeline.transformer, "estimator", BaseEstimator):
        if not isinstance_nested(vanilla_pipeline.transformer, "estimator", SampleWrapper):
            wrap(["sample"], vanilla_pipeline.transformer)

    else:
        raise ValueError(
            f"VanillaBiometricsPipeline.transformer should be an instance of either "
            f"`sklearn.pipeline.Pipeline` or `sklearn.base.BaseEstimator`, "
            f"not {vanilla_pipeline.transformer}"
        )

    ## Checking the biometric algorithm
    if not isinstance(vanilla_pipeline.biometric_algorithm, BioAlgorithm):
        raise ValueError(
            f"VanillaBiometricsPipeline.biometric_algorithm should be an instance of "
            f"`BioAlgorithm`, not {vanilla_pipeline.biometric_algorithm}"
        )

    return True
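

# Illustration of what ``check_valid_pipeline`` enforces (a sketch mirroring
# the docstring example above; ``Linearize`` and ``Distance`` are the same
# objects used there):
#
#   from sklearn.pipeline import make_pipeline
#   from bob.pipelines.transformers import Linearize
#   from bob.bio.base.pipelines.vanilla_biometrics import Distance
#
#   transformer = make_pipeline(Linearize())  # steps not yet sample-wrapped
#   pipeline = VanillaBiometricsPipeline(transformer, Distance())
#   # __init__ ran check_valid_pipeline, which wrapped each unwrapped step
#   # with wrap(["sample"], step) so it accepts bob.pipelines Samples.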