#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Source code for bob.bio.base.pipelines.vanilla_biometrics.abstract_classes


from abc import ABCMeta, abstractmethod
from bob.pipelines.sample import SAMPLE_DATA_ATTRS, Sample, SampleSet, DelayedSample
import functools
import numpy as np
import os


def average_scores(scores):
    """
    Given a :any:`numpy.ndarray` coming from multiple probes,
    average them
    """
    return np.mean(scores, axis=0)
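

# Illustrative example (not part of the original module): the score-reduction
# callable receives one row of scores per probe sample and one column per
# biometric reference, and collapses the rows into a single score per
# reference. The helper below only demonstrates the expected shapes.
def _example_average_scores():
    # two probe samples scored against two biometric references
    scores = np.array([[0.9, 0.1], [0.7, 0.3]])
    # -> array([0.8, 0.2]): one averaged score per reference
    return average_scores(scores)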


class BioAlgorithm(metaclass=ABCMeta):
    """Describes a base biometric comparator for the Vanilla Biometrics Pipeline :ref:`bob.bio.base.biometric_algorithm`.

    biometric model enrollment, via ``enroll()`` and scoring, with
    ``score()``.

    Parameters
    ----------

        score_reduction_operation: ``collections.callable``
           Callable containing the score reduction function to be applied in the samples in a sampleset

    """

    def __init__(self, score_reduction_operation=average_scores, **kwargs):
        self.stacked_biometric_references = None
        self.score_reduction_operation = score_reduction_operation

    def clear_caches(self):
        """Removes all cached objects from this BioAlgorithm"""
        self.stacked_biometric_references = None

    def enroll_samples(self, biometric_references):
        """This method implements the enrollment sub-pipeline of the
        Vanilla Biometrics Pipeline. It handles the creation of biometric
        references.

        Parameters
        ----------
        biometric_references : list
            A list of :py:class:`SampleSet` objects to be used for creating
            biometric references. The sets must be identified with a unique
            id and a path, for eventual checkpointing.
        """
        retval = []
        for k in biometric_references:
            # compute on-the-fly
            retval.append(self._enroll_sample_set(k))

        return retval

    def _enroll_sample_set(self, sampleset):
        # Unpack the sampleset
        data = [s.data for s in sampleset.samples]

        # Enroll
        return Sample(self.enroll(data), parent=sampleset)

    @abstractmethod
    def enroll(self, data):
        """
        It handles the creation of ONE biometric reference for the vanilla pipeline

        Parameters
        ----------
        data:
            Data used for the creation of ONE BIOMETRIC REFERENCE
        """
        pass

    def score_samples(
        self,
        probe_features,
        biometric_references,
        allow_scoring_with_all_biometric_references=True,
    ):
        """Scores new samples against multiple (potential) references

        Parameters
        ----------
        probe_features : list
            A list of :py:class:`SampleSet` objects to be scored against the
            input references.

        biometric_references : list
            A list of :py:class:`Sample` objects to be used for scoring the
            input probes; each must have an ``id`` attribute that will be used
            to cross-reference which probes need to be scored against it.

        allow_scoring_with_all_biometric_references : bool
            If True, calls ``self.score_multiple_biometric_references`` at
            scoring time to compute, in one shot, the scores of a probe
            against all references. This optimization is useful when all
            probes need to be compared with all biometric references AND your
            scoring function allows this broadcast computation.

        Returns
        -------
        scores : list
            For each probe SampleSet, a SampleSet of scores (one per relevant
            biometric reference), carrying the probe's and the references'
            subject identifiers.
        """
        retval = []
        for p in probe_features:
            retval.append(
                self._score_sample_set(
                    p,
                    biometric_references,
                    allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
                )
            )

        self.clear_caches()
        return retval

    def _score_sample_set(
        self,
        sampleset,
        biometric_references,
        allow_scoring_with_all_biometric_references,
    ):
        """Given one sampleset for probing, computes the scores and returns a
        sample set with the scores.
        """
        scores_biometric_references = []
        if allow_scoring_with_all_biometric_references:
            # Optimized scoring
            # This is useful when your scoring function can be applied to a
            # static batch of biometric references
            total_scores = []
            for probe_sample in sampleset:
                # Multiple scoring
                if self.stacked_biometric_references is None:
                    self.stacked_biometric_references = [
                        ref.data for ref in biometric_references
                    ]
                scores = self.score_multiple_biometric_references(
                    self.stacked_biometric_references, probe_sample.data
                )
                total_scores.append(scores)

            # Reducing them
            total_scores = self.score_reduction_operation(total_scores)

            # Wrapping the scores in samples
            for ref, score in zip(biometric_references, total_scores):
                scores_biometric_references.append(Sample(score, parent=ref))

        else:
            # Non-optimized scoring
            # There are some protocols where each probe has
            # to be scored with a specific list of biometric_references
            total_scores = []
            if self.stacked_biometric_references is None:
                self.stacked_biometric_references = dict()

            def cache_references(probe_references):
                """Stacks references in a dictionary"""
                for r in biometric_references:
                    if (
                        str(r.reference_id) in probe_references
                        and str(r.reference_id)
                        not in self.stacked_biometric_references
                    ):
                        self.stacked_biometric_references[
                            str(r.reference_id)
                        ] = r.data

            for probe_sample in sampleset:
                cache_references(sampleset.references)
                references = [
                    self.stacked_biometric_references[str(r.reference_id)]
                    for r in biometric_references
                    if str(r.reference_id) in sampleset.references
                ]

                if len(references) == 0:
                    raise ValueError(
                        f"The probe {sampleset} can't be compared with any biometric reference. "
                        "Something is probably wrong with your database interface."
                    )

                scores = self.score_multiple_biometric_references(
                    references, probe_sample.data
                )
                total_scores.append(scores)

            total_scores = self.score_reduction_operation(np.array(total_scores))

            for ref, score in zip(
                [
                    r
                    for r in biometric_references
                    if str(r.reference_id) in sampleset.references
                ],
                total_scores,
            ):
                scores_biometric_references.append(Sample(score, parent=ref))

        return SampleSet(scores_biometric_references, parent=sampleset)

    @abstractmethod
    def score(self, biometric_reference, data):
        """It handles the score computation of one biometric reference
        against one probe sample

        Parameters
        ----------
        biometric_reference :
            Biometric reference to be compared

        data :
            Probe data to be compared

        Returns
        -------
        score : float
            The score of ``data`` against the given ``biometric_reference``.
        """
        pass

    def score_multiple_biometric_references(self, biometric_references, data):
        """
        It handles the score computation of one probe against multiple
        biometric references

        This method is called when
        ``allow_scoring_with_all_biometric_references`` is set to True.

        Parameters
        ----------
        biometric_references : list
            List of biometric references to be scored

        data :
            Probe data to be compared against the references
        """
        raise NotImplementedError(
            "Your BioAlgorithm implementation should implement score_multiple_biometric_references."
        )
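

# Illustrative sketch (not part of the original module): a minimal
# BioAlgorithm subclass based on Euclidean distance. The class name and the
# choice of averaging enrollment features are assumptions made for this
# example only.
class _ExampleDistanceBioAlgorithm(BioAlgorithm):
    """Toy comparator: enrolls by averaging feature vectors and scores with
    the negative Euclidean distance (higher score means more similar)."""

    def enroll(self, data):
        # ``data`` is the list of feature arrays of one enrollment SampleSet
        return np.mean(np.asarray(data), axis=0)

    def score(self, biometric_reference, data):
        # One biometric reference against one probe sample
        return -np.linalg.norm(biometric_reference - data)

    def score_multiple_biometric_references(self, biometric_references, data):
        # One probe sample against a batch of references; this is what enables
        # the ``allow_scoring_with_all_biometric_references`` optimization
        return [-np.linalg.norm(ref - data) for ref in biometric_references]


# Typical use (assuming ``refs`` and ``probes`` are lists of SampleSets as
# produced by a Database):
#   algorithm = _ExampleDistanceBioAlgorithm()
#   models = algorithm.enroll_samples(refs)
#   scores = algorithm.score_samples(probes, models)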


class Database(metaclass=ABCMeta):
    """Base class for databases used in the Vanilla Biometrics pipeline"""

    @abstractmethod
    def background_model_samples(self):
        """Returns :py:class:`Sample`'s to train a background model

        Returns
        -------
        samples : list
            List of samples for background model training.
        """
        pass

    @abstractmethod
    def references(self, group="dev"):
        """Returns :py:class:`Reference`'s to enroll biometric references

        Parameters
        ----------
        group : :py:class:`str`, optional
            Limits samples to this group

        Returns
        -------
        references : list
            List of samples for the creation of biometric references.
        """
        pass

    @abstractmethod
    def probes(self, group):
        """Returns :py:class:`Probe`'s to score biometric references

        Parameters
        ----------
        group : str
            Limits samples to this group

        Returns
        -------
        probes : list
            List of samples for the creation of biometric probes.
        """
        pass

    @abstractmethod
    def all_samples(self, groups=None):
        """Returns all the samples of the dataset

        Parameters
        ----------
        groups : list or `None`
            List of groups to consider (like 'dev' or 'eval'). If `None`,
            samples from all the groups are returned.

        Returns
        -------
        samples : list
            List of all the samples of the dataset.
        """
        pass

    def groups(self):
        pass

    def reference_ids(self, group):
        return [s.reference_id for s in self.references(group=group)]
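

# Illustrative sketch (not part of the original module): a tiny in-memory
# Database with a single subject. The data values, ids and keyword arguments
# (``key``, ``reference_id``, ``references``) are assumptions for this
# example; a real interface would read them from an actual dataset.
class _ExampleDatabase(Database):
    def background_model_samples(self):
        return [Sample(np.zeros(2), key="train_0")]

    def references(self, group="dev"):
        sample = Sample(np.ones(2), key="ref_0")
        return [SampleSet([sample], key="ref_0", reference_id="0")]

    def probes(self, group="dev"):
        sample = Sample(np.ones(2) * 1.1, key="probe_0")
        # ``references`` lists the reference ids this probe is scored against
        return [
            SampleSet([sample], key="probe_0", reference_id="0", references=["0"])
        ]

    def all_samples(self, groups=None):
        return (
            self.background_model_samples()
            + [s for ss in self.references() for s in ss]
            + [s for ss in self.probes() for s in ss]
        )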


class ScoreWriter(metaclass=ABCMeta):
    """
    Defines base methods to read, write and concatenate scores
    for :py:class:`BioAlgorithm`
    """

    def __init__(self, path, extension=".txt"):
        self.path = path
        self.extension = extension

    @abstractmethod
    def write(self, sampleset, path):
        pass

    def post_process(self, score_paths, filename):
        def _post_process(score_paths, filename):
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, "w") as f:
                for path in score_paths:
                    with open(path) as f2:
                        f.writelines(f2.readlines())
            return filename

        import dask
        import dask.bag

        if isinstance(score_paths, dask.bag.Bag):
            all_paths = dask.delayed(list)(score_paths)
            return dask.delayed(_post_process)(all_paths, filename)
        return _post_process(score_paths, filename)
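

# Illustrative sketch (not part of the original module): a minimal ScoreWriter
# that writes one "probe_id reference_id score" line per score sample. It
# assumes the probe SampleSet carries ``key`` and ``reference_id`` attributes
# and that each score Sample produced by ``BioAlgorithm._score_sample_set``
# inherits ``reference_id`` from its parent reference.
class _ExampleScoreWriter(ScoreWriter):
    def write(self, sampleset, path):
        os.makedirs(path, exist_ok=True)
        filename = os.path.join(path, str(sampleset.key) + self.extension)
        with open(filename, "w") as f:
            for score_sample in sampleset:
                f.write(
                    f"{sampleset.reference_id} "
                    f"{score_sample.reference_id} "
                    f"{score_sample.data}\n"
                )
        return [filename]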