# Source code for bob.bio.base.pipelines.vanilla_biometrics.abstract_classes

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :


from abc import ABCMeta, abstractmethod
from bob.pipelines.sample import Sample, SampleSet
import numpy as np
import os
import logging

logger = logging.getLogger(__name__)


def average_scores(scores):
    """
    Reduce the scores of several probe samples to one score per biometric
    reference by averaging.

    Two situations are covered:

    * each Sample (not a `SampleSet`) carries a single score per reference;
    * each Sample carries several scores per reference (e.g. when dealing
      with `VideoLikeContainer`).

    Parameters
    ----------
    scores : :any:`numpy.ndarray` or nested sequence
        Scores laid out as described in the axis comments below.

    Returns
    -------
    :any:`numpy.ndarray`
        The averaged scores, one value per biometric reference.
    """

    # axis=0 indexes the samples of a sampleset
    # axis=1 indexes the biometric references
    # axis=2 indexes the individual scores of one sample (one score per sample
    #        in image-based cases, possibly several in video-based cases)

    # Step 1: average the individual scores of every sample, one sample at a
    # time, so samples are never required to hold the same number of scores.
    per_sample_means = []
    for sample_scores in scores:
        per_sample_means.append(np.mean(sample_scores, axis=1))

    # Step 2: average between the samples.
    stacked_means = np.array(per_sample_means)
    return np.mean(stacked_means, axis=0)


class BioAlgorithm(metaclass=ABCMeta):
    """Describes a base biometric comparator for the Vanilla Biometrics Pipeline :ref:`bob.bio.base.biometric_algorithm`.

    Subclasses provide biometric model enrollment, via ``enroll()``, and
    scoring, with ``score()``.

    Parameters
    ----------

        score_reduction_operation: ``collections.callable``
           Callable containing the score reduction function to be applied in the samples in a sampleset

    """

    def __init__(self, score_reduction_operation=average_scores, **kwargs):
        # ``kwargs`` is accepted but not used by this base class.
        # Cache of reference data filled lazily while scoring: a list when
        # scoring against all references, a dict keyed by
        # ``str(reference_id)`` otherwise. Reset by ``clear_caches()``.
        self.stacked_biometric_references = None
        self.score_reduction_operation = score_reduction_operation

    def clear_caches(self):
        """
        Clean all cached objects from BioAlgorithm
        """
        self.stacked_biometric_references = None

    def enroll_samples(self, biometric_references):
        """This method should implement the enrollment sub-pipeline of the Vanilla Biometrics Pipeline. TODO REF

        It handles the creation of biometric references

        Parameters
        ----------
        biometric_references : list
            A list of :any:`bob.pipelines.SampleSet` objects to be used for
            creating biometric references.  The sets must be identified
            with a unique id and a path, for eventual checkpointing.

        Returns
        -------
        list
            One enrolled :any:`bob.pipelines.Sample` per input sample set,
            in the same order.
        """
        # Computed on-the-fly, one sample set at a time.
        return [self._enroll_sample_set(k) for k in biometric_references]

    def _enroll_sample_set(self, sampleset):
        """Enroll one sample set, skipping samples whose ``data`` is ``None``.

        Raises
        ------
        ValueError
            If none of the samples in ``sampleset`` carries data.
        """
        # Unpack the sampleset
        data = [s.data for s in sampleset.samples]
        valid_data = [d for d in data if d is not None]

        removed = len(data) - len(valid_data)
        if removed:
            logger.warning("Removed %d invalid enrollment samples.", removed)
        if not valid_data:
            raise ValueError(
                f"None of the enrollment samples were valid for {sampleset}."
            )

        # Enroll
        return Sample(self.enroll(valid_data), parent=sampleset)

    @abstractmethod
    def enroll(self, data):
        """
        It handles the creation of ONE biometric reference for the vanilla pipeline

        Parameters
        ----------

            data:
                Data used for the creation of ONE BIOMETRIC REFERENCE

        """
        pass

    def score_samples(
        self,
        probe_features,
        biometric_references,
        allow_scoring_with_all_biometric_references=True,
    ):
        """Scores a new sample against multiple (potential) references

        Parameters
        ----------

            probe_features : list
                A list of :any:`bob.pipelines.SampleSet` objects to be used for
                scoring the input references

            biometric_references : list
                A list of :any:`bob.pipelines.Sample` objects to be used for
                scoring the input probes, must have an ``id`` attribute that
                will be used to cross-reference which probes need to be scored.

            allow_scoring_with_all_biometric_references: bool
                If true, every probe is scored against every entry of
                ``biometric_references``. Otherwise each probe is only scored
                against the references listed in its ``references`` attribute.
                Scoring against all references is useful when all probes need
                to be compared with all biometric references AND your scoring
                function allows this broadcast computation.

        Returns
        -------

            scores : list
                One :any:`bob.pipelines.SampleSet` per probe sample set, each
                holding one score :any:`bob.pipelines.Sample` per scored
                biometric reference (the reference is the sample's parent).
        """
        try:
            return [
                self._score_sample_set(
                    p,
                    biometric_references,
                    allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
                )
                for p in probe_features
            ]
        finally:
            # Always drop the cached reference data, even when scoring failed,
            # so a later call never reuses data from a different reference list.
            self.clear_caches()

    def _references_for_probe(self, sampleset, biometric_references):
        """Select (and cache the data of) the references listed by one probe sample set.

        Returns the selected reference samples and, aligned with them, their
        cached data.

        Raises
        ------
        ValueError
            If no entry of ``biometric_references`` is listed in
            ``sampleset.references``.
        """
        if self.stacked_biometric_references is None:
            self.stacked_biometric_references = dict()
        cache = self.stacked_biometric_references

        probe_references = sampleset.references
        selected = [
            r for r in biometric_references if str(r.reference_id) in probe_references
        ]
        if not selected:
            raise ValueError(
                f"The probe {sampleset} can't be compared with any biometric reference. "
                "Something is probably wrong with your database interface."
            )

        reference_data = []
        for r in selected:
            key = str(r.reference_id)
            # ``r.data`` is read at most once per reference id until the
            # cache is cleared.
            if key not in cache:
                cache[key] = r.data
            reference_data.append(cache[key])
        return selected, reference_data

    def _score_sample_set(
        self,
        sampleset,
        biometric_references,
        allow_scoring_with_all_biometric_references,
    ):
        """Given one sampleset for probing, compute the scores and returns a sample set with the scores"""

        if allow_scoring_with_all_biometric_references:
            # Optimized scoring
            # This is useful when your scoring function can be compared with a
            # static batch of biometric references
            if self.stacked_biometric_references is None:
                self.stacked_biometric_references = [
                    ref.data for ref in biometric_references
                ]
            scored_references = biometric_references
            reference_data = self.stacked_biometric_references
        else:
            # Non optimized scoring
            # There are some protocols where each probe has
            # to be scored with a specific list of biometric_references
            scored_references, reference_data = self._references_for_probe(
                sampleset, biometric_references
            )

        total_scores = []
        for probe_sample in sampleset:
            if probe_sample.data is None:
                # Probe processing has failed. Mark invalid scores for FTA
                # count: one placeholder per reference actually scored, so the
                # layout matches the valid samples of this sampleset.
                scores = [[None]] * len(reference_data)
            else:
                scores = self.score_multiple_biometric_references(
                    reference_data, probe_sample.data
                )
            total_scores.append(scores)

        # Reducing them. The builtin ``float`` is used as dtype because the
        # ``np.float`` alias no longer exists in recent NumPy releases; ``None``
        # placeholders become NaN.
        total_scores = self.score_reduction_operation(
            np.array(total_scores, dtype=float)
        )

        # Wrapping the scores in samples
        scores_biometric_references = [
            Sample(score, parent=ref)
            for ref, score in zip(scored_references, total_scores)
        ]
        return SampleSet(scores_biometric_references, parent=sampleset)

    @abstractmethod
    def score(self, biometric_reference, data):
        """It handles the score computation for one sample

        Parameters
        ----------

            biometric_reference : list
                Biometric reference to be compared

            data : list
                Data to be compared

        Returns
        -------

            scores : list
                Score(s) of the comparison between ``data`` and
                ``biometric_reference``.
        """
        pass

    def score_multiple_biometric_references(self, biometric_references, data):
        """Score one probe against multiple biometric references (models).

        This method is called by ``_score_sample_set`` for every probe sample
        that carries data. The default implementation calls ``score()`` once
        per reference; you may want to override this method to improve the
        performance of computations.

        Parameters
        ----------
        biometric_references : list
            List of biometric references (models) to be scored
        data
            Data used for the creation of ONE biometric probe.

        Returns
        -------
        list
            A list of scores for the comparison of the probe against multiple
            models.
        """
        return [self.score(model, data) for model in biometric_references]
class Database(metaclass=ABCMeta):
    """Base class for Vanilla Biometric pipeline"""

    def __init__(
        self,
        name,
        protocol,
        allow_scoring_with_all_biometric_references=False,
        annotation_type=None,
        fixed_positions=None,
        memory_demanding=False,
        **kwargs,
    ):
        # Remaining keyword arguments are forwarded along the MRO.
        super().__init__(**kwargs)

        # Identification of the database / protocol pair.
        self.name = name
        self.protocol = protocol

        # Settings stored verbatim for later consumers.
        self.memory_demanding = memory_demanding
        self.fixed_positions = fixed_positions
        self.annotation_type = annotation_type
        self.allow_scoring_with_all_biometric_references = (
            allow_scoring_with_all_biometric_references
        )

    @abstractmethod
    def background_model_samples(self):
        """Returns :any:`bob.pipelines.Sample`'s to train a background model

        Returns
        -------
        samples : list
            List of samples for background model training.
        """

    @abstractmethod
    def references(self, group="dev"):
        """Returns references to enroll biometric references

        Parameters
        ----------
        group : :py:class:`str`, optional
            Limits samples to this group

        Returns
        -------
        references : list
            List of samples for the creation of biometric references.
        """

    @abstractmethod
    def probes(self, group):
        """Returns probes to score biometric references

        Parameters
        ----------
        group : str
            Limits samples to this group

        Returns
        -------
        probes : list
            List of samples for the creation of biometric probes.
        """

    @abstractmethod
    def all_samples(self, groups=None):
        """Returns all the samples of the dataset

        Parameters
        ----------
        groups: list or `None`
            List of groups to consider (like 'dev' or 'eval'). If `None`, will
            return samples from all the groups.

        Returns
        -------
        samples: list
            List of all the samples of the dataset.
        """

    @abstractmethod
    def groups(self):
        """To be implemented by subclasses (no contract is documented here)."""

    @abstractmethod
    def protocols(self):
        """To be implemented by subclasses (no contract is documented here)."""

    def reference_ids(self, group):
        """Returns the ``reference_id`` of every item of ``self.references(group=group)``."""
        ids = []
        for reference in self.references(group=group):
            ids.append(reference.reference_id)
        return ids
class ScoreWriter(metaclass=ABCMeta):
    """
    Defines base methods to read, write scores and concatenate scores
    for :any:`bob.bio.base.pipelines.vanilla_biometrics.BioAlgorithm`
    """

    def __init__(self, path, extension=".txt"):
        self.path = path
        self.extension = extension

    @abstractmethod
    def write(self, sampleset, path):
        pass

    def post_process(self, score_paths, filename):
        """Concatenate several score files into ``filename``.

        Parameters
        ----------
        score_paths : iterable of str or ``dask.bag.Bag``
            Paths of the score files to concatenate, in order.
        filename : str
            Path of the file to write. Missing parent directories are created.

        Returns
        -------
        str or ``dask.delayed`` object
            ``filename`` once the file is written, or, when ``score_paths`` is
            a ``dask.bag.Bag``, a delayed object wrapping the concatenation.
        """

        def _post_process(score_paths, filename):
            # ``dirname`` is empty for a bare file name, and
            # ``os.makedirs("")`` raises; the current directory already exists.
            directory = os.path.dirname(filename)
            if directory:
                os.makedirs(directory, exist_ok=True)
            with open(filename, "w") as f:
                for path in score_paths:
                    with open(path) as f2:
                        # Stream the lines instead of buffering the whole file.
                        f.writelines(f2)
            return filename

        # dask is imported lazily, and only needed to recognise a Bag: without
        # dask installed ``score_paths`` cannot be one, so concatenate eagerly.
        try:
            import dask
            import dask.bag
        except ImportError:
            dask = None

        if dask is not None and isinstance(score_paths, dask.bag.Bag):
            all_paths = dask.delayed(list)(score_paths)
            return dask.delayed(_post_process)(all_paths, filename)
        return _post_process(score_paths, filename)