#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
from abc import ABCMeta, abstractmethod
from bob.pipelines.sample import Sample, SampleSet
import numpy as np
import os
import logging
logger = logging.getLogger(__name__)
def average_scores(scores):
    """
    Average a :any:`numpy.ndarray` of scores coming from multiple probes.

    Two situations are handled transparently.  Either each Sample (not a
    `SampleSet`) points to exactly one score, or each Sample points to
    several scores (as when dealing with a `VideoLikeContainer`).

    The input is indexed as follows:
      axis=0 -> each sample in a sampleset
      axis=1 -> the score w.r.t. each biometric reference
      axis=2 -> each individual score of a sample (one score per sample for
                image-based cases, possibly several for video-based cases)

    We first average the individual scores of every sample (axis=1 of each
    row), then average across samples.
    """
    per_sample_means = np.array(
        [np.mean(sample_scores, axis=1) for sample_scores in scores]
    )
    return np.mean(per_sample_means, axis=0)
class BioAlgorithm(metaclass=ABCMeta):
    """Describes a base biometric comparator for the Vanilla Biometrics Pipeline :ref:`bob.bio.base.biometric_algorithm`.

    Subclasses implement biometric model enrollment, via ``enroll()``, and
    scoring, with ``score()``.

    Parameters
    ----------
    score_reduction_operation: ``collections.callable``
        Callable containing the score reduction function to be applied to the
        samples in a sampleset.
    """

    def __init__(self, score_reduction_operation=average_scores, **kwargs):
        # Lazily-filled cache of reference data used during scoring;
        # reset by `clear_caches`.
        self.stacked_biometric_references = None
        self.score_reduction_operation = score_reduction_operation
        # NOTE(review): extra **kwargs are accepted but silently discarded
        # here (no `super().__init__(**kwargs)` call) — confirm this is
        # intentional for cooperative multiple inheritance.

    def clear_caches(self):
        """
        Clean all cached objects from BioAlgorithm
        """
        self.stacked_biometric_references = None

    def enroll_samples(self, biometric_references):
        """This method should implement the enrollment sub-pipeline of the Vanilla Biometrics Pipeline. TODO REF

        It handles the creation of biometric references.

        Parameters
        ----------
        biometric_references : list
            A list of :any:`bob.pipelines.SampleSet` objects to be used for
            creating biometric references.  The sets must be identified
            with a unique id and a path, for eventual checkpointing.

        Returns
        -------
        list
            One enrolled :any:`bob.pipelines.Sample` per input sampleset.
        """
        # compute each reference on-the-fly
        return [self._enroll_sample_set(k) for k in biometric_references]

    def _enroll_sample_set(self, sampleset):
        """Enroll one sampleset, discarding samples whose data is ``None``."""
        # Unpack the sampleset
        data = [s.data for s in sampleset.samples]
        valid_data = [d for d in data if d is not None]
        if len(data) != len(valid_data):
            logger.warning(
                f"Removed {len(data)-len(valid_data)} invalid enrollment samples."
            )
        if not valid_data:
            raise ValueError(
                f"None of the enrollment samples were valid for {sampleset}."
            )
        # Enroll
        return Sample(self.enroll(valid_data), parent=sampleset)

    @abstractmethod
    def enroll(self, data):
        """
        It handles the creation of ONE biometric reference for the vanilla pipeline

        Parameters
        ----------
        data:
            Data used for the creation of ONE BIOMETRIC REFERENCE
        """
        pass

    def score_samples(
        self,
        probe_features,
        biometric_references,
        allow_scoring_with_all_biometric_references=True,
    ):
        """Scores new samples against multiple (potential) references

        Parameters
        ----------
        probe_features : list
            A list of :any:`bob.pipelines.SampleSet` objects to be used for
            scoring the input references
        biometric_references : list
            A list of :any:`bob.pipelines.Sample` objects to be used for
            scoring the input probes, must have an ``id`` attribute that
            will be used to cross-reference which probes need to be scored.
        allow_scoring_with_all_biometric_references: bool
            If true will call `self.score_multiple_biometric_references`, at scoring time, to compute scores in one shot with multiple probes.
            This optimization is useful when all probes need to be compared with all biometric references AND
            your scoring function allows this broadcast computation.

        Returns
        -------
        scores : list
            For each sample in a probe, returns as many scores as there are
            samples in the probe, together with the probes and the
            relevant reference's subject identifiers.
        """
        retval = [
            self._score_sample_set(
                p,
                biometric_references,
                allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
            )
            for p in probe_features
        ]
        # The reference cache is only valid for this batch of probes.
        self.clear_caches()
        return retval

    def _score_sample_set(
        self,
        sampleset,
        biometric_references,
        allow_scoring_with_all_biometric_references,
    ):
        """Given one sampleset for probing, compute the scores and returns a sample set with the scores"""
        scores_biometric_references = []
        if allow_scoring_with_all_biometric_references:
            # Optimized scoring
            # This is useful when your scoring function can be compared with a
            # static batch of biometric references
            total_scores = []
            for probe_sample in sampleset:
                # Multiple scoring
                if self.stacked_biometric_references is None:
                    self.stacked_biometric_references = [
                        ref.data for ref in biometric_references
                    ]
                if probe_sample.data is None:
                    # Probe processing has failed. Mark invalid scores for FTA count
                    scores = [[None]] * len(self.stacked_biometric_references)
                else:
                    scores = self.score_multiple_biometric_references(
                        self.stacked_biometric_references, probe_sample.data
                    )
                total_scores.append(scores)
            # Reducing them.  `np.float` was removed in NumPy 1.24; builtin
            # `float` is the equivalent dtype (None scores become NaN).
            total_scores = self.score_reduction_operation(
                np.array(total_scores, dtype=float)
            )
            # Wrapping the scores in samples
            for ref, score in zip(biometric_references, total_scores):
                scores_biometric_references.append(Sample(score, parent=ref))
        else:
            # Non optimized scoring
            # There are some protocols where each probe has
            # to be scored with a specific list of biometric_references
            total_scores = []
            if self.stacked_biometric_references is None:
                self.stacked_biometric_references = dict()

            def cache_references(probe_references):
                """
                Stack references in a dictionary keyed by `reference_id`.
                """
                for r in biometric_references:
                    if (
                        str(r.reference_id) in probe_references
                        and str(r.reference_id) not in self.stacked_biometric_references
                    ):
                        self.stacked_biometric_references[str(r.reference_id)] = r.data

            for probe_sample in sampleset:
                cache_references(sampleset.references)
                # Only keep the references this probe is allowed to be
                # compared with.
                references = [
                    self.stacked_biometric_references[str(r.reference_id)]
                    for r in biometric_references
                    if str(r.reference_id) in sampleset.references
                ]
                if len(references) == 0:
                    raise ValueError(
                        f"The probe {sampleset} can't be compared with any biometric reference. "
                        "Something is probably wrong with your database interface."
                    )
                if probe_sample.data is None:
                    # Probe processing has failed
                    scores = [[None]] * len(self.stacked_biometric_references)
                else:
                    scores = self.score_multiple_biometric_references(
                        references, probe_sample.data
                    )
                total_scores.append(scores)
            # `np.float` was removed in NumPy 1.24; use builtin `float`.
            total_scores = self.score_reduction_operation(
                np.array(total_scores, dtype=float)
            )
            for ref, score in zip(
                [
                    r
                    for r in biometric_references
                    if str(r.reference_id) in sampleset.references
                ],
                total_scores,
            ):
                scores_biometric_references.append(Sample(score, parent=ref))
        return SampleSet(scores_biometric_references, parent=sampleset)

    @abstractmethod
    def score(self, biometric_reference, data):
        """It handles the score computation for one sample

        Parameters
        ----------
        biometric_reference : list
            Biometric reference to be compared
        data : list
            Data to be compared

        Returns
        -------
        scores : list
            For each sample in a probe, returns as many scores as there are
            samples in the probe, together with the probe's and the
            relevant reference's subject identifiers.
        """
        pass

    def score_multiple_biometric_references(self, biometric_references, data):
        """Score one probe against multiple biometric references (models).

        This method is called when `allow_scoring_with_all_biometric_references`
        is set to true.  You may want to override this method to improve the
        performance of computations.

        Parameters
        ----------
        biometric_references : list
            List of biometric references (models) to be scored
        data
            Data used for the creation of ONE biometric probe.

        Returns
        -------
        list
            A list of scores for the comparison of the probe against multiple models.
        """
        return [self.score(model, data) for model in biometric_references]
class Database(metaclass=ABCMeta):
    """Base class for Vanilla Biometric pipeline databases.

    Parameters
    ----------
    name : str
        Name of the database.
    protocol : str
        Name of the evaluation protocol to use.
    allow_scoring_with_all_biometric_references : bool
        If true, every probe may be scored against all biometric references
        in one shot (see ``BioAlgorithm.score_samples``).
    annotation_type : :obj:`object`, optional
        Type of annotations provided by this database (database-specific).
    fixed_positions : :obj:`object`, optional
        Fixed annotation positions, if the database provides them.
    memory_demanding : bool
        Flag indicating that loading this database's data is memory demanding.
    """

    def __init__(
        self,
        name,
        protocol,
        allow_scoring_with_all_biometric_references=False,
        annotation_type=None,
        fixed_positions=None,
        memory_demanding=False,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.name = name
        self.protocol = protocol
        self.allow_scoring_with_all_biometric_references = (
            allow_scoring_with_all_biometric_references
        )
        self.annotation_type = annotation_type
        self.fixed_positions = fixed_positions
        self.memory_demanding = memory_demanding

    @abstractmethod
    def background_model_samples(self):
        """Returns :any:`bob.pipelines.Sample`'s to train a background model

        Returns
        -------
        samples : list
            List of samples for background model training.
        """
        pass

    @abstractmethod
    def references(self, group="dev"):
        """Returns references to enroll biometric references

        Parameters
        ----------
        group : :py:class:`str`, optional
            Limits samples to this group

        Returns
        -------
        references : list
            List of samples for the creation of biometric references.
        """
        pass

    @abstractmethod
    def probes(self, group):
        """Returns probes to score biometric references

        Parameters
        ----------
        group : str
            Limits samples to this group

        Returns
        -------
        probes : list
            List of samples for the creation of biometric probes.
        """
        pass

    @abstractmethod
    def all_samples(self, groups=None):
        """Returns all the samples of the dataset

        Parameters
        ----------
        groups: list or `None`
            List of groups to consider (like 'dev' or 'eval'). If `None`, will
            return samples from all the groups.

        Returns
        -------
        samples: list
            List of all the samples of the dataset.
        """
        pass

    @abstractmethod
    def groups(self):
        """Returns the groups (e.g. 'dev', 'eval') available in this database."""
        pass

    @abstractmethod
    def protocols(self):
        """Returns the list of protocol names supported by this database."""
        pass

    def reference_ids(self, group):
        """Returns the `reference_id` of each reference sample in `group`."""
        return [s.reference_id for s in self.references(group=group)]
class ScoreWriter(metaclass=ABCMeta):
    """
    Defines base methods to read, write scores and concatenate scores
    for :any:`bob.bio.base.pipelines.vanilla_biometrics.BioAlgorithm`

    Parameters
    ----------
    path : str
        Base path where score files will be written.
    extension : str
        File extension used for the score files.
    """

    def __init__(self, path, extension=".txt"):
        self.path = path
        self.extension = extension

    @abstractmethod
    def write(self, sampleset, path):
        """Writes the scores of one sampleset to ``path``."""
        pass

    def post_process(self, score_paths, filename):
        """Concatenates several score files into a single one.

        Parameters
        ----------
        score_paths : list or :any:`dask.bag.Bag`
            Paths of the score files to concatenate.
        filename : str
            Path of the resulting concatenated score file; parent
            directories are created if needed.

        Returns
        -------
        str or dask delayed object
            ``filename``, wrapped in a :any:`dask.delayed` when the input
            is a dask bag.
        """

        def _post_process(score_paths, filename):
            # `dirname` may be empty when `filename` has no directory part;
            # `os.makedirs("")` would raise, so guard against it.
            dirname = os.path.dirname(filename)
            if dirname:
                os.makedirs(dirname, exist_ok=True)
            with open(filename, "w") as f:
                for path in score_paths:
                    with open(path) as f2:
                        f.writelines(f2.readlines())
            return filename

        # dask is only needed when `score_paths` is a dask bag; import it
        # lazily so plain-list inputs work without dask installed.
        try:
            import dask
            import dask.bag
        except ImportError:
            dask = None

        if dask is not None and isinstance(score_paths, dask.bag.Bag):
            all_paths = dask.delayed(list)(score_paths)
            return dask.delayed(_post_process)(all_paths, filename)
        return _post_process(score_paths, filename)