# Source code for bob.bio.base.algorithm.distance

```
import numpy as np
from scipy.spatial.distance import cdist
from ..pipelines import BioAlgorithm
class Distance(BioAlgorithm):
    """A distance algorithm to compare feature vectors.

    Many biometric algorithms are based on comparing feature vectors that
    are usually extracted by using deep neural networks.

    The default distance function is the cosine distance. Combined with the
    default ``factor`` of -1, the resulting scores behave like similarity
    scores (a higher score means a closer match).
    """

    def __init__(
        self,
        distance_function="cosine",
        factor=-1,
        average_on_enroll=True,
        average_probes=False,
        probes_score_fusion="max",
        enrolls_score_fusion="max",
        **kwargs,
    ):
        """
        Parameters
        ----------
        distance_function : str or :py:class:`function`, optional
            function to be used to measure the distance of probe and model
            features compatible with :any:`scipy.spatial.distance.cdist`. If
            the function exists in scipy.spatial.distance, provide its string
            name as scipy will run an optimized version.

        factor : float
            A coefficient which is multiplied to distance (after
            distance_function) to find score between probe and model
            features. In bob.bio.base, the scores should be similarity scores
            (higher score for a genuine pair) so use this factor to make sure
            you are using similarity scores.

        average_on_enroll : bool
            Some database protocols contain multiple samples (e.g. face
            images) to create one enrollment template. This option is useful
            in case of those databases. If True, the algorithm will average
            the enroll features to create a single template. If False, the
            algorithm will use the enroll features as is and will compare the
            probe template against all features. The final score will be
            computed based on the ``enrolls_score_fusion`` option.

        average_probes : bool
            Some database protocols contain multiple samples (e.g. face
            images) to create one probe template. This option is useful in
            case of those databases. If True, the algorithm will average the
            probe features to create a single template. If False, the
            algorithm will use the probe features as is and will compare the
            enrollment template against all features. The final score will be
            computed based on the ``probes_score_fusion`` option.

        probes_score_fusion : str
            How to fuse the scores of the probes if average_probes is False
            and the database contains multiple probe samples.

        enrolls_score_fusion : str
            How to fuse the scores of the enrolls if average_on_enroll is
            False and the database contains multiple enroll samples.

        **kwargs
            Forwarded unchanged to the ``BioAlgorithm`` constructor.
        """
        super().__init__(
            probes_score_fusion=probes_score_fusion,
            enrolls_score_fusion=enrolls_score_fusion,
            **kwargs,
        )
        self.distance_function = distance_function
        self.factor = factor
        self.average_on_enroll = average_on_enroll
        self.average_probes = average_probes

    def create_templates(self, list_of_feature_sets, enroll):
        """Creates templates from the given feature sets.

        Will make sure the features are 2 dimensional before creating
        templates. Will average features over samples if
        ``average_on_enroll`` is True (for enrollment) or ``average_probes``
        is True (for probes).

        Parameters
        ----------
        list_of_feature_sets : list
            One entry per template; each entry holds the features of the
            samples that make up that template.

        enroll : bool
            True when enrollment templates are created, False for probe
            templates. Selects which averaging option applies.

        Returns
        -------
        numpy.ndarray or list
            An ``NxD`` array when averaging applies; otherwise a list of
            ``N`` two dimensional feature sets (``Nx?xD``).
        """
        list_of_feature_sets = [
            self._make_2d(data) for data in list_of_feature_sets
        ]
        # shape of list_of_feature_sets is Nx?xD
        if (enroll and self.average_on_enroll) or (
            not enroll and self.average_probes
        ):
            # we cannot call np.mean(list_of_feature_sets, axis=1) because
            # the size of axis 1 is different for each feature set.
            # output will be NxD
            return np.array(
                [np.mean(feat, axis=0) for feat in list_of_feature_sets]
            )
        # output shape is Nx?xD
        return list_of_feature_sets

    def _make_2d(self, X):
        """Makes sure that the features are 2 dimensional before creating
        enroll and probe templates.

        For instance, when the source is `VideoLikeContainer` the input of
        ``create_templates`` is [`VideoLikeContainer`, ....]. The
        concatenation of them makes an array of `ZxNxD`. Hence we need to
        stack them in `Z`.

        Parameters
        ----------
        X : sequence
            The features of one template. Its first element must expose
            ``ndim`` unless ``X`` is empty.

        Returns
        -------
        numpy.ndarray or list
            A two dimensional array, or the nested list ``[[]]`` when ``X``
            is empty.
        """
        if not len(X):
            # Empty input: keep a 2D-shaped placeholder (1 row, 0 columns).
            return [[]]
        if X[0].ndim == 2:
            # Each element is already 2D; stack them along the first axis.
            X = np.vstack(X)
        return np.atleast_2d(X)

    def compare(self, enroll_templates, probe_templates):
        """Compares the probe templates to the enroll templates.

        Depending on the ``average_on_enroll`` and ``average_probes``
        options, the templates have different shapes.

        Parameters
        ----------
        enroll_templates : array-like or list
            ``NxD`` when ``average_on_enroll`` is True, otherwise ``Nx?xD``.

        probe_templates : array-like or list
            ``MxD`` when ``average_probes`` is True, otherwise ``Mx?xD``.

        Returns
        -------
        numpy.ndarray
            Scores of shape ``NxM`` where N is the number of enroll
            templates and M is the number of probe templates. Each score is
            ``factor`` times the distance, fused over samples where the
            templates were not averaged.
        """
        # returns scores NxM where N is the number of enroll templates and M
        # is the number of probe templates
        if self.average_on_enroll and self.average_probes:
            # enroll_templates is NxD
            enroll_templates = np.asarray(enroll_templates)
            # probe_templates is MxD
            probe_templates = np.asarray(probe_templates)
            return self.factor * cdist(
                enroll_templates, probe_templates, self.distance_function
            )

        elif self.average_on_enroll:
            # enroll_templates is NxD
            enroll_templates = np.asarray(enroll_templates)
            # probe_templates is Mx?xD
            scores = []
            for probe in probe_templates:
                s = self.factor * cdist(
                    enroll_templates, probe, self.distance_function
                )
                # s is Nx?, we want s to be N
                s = self.fuse_probe_scores(s, axis=1)
                scores.append(s)
            # scores is MxN here; transpose to the NxM convention
            return np.array(scores).T

        elif self.average_probes:
            # enroll_templates is Nx?xD
            # probe_templates is MxD
            probe_templates = np.asarray(probe_templates)
            scores = []
            for enroll in enroll_templates:
                s = self.factor * cdist(
                    enroll, probe_templates, self.distance_function
                )
                # s is ?xM, we want s to be M
                s = self.fuse_enroll_scores(s, axis=0)
                scores.append(s)
            return np.array(scores)

        else:
            # enroll_templates is Nx?1xD
            # probe_templates is Mx?2xD
            scores = []
            for enroll in enroll_templates:
                scores.append([])
                for probe in probe_templates:
                    s = self.factor * cdist(
                        enroll, probe, self.distance_function
                    )
                    # s is ?1x?2, we want s to be scalar:
                    # first collapse the probe axis, then the enroll axis
                    s = self.fuse_probe_scores(s, axis=1)
                    s = self.fuse_enroll_scores(s, axis=0)
                    scores[-1].append(s)
            return np.array(scores)
```