# Source code for bob.measure

```
# import Libraries of other lib packages
import numpy
from . import calibration, load, plot # noqa: F401
from ._library import * # noqa: F401, F403
from ._library import eer_threshold, farfrr, logger, roc
[docs]def fprfnr(negatives, positives, threshold):
"""Alias for :py:func:`bob.measure.farfrr`"""
return farfrr(negatives, positives, threshold)
[docs]def mse(estimation, target):
r"""Mean square error between a set of outputs and target values
Uses the formula:
.. math::
MSE(\hat{\Theta}) = E[(\hat{\Theta} - \Theta)^2]
Estimation (:math:`\hat{\Theta}`) and target (:math:`\Theta`) are supposed to
have 2 dimensions. Different examples are organized as rows while different
features in the estimated values or targets are organized as different
columns.
Parameters:
estimation (array): an N-dimensional array that corresponds to the value
estimated by your procedure
target (array): an N-dimensional array that corresponds to the expected
value
Returns:
float: The average of the squared error between the estimated value and the
target
"""
return numpy.mean((estimation - target) ** 2, 0)
[docs]def rmse(estimation, target):
r"""Calculates the root mean square error between a set of outputs and target
Uses the formula:
.. math::
RMSE(\hat{\Theta}) = \sqrt(E[(\hat{\Theta} - \Theta)^2])
Estimation (:math:`\hat{\Theta}`) and target (:math:`\Theta`) are supposed to
have 2 dimensions. Different examples are organized as rows while different
features in the estimated values or targets are organized as different
columns.
Parameters:
estimation (array): an N-dimensional array that corresponds to the value
estimated by your procedure
target (array): an N-dimensional array that corresponds to the expected
value
Returns:
float: The square-root of the average of the squared error between the
estimated value and the target
"""
return numpy.sqrt(mse(estimation, target))
[docs]def relevance(input, machine):
"""Calculates the relevance of every input feature to the estimation process
Uses the formula:
Neural Triggering System Operating on High Resolution Calorimetry
Information, Anjos et al, April 2006, Nuclear Instruments and Methods in
Physics Research, volume 559, pages 134-138
.. math::
R(x_{i}) = |E[(o(x) - o(x|x_{i}=E[x_{i}]))^2]|
In other words, the relevance of a certain input feature **i** is the change
on the machine output value when such feature is replaced by its mean for all
input vectors. For this to work, the `input` parameter has to be a 2D array
with features arranged column-wise while different examples are arranged
row-wise.
Parameters:
input (array): an N-dimensional array that corresponds to the value
estimated by your model
machine (object): A machine that can be called to "process" your input
Returns:
array: An 1D float array as large as the number of columns (second
dimension) of your input array, estimating the "relevance" of each input
column (or feature) to the score provided by the machine.
"""
o = machine(input)
i2 = input.copy()
retval = numpy.ndarray((input.shape[1],), "float64")
retval.fill(0)
for k in range(input.shape[1]):
i2[:, :] = input # reset
i2[:, k] = numpy.mean(input[:, k])
retval[k] = (mse(machine(i2), o).sum()) ** 0.5
return retval
[docs]def recognition_rate(cmc_scores, threshold=None, rank=1):
"""Calculates the recognition rate from the given input
It is identical to the CMC value for the given ``rank``.
The input has a specific format, which is a list of two-element tuples. Each
of the tuples contains the negative :math:`\\{S_p^-\\}` and the positive
:math:`\\{S_p^+\\}` scores for one probe item :math:`p`, or ``None`` in case
of open set recognition.
If ``threshold`` is set to ``None``, the rank 1 recognition rate is defined
as the number of test items, for which the highest positive
:math:`\\max\\{S_p^+\\}` score is greater than or equal to all negative
scores, divided by the number of all probe items :math:`P`:
.. math::
\\mathrm{RR} = \\frac{1}{P} \\sum_{p=1}^{P} \\begin{cases} 1 & \\mathrm{if } \\max\\{S_p^+\\} >= \\max\\{S_p^-\\}\\\\ 0 & \\mathrm{otherwise} \\end{cases}
For a given rank :math:`r>1`, up to :math:`r` negative scores that are higher
than the highest positive score are allowed to still count as correctly
classified in the top :math:`r` rank.
If ``threshold`` :math:`\\theta` is given, **all** scores below threshold
will be filtered out. Hence, if all positive scores are below threshold
:math:`\\max\\{S_p^+\\} < \\theta`, the probe will be misclassified **at any
rank**.
For open set recognition, i.e., when there exist a tuple including negative
scores without corresponding positive scores (``None``), and **all** negative
scores are below ``threshold`` :math:`\\max\\{S_p^+\\} < \\theta`, the probe
item is correctly rejected, **and it does not count into the denominator**
:math:`P`. When no ``threshold`` is provided, the open set probes will
**always** count as misclassified, regardless of the ``rank``.
.. warn:
For open set tests, this rate does not correspond to a standard rate.
Please use :py:func:`detection_identification_rate` and
:py:func:`false_alarm_rate` instead.
Parameters:
cmc_scores (:py:class:`list`): A list in the format ``[(negatives,
positives), ...]`` containing the CMC scores (i.e. :py:class:`list`:
A list of tuples, where each tuple contains the
``negative`` and ``positive`` scores for one probe of the database).
Each pair contains the ``negative`` and the ``positive`` scores for **one
probe item**. Each pair can contain up to one empty array (or ``None``),
i.e., in case of open set recognition.
threshold (:obj:`float`, optional): Decision threshold. If not ``None``, **all**
scores will be filtered by the threshold. In an open set recognition
problem, all open set scores (negatives with no corresponding positive)
for which all scores are below threshold, will be counted as correctly
rejected and **removed** from the probe list (i.e., the denominator).
rank (:obj:`int`, optional):
The rank for which the recognition rate should be computed, 1 by default.
Returns:
float: The (open set) recognition rate for the given rank, a value between
0 and 1.
"""
# If no scores are given, the recognition rate is exactly 0.
if not cmc_scores:
return 0.0
correct = 0
counter = 0
for neg, pos in cmc_scores:
# set all values that are empty before to None
if pos is not None and not numpy.array(pos).size:
pos = None
if neg is not None and not numpy.array(neg).size:
neg = None
if pos is None and neg is None:
raise ValueError(
"One pair of the CMC scores has neither positive nor negative values"
)
# filter out any negative or positive scores below threshold; scores with exactly the threshold are also filtered out
# now, None and an empty array have different meanings.
if threshold is not None:
if neg is not None:
neg = numpy.array(neg)[neg > threshold]
if pos is not None:
pos = numpy.array(pos)[pos > threshold]
if pos is None:
# no positives, so we definitely do not have a match;
# check if we have negatives above threshold
if not neg.size:
# we have no negative scores over the threshold, so we have correctly rejected the probe
# don't increase any of the two counters...
continue
# we have negatives over threshold, so we have incorrect classifications; independent on the actual rank
counter += 1
else:
# we have a positive, so we need to count the probe
counter += 1
if not numpy.array(pos).size:
# all positive scores have been filtered out by the threshold, we definitely have a mis-match
continue
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
if neg is None or not numpy.array(neg).size:
# if we had no negatives, or all negatives were below threshold, we have a match at rank 1
correct += 1
else:
# count the number of negative scores that are higher than the best positive score
index = numpy.sum(neg >= max_pos)
if index < rank:
correct += 1
return float(correct) / float(counter)
[docs]def cmc(cmc_scores):
"""Calculates the cumulative match characteristic (CMC) from the given input.
The input has a specific format, which is a list of two-element tuples. Each
of the tuples contains the negative and the positive scores for one probe
item.
For each probe item the probability that the rank :math:`r` of the positive
score is calculated. The rank is computed as the number of negative scores
that are higher than the positive score. If several positive scores for one
test item exist, the **highest** positive score is taken. The CMC finally
computes how many test items have rank r or higher, divided by the total
number of test values.
.. note::
The CMC is not available for open set classification. Please use the
:py:func:`detection_identification_rate` and :py:func:`false_alarm_rate`
instead.
Parameters
----------
cmc_scores : :py:class:`list`
A list in the format ``[(negatives, positives), ...]`` containing the CMC
scores.
Each pair contains the ``negative`` and the ``positive`` scores for **one
probe item**. Each pair can contain up to one empty array (or ``None``),
i.e., in case of open set recognition.
Returns
-------
1D :py:class:`numpy.ndarray` of `float`
A 1D float array representing the CMC curve.
The rank 1 recognition rate can be found in ``array[0]``, rank 2 rate in
``array[1]``, and so on. The number of ranks (``array.shape[0]``) is the
number of gallery items. Values are in range ``[0,1]``.
"""
# If no scores are given, we cannot plot anything
probe_count = float(len(cmc_scores))
if not probe_count:
raise ValueError("The given set of scores is empty")
# compute MC
match_characteristic = numpy.zeros(
(max([len(neg) for neg, _ in cmc_scores if neg is not None]) + 1,),
numpy.int,
)
for neg, pos in cmc_scores:
if pos is None or not numpy.array(pos).size:
raise ValueError(
"For the CMC computation at least one positive score per pair is necessary."
)
if neg is None:
neg = []
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
# count the number of negative scores that are higher than the best positive score
index = numpy.sum(neg >= max_pos)
match_characteristic[index] += 1
# cumulate
cumulative_match_characteristic = numpy.cumsum(
match_characteristic, dtype=numpy.float64
)
return cumulative_match_characteristic / probe_count
[docs]def detection_identification_rate(cmc_scores, threshold, rank=1):
"""Computes the `detection and identification rate` for the given threshold.
This value is designed to be used in an open set identification protocol, and
defined in Chapter 14.1 of [LiJain2005]_.
Although the detection and identification rate is designed to be computed on
an open set protocol, it uses only the probe elements, for which a
corresponding gallery element exists. For closed set identification
protocols, this function is identical to :py:func:`recognition_rate`. The
only difference is that for this function, a ``threshold`` for the scores
need to be defined, while for :py:func:`recognition_rate` it is optional.
Parameters:
cmc_scores (:py:class:`list`): A list in the format ``[(negatives,
positives), ...]`` containing the CMC.
Each pair contains the ``negative`` and the ``positive`` scores for **one
probe item**. Each pair can contain up to one empty array (or ``None``),
i.e., in case of open set recognition.
threshold (float): The decision threshold :math:`\\tau``.
rank (:obj:`int`, optional): The rank for which the curve should be plotted
Returns:
float: The detection and identification rate for the given threshold.
"""
# count the correctly classifier probes
correct = 0
counter = 0
for neg, pos in cmc_scores:
if pos is None or not numpy.array(pos).size:
# we only consider probes with corresponding gallery items
continue
# we have an in-gallery probe
counter += 1
# check, if it is correctly classified
if neg is None:
neg = []
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
index = numpy.sum(
neg >= max_pos
) # compute the rank (in fact, rank - 1)
if max_pos >= threshold and index < rank:
correct += 1
if not counter:
logger.warn("No in-gallery probe was found")
return 0.0
return float(correct) / float(counter)
[docs]def false_alarm_rate(cmc_scores, threshold):
"""Computes the `false alarm rate` for the given threshold,.
This value is designed to be used in an open set identification protocol, and
defined in Chapter 14.1 of [LiJain2005]_.
The false alarm rate is designed to be computed on an open set protocol, it
uses only the probe elements, for which **no** corresponding gallery element
exists.
Parameters:
cmc_scores (:py:class:`list`): A list in the format ``[(negatives,
positives), ...]`` containing the CMC scores (i.e. :py:class:`list`:
A list of tuples, where each tuple contains the
``negative`` and ``positive`` scores for one probe of the database).
Each pair contains the ``negative`` and the ``positive`` scores for **one
probe item**. Each pair can contain up to one empty array (or ``None``),
i.e., in case of open set recognition.
threshold (float): The decision threshold :math:`\\tau``.
Returns:
float: The false alarm rate.
"""
incorrect = 0
counter = 0
for neg, pos in cmc_scores:
# we only consider the out-of-gallery probes, i.e., with no positive scores
if pos is None or not numpy.array(pos).size:
counter += 1
# check if the probe is above threshold
if neg is None or not numpy.array(neg).size:
raise ValueError(
"One pair of the CMC scores has neither positive nor negative values"
)
if numpy.max(neg) >= threshold:
incorrect += 1
if not counter:
logger.warn("No out-of-gallery probe was found")
return 0.0
return float(incorrect) / float(counter)
[docs]def eer(negatives, positives, is_sorted=False, also_farfrr=False):
"""Calculates the Equal Error Rate (EER).
Please note that it is possible that eer != fpr != fnr.
This function returns (fpr + fnr) / 2 as eer.
If you also need the fpr and fnr values, set ``also_farfrr`` to ``True``.
Parameters
----------
negatives : ``array_like (1D, float)``
The scores for comparisons of objects of different classes.
positives : ``array_like (1D, float)``
The scores for comparisons of objects of the same class.
is_sorted : bool
Are both sets of scores already in ascendantly sorted order?
also_farfrr : bool
If True, it will also return far and frr.
Returns
-------
eer : float
The Equal Error Rate (EER).
fpr : float
The False Positive Rate (FPR). Returned only when ``also_farfrr`` is
``True``.
fnr : float
The False Negative Rate (FNR). Returned only when ``also_farfrr`` is
``True``.
"""
threshold = eer_threshold(negatives, positives, is_sorted)
far, frr = farfrr(negatives, positives, threshold)
if also_farfrr:
return (far + frr) / 2.0, far, frr
return (far + frr) / 2.0
[docs]def roc_auc_score(
negatives, positives, npoints=2000, min_far=-8, log_scale=False
):
"""Area Under the ROC Curve.
Computes the area under the ROC curve. This is useful when you want to report one
number that represents an ROC curve. This implementation uses the trapezoidal rule for
the integration of the ROC curve. For more information, see:
https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve
Parameters
----------
negatives : array_like
The negative scores.
positives : array_like
The positive scores.
npoints : int, optional
Number of points in the ROC curve. Higher numbers leads to more accurate ROC.
min_far : float, optional
Min FAR and FRR values to consider when calculating ROC.
log_scale : bool, optional
If True, converts the x axis (FPR) to log10 scale before calculating AUC. This is
useful in cases where len(negatives) >> len(positives)
Returns
-------
float
The ROC AUC. If ``log_scale`` is False, the value should be between 0 and 1.
"""
fpr, fnr = roc(negatives, positives, npoints, min_far=min_far)
tpr = 1 - fnr
if log_scale:
fpr_pos = fpr > 0
fpr, tpr = fpr[fpr_pos], tpr[fpr_pos]
fpr = numpy.log10(fpr)
area = -1 * numpy.trapz(tpr, fpr)
return area
```