#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy
import scipy.special
import torch
def tricky_division(n, d):
    """Divides n by d, returning 0.0 (instead of raising) when both are zero"""
    return n / (d + (d == 0))
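
# A minimal usage sketch for ``tricky_division`` above (hypothetical values):
# with the non-negative counts used in this module, a zero denominator only
# occurs when the numerator is also zero.
#
#   tricky_division(90, 100)  # 0.9
#   tricky_division(0, 0)     # 0.0 (a plain division would raise)
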
def base_measures(tp, fp, tn, fn):
"""Calculates frequentist measures from true/false positive and negative
counts
This function can return (frequentist versions of) standard machine
learning measures from true and false positive counts of positives and
negatives. For a thorough look into these and alternate names for the
returned values, please check Wikipedia's entry on `Precision and Recall
<https://en.wikipedia.org/wiki/Precision_and_recall>`_.
Parameters
----------
tp : int
True positive count, AKA "hit"
fp : int
False positive count, AKA "false alarm", or "Type I error"
tn : int
True negative count, AKA "correct rejection"
fn : int
False Negative count, AKA "miss", or "Type II error"
Returns
-------
precision : float
P, AKA positive predictive value (PPV). It corresponds arithmetically
to ``tp/(tp+fp)``. In the case ``tp+fp == 0``, this function returns
zero for precision.
recall : float
R, AKA sensitivity, hit rate, or true positive rate (TPR). It
corresponds arithmetically to ``tp/(tp+fn)``. In the special case
where ``tp+fn == 0``, this function returns zero for recall.
specificity : float
S, AKA selectivity or true negative rate (TNR). It
corresponds arithmetically to ``tn/(tn+fp)``. In the special case
where ``tn+fp == 0``, this function returns zero for specificity.
    accuracy : float
        A, see `Accuracy
        <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_.
        It is the proportion of correct predictions (both true positives and
        true negatives) among the total number of pixels examined. It
        corresponds arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure
        includes both true negatives and true positives in the numerator,
        which makes it sensitive to data or regions without annotations.
    jaccard : float
        J, see `Jaccard Index or Similarity
        <https://en.wikipedia.org/wiki/Jaccard_index>`_. It corresponds
        arithmetically to ``tp/(tp+fp+fn)``. In the special case where
        ``tp+fp+fn == 0``, this function returns zero for the Jaccard index.
        The Jaccard index depends on a TP-only numerator, similarly to the F1
        score. For regions where there are no annotations, the Jaccard index
        will always be zero, irrespective of the model output. Accuracy may be
        a better proxy if one needs to consider the true absence of
        annotations in a region as part of the measure.
    f1_score : float
        F1, see `F1-score <https://en.wikipedia.org/wiki/F1_score>`_. It
        corresponds arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``.
        In the special case where ``P+R == (2*tp+fp+fn) == 0``, this function
        returns zero for the F1-score. The F1 or Dice score depends on a
        TP-only numerator, similarly to the Jaccard index. For regions where
        there are no annotations, the F1-score will always be zero,
        irrespective of the model output. Accuracy may be a better proxy if
        one needs to consider the true absence of annotations in a region as
        part of the measure.
"""
return (
tricky_division(tp, tp + fp), # precision
tricky_division(tp, tp + fn), # recall
tricky_division(tn, fp + tn), # specificity
tricky_division(tp + tn, tp + fp + fn + tn), # accuracy
tricky_division(tp, tp + fp + fn), # jaccard index
tricky_division(2 * tp, (2 * tp) + fp + fn), # f1-score
)
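
# A minimal usage sketch for ``base_measures`` above (hypothetical counts of
# 90 true positives, 10 false positives, 80 true negatives and 20 false
# negatives):
#
#   precision, recall, specificity, accuracy, jaccard, f1 = base_measures(
#       tp=90, fp=10, tn=80, fn=20)
#   # precision   = 90 / (90 + 10)        = 0.90
#   # recall      = 90 / (90 + 20)        ~ 0.82
#   # specificity = 80 / (10 + 80)        ~ 0.89
#   # accuracy    = (90 + 80) / 200       = 0.85
#   # jaccard     = 90 / (90 + 10 + 20)   = 0.75
#   # f1          = 180 / (180 + 10 + 20) ~ 0.86
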
def beta_credible_region(k, i, lambda_, coverage):
"""
Returns the mode, upper and lower bounds of the equal-tailed credible
region of a probability estimate following Bernoulli trials.
This implemetnation is based on [GOUTTE-2005]_. It assumes :math:`k`
successes and :math:`l` failures (:math:`n = k+l` total trials) are issued
from a series of Bernoulli trials (likelihood is binomial). The posterior
is derivated using the Bayes Theorem with a beta prior. As there is no
reason to favour high vs. low precision, we use a symmetric Beta prior
(:math:`\\alpha=\\beta`):
.. math::
P(p|k,n) &= \\frac{P(k,n|p)P(p)}{P(k,n)} \\\\
P(p|k,n) &= \\frac{\\frac{n!}{k!(n-k)!}p^{k}(1-p)^{n-k}P(p)}{P(k)} \\\\
        P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\beta)}p^{k+\\alpha-1}(1-p)^{n-k+\\beta-1} \\\\
P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\alpha)}p^{k+\\alpha-1}(1-p)^{n-k+\\alpha-1}
The mode for this posterior (also the maximum a posteriori) is:
.. math::
\\text{mode}(p) = \\frac{k+\\lambda-1}{n+2\\lambda-2}
    Concretely, the prior may be flat (all rates are equally likely,
    :math:`\\lambda=1`) or we may use Jeffrey's prior
    (:math:`\\lambda=0.5`), which is invariant under re-parameterisation.
    Jeffrey's prior indicates that rates close to zero or one are more likely.

    The mode above is well defined if :math:`k+\\alpha > 1` and
    :math:`n-k+\\alpha > 1`, which is usually the case for a reasonably
    well-tuned system with more than a few samples for analysis. At the limits
    of system performance, :math:`k` may be 0, which makes the mode become
    zero.
For our purposes, it may be more suitable to represent :math:`n = k + l`,
with :math:`k`, the number of successes and :math:`l`, the number of
failures in the binomial experiment, and find this more suitable
representation:
.. math::
P(p|k,l) &= \\frac{1}{B(k+\\alpha, l+\\alpha)}p^{k+\\alpha-1}(1-p)^{l+\\alpha-1} \\\\
\\text{mode}(p) &= \\frac{k+\\lambda-1}{k+l+2\\lambda-2}
This can be mapped to most rates calculated in the context of binary
classification this way:
    * Precision or Positive-Predictive Value (PPV): p = TP/(TP+FP), so k=TP, l=FP
    * Recall, Sensitivity, or True Positive Rate: r = TP/(TP+FN), so k=TP, l=FN
    * Specificity or True Negative Rate: s = TN/(TN+FP), so k=TN, l=FP
    * F1-score: f1 = 2TP/(2TP+FP+FN), so k=2TP, l=FP+FN
    * Accuracy: acc = (TP+TN)/(TP+TN+FP+FN), so k=TP+TN, l=FP+FN
    * Jaccard: j = TP/(TP+FP+FN), so k=TP, l=FP+FN
    Contrary to frequentist approaches, in which one can only say that, if the
    test were repeated an infinite number of times and a confidence interval
    were constructed each time, then X% of those confidence intervals would
    contain the true rate, here we can say that, given our observed data,
    there is an X% probability that the true value of :math:`k/n` falls within
    the provided interval.
.. note::
For a disambiguation with Confidence Interval, read
https://en.wikipedia.org/wiki/Credible_interval.
Parameters
==========
    k : int
        Number of successes observed in the experiment
    i : int
        Number of failures observed in the experiment
    lambda_ : :py:class:`float`
        The parameterisation of the Beta prior to consider. Use
        :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for
        Jeffrey's prior.
    coverage : :py:class:`float`
        A floating-point number between 0 and 1.0 indicating the
        coverage you're expecting. A value of 0.95 will ensure 95%
        of the area under the probability density of the posterior
        is covered by the returned equal-tailed interval.
Returns
=======
mean : float
The mean of the posterior distribution
mode : float
        The mode of the posterior distribution
lower, upper: float
The lower and upper bounds of the credible region
"""
    # we return the equal-tailed range
    right = (1.0 - coverage) / 2  # excluded probability mass in each tail
lower = scipy.special.betaincinv(k + lambda_, i + lambda_, right)
upper = scipy.special.betaincinv(k + lambda_, i + lambda_, 1.0 - right)
# evaluate mean and mode (https://en.wikipedia.org/wiki/Beta_distribution)
alpha = k + lambda_
beta = i + lambda_
E = alpha / (alpha + beta)
# the mode of a beta distribution is a bit tricky
if alpha > 1 and beta > 1:
mode = (alpha - 1) / (alpha + beta - 2)
    elif alpha == 1 and beta == 1:
        # In the case of precision, if the threshold is close to 1.0, both TP
        # and FP can be zero, which may cause this condition to be reached, if
        # the prior is exactly 1 (flat prior). This is a weird situation,
        # because effectively we are trying to compute the posterior when the
        # total number of experiments is zero. So, only the prior counts - but
        # the prior is flat, so any value in [0, 1] would do. We pick zero for
        # consistency with the other degenerate cases below.
        mode = 0.0  # any value would do, we just pick this one
elif alpha <= 1 and beta > 1:
mode = 0.0
elif alpha > 1 and beta <= 1:
mode = 1.0
else: # elif alpha < 1 and beta < 1:
# in the case of precision, if the threshold is close to 1.0, both TP
# and FP can be zero, which may cause this condition to be reached, if
# the prior is smaller than 1. This is a weird situation, because
# effectively we are trying to compute the posterior when the total
# number of experiments is zero. So, only the prior counts - but the
# prior is bimodal, so we should just pick a value. We choose the
# left of the range.
mode = 0.0 # could also be 1.0 as the prior is bimodal
return E, mode, lower, upper
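
# A minimal usage sketch for ``beta_credible_region`` above (hypothetical
# counts): 10 successes, 2 failures, Jeffrey's prior (lambda_=0.5) and a 95%
# equal-tailed credible region. The posterior is Beta(10.5, 2.5), so:
#
#   mean, mode, lower, upper = beta_credible_region(10, 2, 0.5, 0.95)
#   # mean = 10.5 / 13.0 ~ 0.81
#   # mode = 9.5 / 11.0  ~ 0.86
#   # lower and upper are the 2.5% and 97.5% quantiles of Beta(10.5, 2.5)
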
def bayesian_measures(tp, fp, tn, fn, lambda_, coverage):
"""Calculates mean and mode from true/false positive and negative counts
with credible regions
This function can return bayesian estimates of standard machine learning
measures from true and false positive counts of positives and negatives.
For a thorough look into these and alternate names for the returned values,
please check Wikipedia's entry on `Precision and Recall
<https://en.wikipedia.org/wiki/Precision_and_recall>`_. See
:py:func:`beta_credible_region` for details on the calculation of returned
values.
Parameters
----------
tp : int
True positive count, AKA "hit"
fp : int
False positive count, AKA "false alarm", or "Type I error"
tn : int
True negative count, AKA "correct rejection"
fn : int
False Negative count, AKA "miss", or "Type II error"
lambda_ : float
The parameterisation of the Beta prior to consider. Use
:math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for
Jeffrey's prior.
coverage : float
A floating-point number between 0 and 1.0 indicating the
coverage you're expecting. A value of 0.95 will ensure 95%
of the area under the probability density of the posterior
is covered by the returned equal-tailed interval.
Returns
-------
    precision : (float, float, float, float)
        P, AKA positive predictive value (PPV): mean, mode and lower/upper
        bounds of the credible region (at the requested ``coverage``). It
        corresponds arithmetically to ``tp/(tp+fp)``.
    recall : (float, float, float, float)
        R, AKA sensitivity, hit rate, or true positive rate (TPR): mean, mode
        and lower/upper bounds of the credible region (at the requested
        ``coverage``). It corresponds arithmetically to ``tp/(tp+fn)``.
    specificity : (float, float, float, float)
        S, AKA selectivity or true negative rate (TNR): mean, mode and
        lower/upper bounds of the credible region (at the requested
        ``coverage``). It corresponds arithmetically to ``tn/(tn+fp)``.
    accuracy : (float, float, float, float)
        A: mean, mode and lower/upper bounds of the credible region (at the
        requested ``coverage``). See `Accuracy
        <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_.
        It is the proportion of correct predictions (both true positives and
        true negatives) among the total number of pixels examined. It
        corresponds arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure
        includes both true negatives and true positives in the numerator,
        which makes it sensitive to data or regions without annotations.
    jaccard : (float, float, float, float)
        J: mean, mode and lower/upper bounds of the credible region (at the
        requested ``coverage``). See `Jaccard Index or Similarity
        <https://en.wikipedia.org/wiki/Jaccard_index>`_. It corresponds
        arithmetically to ``tp/(tp+fp+fn)``. The Jaccard index depends on a
        TP-only numerator, similarly to the F1 score. For regions where there
        are no annotations, the Jaccard index will always be zero,
        irrespective of the model output. Accuracy may be a better proxy if
        one needs to consider the true absence of annotations in a region as
        part of the measure.
    f1_score : (float, float, float, float)
        F1: mean, mode and lower/upper bounds of the credible region (at the
        requested ``coverage``). See `F1-score
        <https://en.wikipedia.org/wiki/F1_score>`_. It corresponds
        arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``. The F1 or
        Dice score depends on a TP-only numerator, similarly to the Jaccard
        index. For regions where there are no annotations, the F1-score will
        always be zero, irrespective of the model output. Accuracy may be a
        better proxy if one needs to consider the true absence of annotations
        in a region as part of the measure.
"""
return (
beta_credible_region(tp, fp, lambda_, coverage), # precision
beta_credible_region(tp, fn, lambda_, coverage), # recall
beta_credible_region(tn, fp, lambda_, coverage), # specificity
beta_credible_region(tp + tn, fp + fn, lambda_, coverage), # accuracy
beta_credible_region(tp, fp + fn, lambda_, coverage), # jaccard index
beta_credible_region(2 * tp, fp + fn, lambda_, coverage), # f1-score
)
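
# A minimal usage sketch for ``bayesian_measures`` above (hypothetical
# counts): each returned entry is a (mean, mode, lower, upper) tuple for the
# corresponding measure.
#
#   precision, recall, specificity, accuracy, jaccard, f1 = bayesian_measures(
#       tp=90, fp=10, tn=80, fn=20, lambda_=0.5, coverage=0.95)
#   # e.g. precision[0] is the posterior mean of TP/(TP+FP) and
#   # (precision[2], precision[3]) bound its 95% equal-tailed credible region
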
def auc(x, y):
"""Calculates the area under the precision-recall curve (AUC)
This function requires a minimum of 2 points and will use the trapezoidal
method to calculate the area under a curve bound between ``[0.0, 1.0]``.
It interpolates missing points if required. The input ``x`` should be
continuously increasing or decreasing.
Parameters
----------
x : numpy.ndarray
A 1D numpy array containing continuously increasing or decreasing
values for the X coordinate.
y : numpy.ndarray
A 1D numpy array containing the Y coordinates of the X values provided
in ``x``.
"""
x = numpy.array(x)
y = numpy.array(y)
assert len(x) == len(y), "x and y sequences must have the same length"
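    # determine the direction of x: numpy.interp requires increasing
    # x-coordinates, so a monotonically decreasing input is flipped below;
    # mixed directions are rejected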
dx = numpy.diff(x)
if numpy.any(dx < 0):
if numpy.all(dx <= 0):
# invert direction
x = x[::-1]
y = y[::-1]
else:
raise ValueError(
"x is neither increasing nor decreasing " ": {}.".format(x)
)
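    # interpolate the curve on a regular 0.001-spaced grid over [0, 1[ and
    # approximate the area with a rectangle sum; values outside the observed
    # range of x extrapolate to 1.0 on the left and 0.0 on the right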
y_interp = numpy.interp(
numpy.arange(0, 1, 0.001),
numpy.array(x),
numpy.array(y),
left=1.0,
right=0.0,
)
return y_interp.sum() * 0.001
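
# A minimal usage sketch for ``auc`` above (hypothetical values): recall (x)
# decreasing from 1.0 to 0.0 while precision (y) increases.
#
#   x = numpy.array([1.0, 0.8, 0.6, 0.4, 0.2, 0.0])  # recall
#   y = numpy.array([0.5, 0.6, 0.7, 0.8, 0.9, 1.0])  # precision
#   area = auc(x, y)  # ~0.75 for this piecewise-linear curve
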
def get_intersection(pred_box, gt_box, multiplier):
"""Calculate intersection of boxes.
Parameters
----------
pred_box : torch.Tensor
A 1D numpy array containing predicted box coords.
gt_box : torch.Tensor
A 1D numpy array containing groud truth box coords.
multiplier: float
A number to increase the predicted bounding box by.
"""
x1t, y1t, x2t, y2t = gt_box.numpy()
x1, y1, x2, y2 = pred_box.numpy()
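    # enlarge the predicted box so that its area grows by ``multiplier``:
    # each side is scaled by sqrt(multiplier), clamping x1/y1 at zero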
m = numpy.sqrt(multiplier)
d_x = ((m * (x2 - x1)) - (x2 - x1)) / 2.0
d_y = ((m * (y2 - y1)) - (y2 - y1)) / 2.0
x1 = max(x1 - d_x, 0)
x2 = x2 + d_x
y1 = max(y1 - d_y, 0)
y2 = y2 + d_y
gt_tensor = gt_box.detach().clone().unsqueeze(0)
pred_tensor = torch.tensor([x1, y1, x2, y2]).unsqueeze(0)
lt = torch.max(pred_tensor[:, None, :2], gt_tensor[:, :2]) # [N,M,2]
rb = torch.min(pred_tensor[:, None, 2:], gt_tensor[:, 2:]) # [N,M,2]
wh = (rb - lt).clamp(min=0) # [N,M,2]
inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
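    # normalise the intersection area by the (original) ground-truth box area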
gt_area = (x2t - x1t) * (y2t - y1t)
if gt_area > 0:
return inter.item() / gt_area
else:
return 0
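
# A minimal usage sketch for ``get_intersection`` above (hypothetical boxes in
# ``(x1, y1, x2, y2)`` format): the predicted box overlaps one quarter of the
# ground-truth box and is not enlarged (multiplier=1.0).
#
#   gt = torch.tensor([0.0, 0.0, 10.0, 10.0])
#   pred = torch.tensor([5.0, 5.0, 15.0, 15.0])
#   ratio = get_intersection(pred, gt, 1.0)  # intersection / gt area = 0.25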