Coverage for src/deepdraw/utils/measure.py: 67%
58 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
« prev ^ index » next coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
2#
3# SPDX-License-Identifier: GPL-3.0-or-later
5import numpy
6import scipy.special
7import torch
10def tricky_division(n, d):
11 """Divides n by d.
13 Returns 0.0 in case of a division by zero
14 """
16 return n / (d + (d == 0))
19def base_measures(tp, fp, tn, fn):
20 """Calculates frequentist measures from true/false positive and negative
21 counts.
23 This function can return (frequentist versions of) standard machine
24 learning measures from true and false positive counts of positives and
25 negatives. For a thorough look into these and alternate names for the
26 returned values, please check Wikipedia's entry on `Precision and Recall
27 <https://en.wikipedia.org/wiki/Precision_and_recall>`_.
30 Parameters
31 ----------
33 tp : int
34 True positive count, AKA "hit"
36 fp : int
37 False positive count, AKA "false alarm", or "Type I error"
39 tn : int
40 True negative count, AKA "correct rejection"
42 fn : int
43 False Negative count, AKA "miss", or "Type II error"
46 Returns
47 -------
49 precision : float
50 P, AKA positive predictive value (PPV). It corresponds arithmetically
51 to ``tp/(tp+fp)``. In the case ``tp+fp == 0``, this function returns
52 zero for precision.
54 recall : float
55 R, AKA sensitivity, hit rate, or true positive rate (TPR). It
56 corresponds arithmetically to ``tp/(tp+fn)``. In the special case
57 where ``tp+fn == 0``, this function returns zero for recall.
59 specificity : float
60 S, AKA selectivity or true negative rate (TNR). It
61 corresponds arithmetically to ``tn/(tn+fp)``. In the special case
62 where ``tn+fp == 0``, this function returns zero for specificity.
64 accuracy : float
65 A, see `Accuracy
66 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is
67 the proportion of correct predictions (both true positives and true
68 negatives) among the total number of pixels examined. It corresponds
69 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes
70 both true-negatives and positives in the numerator, what makes it
71 sensitive to data or regions without annotations.
73 jaccard : float
74 J, see `Jaccard Index or Similarity
75 <https://en.wikipedia.org/wiki/Jaccard_index>`_. It corresponds
76 arithmetically to ``tp/(tp+fp+fn)``. In the special case where
77 ``tn+fp+fn == 0``, this function returns zero for the Jaccard index.
78 The Jaccard index depends on a TP-only numerator, similarly to the F1
79 score. For regions where there are no annotations, the Jaccard index
80 will always be zero, irrespective of the model output. Accuracy may be
81 a better proxy if one needs to consider the true abscence of
82 annotations in a region as part of the measure.
84 f1_score : float
85 F1, see `F1-score <https://en.wikipedia.org/wiki/F1_score>`_. It
86 corresponds arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``.
87 In the special case where ``P+R == (2*tp+fp+fn) == 0``, this function
88 returns zero for the Jaccard index. The F1 or Dice score depends on a
89 TP-only numerator, similarly to the Jaccard index. For regions where
90 there are no annotations, the F1-score will always be zero,
91 irrespective of the model output. Accuracy may be a better proxy if
92 one needs to consider the true abscence of annotations in a region as
93 part of the measure.
94 """
96 return (
97 tricky_division(tp, tp + fp), # precision
98 tricky_division(tp, tp + fn), # recall
99 tricky_division(tn, fp + tn), # specificity
100 tricky_division(tp + tn, tp + fp + fn + tn), # accuracy
101 tricky_division(tp, tp + fp + fn), # jaccard index
102 tricky_division(2 * tp, (2 * tp) + fp + fn), # f1-score
103 )
106def beta_credible_region(k, i, lambda_, coverage):
107 """Returns the mode, upper and lower bounds of the equal-tailed credible
108 region of a probability estimate following Bernoulli trials.
110 This implemetnation is based on [GOUTTE-2005]_. It assumes :math:`k`
111 successes and :math:`l` failures (:math:`n = k+l` total trials) are issued
112 from a series of Bernoulli trials (likelihood is binomial). The posterior
113 is derivated using the Bayes Theorem with a beta prior. As there is no
114 reason to favour high vs. low precision, we use a symmetric Beta prior
115 (:math:`\\alpha=\\beta`):
117 .. math::
119 P(p|k,n) &= \\frac{P(k,n|p)P(p)}{P(k,n)} \\\\
120 P(p|k,n) &= \\frac{\\frac{n!}{k!(n-k)!}p^{k}(1-p)^{n-k}P(p)}{P(k)} \\\\
121 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\beta)}p^{k+\\alpha-1}(1-p)^{n-k+\\beta-1} \\\\
122 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\alpha)}p^{k+\\alpha-1}(1-p)^{n-k+\\alpha-1}
124 The mode for this posterior (also the maximum a posteriori) is:
126 .. math::
128 \\text{mode}(p) = \\frac{k+\\lambda-1}{n+2\\lambda-2}
130 Concretely, the prior may be flat (all rates are equally likely,
131 :math:`\\lambda=1`) or we may use Jeoffrey's prior
132 (:math:`\\lambda=0.5`), that is invariant through re-parameterisation.
133 Jeffrey's prior indicate that rates close to zero or one are more likely.
135 The mode above works if :math:`k+{\\alpha},n-k+{\\alpha} > 1`, which is
136 usually the case for a resonably well tunned system, with more than a few
137 samples for analysis. In the limit of the system performance, :math:`k`
138 may be 0, which will make the mode become zero.
140 For our purposes, it may be more suitable to represent :math:`n = k + l`,
141 with :math:`k`, the number of successes and :math:`l`, the number of
142 failures in the binomial experiment, and find this more suitable
143 representation:
145 .. math::
147 P(p|k,l) &= \\frac{1}{B(k+\\alpha, l+\\alpha)}p^{k+\\alpha-1}(1-p)^{l+\\alpha-1} \\\\
148 \\text{mode}(p) &= \\frac{k+\\lambda-1}{k+l+2\\lambda-2}
150 This can be mapped to most rates calculated in the context of binary
151 classification this way:
153 * Precision or Positive-Predictive Value (PPV): p = TP/(TP+FP), so k=TP, l=FP
154 * Recall, Sensitivity, or True Positive Rate: r = TP/(TP+FN), so k=TP, l=FN
155 * Specificity or True Negative Rage: s = TN/(TN+FP), so k=TN, l=FP
156 * F1-score: f1 = 2TP/(2TP+FP+FN), so k=2TP, l=FP+FN
157 * Accuracy: acc = TP+TN/(TP+TN+FP+FN), so k=TP+TN, l=FP+FN
158 * Jaccard: j = TP/(TP+FP+FN), so k=TP, l=FP+FN
160 Contrary to frequentist approaches, in which one can only
161 say that if the test were repeated an infinite number of times,
162 and one constructed a confidence interval each time, then X%
163 of the confidence intervals would contain the true rate, here
164 we can say that given our observed data, there is a X% probability
165 that the true value of :math:`k/n` falls within the provided
166 interval.
169 .. note::
171 For a disambiguation with Confidence Interval, read
172 https://en.wikipedia.org/wiki/Credible_interval.
175 Parameters
176 ==========
178 k : int
179 Number of successes observed on the experiment
181 i : int
182 Number of failures observed on the experiment
184 lambda__ : :py:class:`float`, Optional
185 The parameterisation of the Beta prior to consider. Use
186 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for
187 Jeffrey's prior (the default).
189 coverage : :py:class:`float`, Optional
190 A floating-point number between 0 and 1.0 indicating the
191 coverage you're expecting. A value of 0.95 will ensure 95%
192 of the area under the probability density of the posterior
193 is covered by the returned equal-tailed interval.
196 Returns
197 =======
199 mean : float
200 The mean of the posterior distribution
202 mode : float
203 The mode of the posterior distributA questão do volume eion
205 lower, upper: float
206 The lower and upper bounds of the credible region
207 """
209 # we return the equally-tailed range
210 right = (1.0 - coverage) / 2 # half-width in each side
211 lower = scipy.special.betaincinv(k + lambda_, i + lambda_, right)
212 upper = scipy.special.betaincinv(k + lambda_, i + lambda_, 1.0 - right)
214 # evaluate mean and mode (https://en.wikipedia.org/wiki/Beta_distribution)
215 alpha = k + lambda_
216 beta = i + lambda_
218 E = alpha / (alpha + beta)
220 # the mode of a beta distribution is a bit tricky
221 if alpha > 1 and beta > 1:
222 mode = (alpha - 1) / (alpha + beta - 2)
223 elif alpha == 1 and beta == 1:
224 # In the case of precision, if the threshold is close to 1.0, both TP
225 # and FP can be zero, which may cause this condition to be reached, if
226 # the prior is exactly 1 (flat prior). This is a weird situation,
227 # because effectively we are trying to compute the posterior when the
228 # total number of experiments is zero. So, only the prior counts - but
229 # the prior is flat, so we should just pick a value. We choose the
230 # middle of the range.
231 mode = 0.0 # any value would do, we just pick this one
232 elif alpha <= 1 and beta > 1:
233 mode = 0.0
234 elif alpha > 1 and beta <= 1:
235 mode = 1.0
236 else: # elif alpha < 1 and beta < 1:
237 # in the case of precision, if the threshold is close to 1.0, both TP
238 # and FP can be zero, which may cause this condition to be reached, if
239 # the prior is smaller than 1. This is a weird situation, because
240 # effectively we are trying to compute the posterior when the total
241 # number of experiments is zero. So, only the prior counts - but the
242 # prior is bimodal, so we should just pick a value. We choose the
243 # left of the range.
244 mode = 0.0 # could also be 1.0 as the prior is bimodal
246 return E, mode, lower, upper
249def bayesian_measures(tp, fp, tn, fn, lambda_, coverage):
250 """Calculates mean and mode from true/false positive and negative counts
251 with credible regions.
253 This function can return bayesian estimates of standard machine learning
254 measures from true and false positive counts of positives and negatives.
255 For a thorough look into these and alternate names for the returned values,
256 please check Wikipedia's entry on `Precision and Recall
257 <https://en.wikipedia.org/wiki/Precision_and_recall>`_. See
258 :py:func:`beta_credible_region` for details on the calculation of returned
259 values.
262 Parameters
263 ----------
265 tp : int
266 True positive count, AKA "hit"
268 fp : int
269 False positive count, AKA "false alarm", or "Type I error"
271 tn : int
272 True negative count, AKA "correct rejection"
274 fn : int
275 False Negative count, AKA "miss", or "Type II error"
277 lambda_ : float
278 The parameterisation of the Beta prior to consider. Use
279 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for
280 Jeffrey's prior.
282 coverage : float
283 A floating-point number between 0 and 1.0 indicating the
284 coverage you're expecting. A value of 0.95 will ensure 95%
285 of the area under the probability density of the posterior
286 is covered by the returned equal-tailed interval.
290 Returns
291 -------
293 precision : (float, float, float, float)
294 P, AKA positive predictive value (PPV), mean, mode and credible
295 intervals (95% CI). It corresponds arithmetically
296 to ``tp/(tp+fp)``.
298 recall : (float, float, float, float)
299 R, AKA sensitivity, hit rate, or true positive rate (TPR), mean, mode
300 and credible intervals (95% CI). It corresponds arithmetically to
301 ``tp/(tp+fn)``.
303 specificity : (float, float, float, float)
304 S, AKA selectivity or true negative rate (TNR), mean, mode and credible
305 intervals (95% CI). It corresponds arithmetically to ``tn/(tn+fp)``.
307 accuracy : (float, float, float, float)
308 A, mean, mode and credible intervals (95% CI). See `Accuracy
309 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is
310 the proportion of correct predictions (both true positives and true
311 negatives) among the total number of pixels examined. It corresponds
312 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes
313 both true-negatives and positives in the numerator, what makes it
314 sensitive to data or regions without annotations.
316 jaccard : (float, float, float, float)
317 J, mean, mode and credible intervals (95% CI). See `Jaccard Index or
318 Similarity <https://en.wikipedia.org/wiki/Jaccard_index>`_. It
319 corresponds arithmetically to ``tp/(tp+fp+fn)``. The Jaccard index
320 depends on a TP-only numerator, similarly to the F1 score. For regions
321 where there are no annotations, the Jaccard index will always be zero,
322 irrespective of the model output. Accuracy may be a better proxy if
323 one needs to consider the true abscence of annotations in a region as
324 part of the measure.
326 f1_score : (float, float, float, float)
327 F1, mean, mode and credible intervals (95% CI). See `F1-score
328 <https://en.wikipedia.org/wiki/F1_score>`_. It corresponds
329 arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``. The F1 or
330 Dice score depends on a TP-only numerator, similarly to the Jaccard
331 index. For regions where there are no annotations, the F1-score will
332 always be zero, irrespective of the model output. Accuracy may be a
333 better proxy if one needs to consider the true abscence of annotations
334 in a region as part of the measure.
335 """
337 return (
338 beta_credible_region(tp, fp, lambda_, coverage), # precision
339 beta_credible_region(tp, fn, lambda_, coverage), # recall
340 beta_credible_region(tn, fp, lambda_, coverage), # specificity
341 beta_credible_region(tp + tn, fp + fn, lambda_, coverage), # accuracy
342 beta_credible_region(tp, fp + fn, lambda_, coverage), # jaccard index
343 beta_credible_region(2 * tp, fp + fn, lambda_, coverage), # f1-score
344 )
347def auc(x, y):
348 """Calculates the area under the precision-recall curve (AUC)
350 This function requires a minimum of 2 points and will use the trapezoidal
351 method to calculate the area under a curve bound between ``[0.0, 1.0]``.
352 It interpolates missing points if required. The input ``x`` should be
353 continuously increasing or decreasing.
356 Parameters
357 ----------
359 x : numpy.ndarray
360 A 1D numpy array containing continuously increasing or decreasing
361 values for the X coordinate.
363 y : numpy.ndarray
364 A 1D numpy array containing the Y coordinates of the X values provided
365 in ``x``.
366 """
368 x = numpy.array(x)
369 y = numpy.array(y)
371 assert len(x) == len(y), "x and y sequences must have the same length"
373 dx = numpy.diff(x)
374 if numpy.any(dx < 0):
375 if numpy.all(dx <= 0):
376 # invert direction
377 x = x[::-1]
378 y = y[::-1]
379 else:
380 raise ValueError(
381 "x is neither increasing nor decreasing " ": {}.".format(x)
382 )
384 y_interp = numpy.interp(
385 numpy.arange(0, 1, 0.001),
386 numpy.array(x),
387 numpy.array(y),
388 left=1.0,
389 right=0.0,
390 )
391 return y_interp.sum() * 0.001
394def get_intersection(pred_box, gt_box, multiplier):
395 """Calculate intersection of boxes.
397 Parameters
398 ----------
399 pred_box : torch.Tensor
400 A 1D numpy array containing predicted box coords.
402 gt_box : torch.Tensor
403 A 1D numpy array containing groud truth box coords.
405 multiplier: float
406 A number to increase the predicted bounding box by.
407 """
408 x1t, y1t, x2t, y2t = gt_box.numpy()
409 x1, y1, x2, y2 = pred_box.numpy()
411 m = numpy.sqrt(multiplier)
412 d_x = ((m * (x2 - x1)) - (x2 - x1)) / 2.0
413 d_y = ((m * (y2 - y1)) - (y2 - y1)) / 2.0
414 x1 = max(x1 - d_x, 0)
415 x2 = x2 + d_x
416 y1 = max(y1 - d_y, 0)
417 y2 = y2 + d_y
419 gt_tensor = gt_box.detach().clone().unsqueeze(0)
420 pred_tensor = torch.tensor([x1, y1, x2, y2]).unsqueeze(0)
422 lt = torch.max(pred_tensor[:, None, :2], gt_tensor[:, :2]) # [N,M,2]
423 rb = torch.min(pred_tensor[:, None, 2:], gt_tensor[:, 2:]) # [N,M,2]
425 wh = (rb - lt).clamp(min=0) # [N,M,2]
426 inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
428 gt_area = (x2t - x1t) * (y2t - y1t)
430 if gt_area > 0:
431 return inter.item() / gt_area
433 else:
434 return 0