1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4import numpy
5import scipy.special
6import torch
7
8
9def tricky_division(n, d):
10 """Divides n by d. Returns 0.0 in case of a division by zero"""
11
12 return n / (d + (d == 0))
13
14
15def base_measures(tp, fp, tn, fn):
16 """Calculates frequentist measures from true/false positive and negative
17 counts
18
19 This function can return (frequentist versions of) standard machine
20 learning measures from true and false positive counts of positives and
21 negatives. For a thorough look into these and alternate names for the
22 returned values, please check Wikipedia's entry on `Precision and Recall
23 <https://en.wikipedia.org/wiki/Precision_and_recall>`_.
24
25
26 Parameters
27 ----------
28
29 tp : int
30 True positive count, AKA "hit"
31
32 fp : int
33 False positive count, AKA "false alarm", or "Type I error"
34
35 tn : int
36 True negative count, AKA "correct rejection"
37
38 fn : int
39 False Negative count, AKA "miss", or "Type II error"
40
41
42 Returns
43 -------
44
45 precision : float
46 P, AKA positive predictive value (PPV). It corresponds arithmetically
47 to ``tp/(tp+fp)``. In the case ``tp+fp == 0``, this function returns
48 zero for precision.
49
50 recall : float
51 R, AKA sensitivity, hit rate, or true positive rate (TPR). It
52 corresponds arithmetically to ``tp/(tp+fn)``. In the special case
53 where ``tp+fn == 0``, this function returns zero for recall.
54
55 specificity : float
56 S, AKA selectivity or true negative rate (TNR). It
57 corresponds arithmetically to ``tn/(tn+fp)``. In the special case
58 where ``tn+fp == 0``, this function returns zero for specificity.
59
60 accuracy : float
61 A, see `Accuracy
62 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is
63 the proportion of correct predictions (both true positives and true
64 negatives) among the total number of pixels examined. It corresponds
65 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes
66 both true-negatives and positives in the numerator, what makes it
67 sensitive to data or regions without annotations.
68
69 jaccard : float
70 J, see `Jaccard Index or Similarity
71 <https://en.wikipedia.org/wiki/Jaccard_index>`_. It corresponds
72 arithmetically to ``tp/(tp+fp+fn)``. In the special case where
73 ``tn+fp+fn == 0``, this function returns zero for the Jaccard index.
74 The Jaccard index depends on a TP-only numerator, similarly to the F1
75 score. For regions where there are no annotations, the Jaccard index
76 will always be zero, irrespective of the model output. Accuracy may be
77 a better proxy if one needs to consider the true abscence of
78 annotations in a region as part of the measure.
79
80 f1_score : float
81 F1, see `F1-score <https://en.wikipedia.org/wiki/F1_score>`_. It
82 corresponds arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``.
83 In the special case where ``P+R == (2*tp+fp+fn) == 0``, this function
84 returns zero for the Jaccard index. The F1 or Dice score depends on a
85 TP-only numerator, similarly to the Jaccard index. For regions where
86 there are no annotations, the F1-score will always be zero,
87 irrespective of the model output. Accuracy may be a better proxy if
88 one needs to consider the true abscence of annotations in a region as
89 part of the measure.
90
91 """
92
93 return (
94 tricky_division(tp, tp + fp), # precision
95 tricky_division(tp, tp + fn), # recall
96 tricky_division(tn, fp + tn), # specificity
97 tricky_division(tp + tn, tp + fp + fn + tn), # accuracy
98 tricky_division(tp, tp + fp + fn), # jaccard index
99 tricky_division(2 * tp, (2 * tp) + fp + fn), # f1-score
100 )
101
102
103def beta_credible_region(k, i, lambda_, coverage):
104 """
105 Returns the mode, upper and lower bounds of the equal-tailed credible
106 region of a probability estimate following Bernoulli trials.
107
108 This implemetnation is based on [GOUTTE-2005]_. It assumes :math:`k`
109 successes and :math:`l` failures (:math:`n = k+l` total trials) are issued
110 from a series of Bernoulli trials (likelihood is binomial). The posterior
111 is derivated using the Bayes Theorem with a beta prior. As there is no
112 reason to favour high vs. low precision, we use a symmetric Beta prior
113 (:math:`\\alpha=\\beta`):
114
115 .. math::
116
117 P(p|k,n) &= \\frac{P(k,n|p)P(p)}{P(k,n)} \\\\
118 P(p|k,n) &= \\frac{\\frac{n!}{k!(n-k)!}p^{k}(1-p)^{n-k}P(p)}{P(k)} \\\\
119 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\beta)}p^{k+\\alpha-1}(1-p)^{n-k+\\beta-1} \\\\
120 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\alpha)}p^{k+\\alpha-1}(1-p)^{n-k+\\alpha-1}
121
122 The mode for this posterior (also the maximum a posteriori) is:
123
124 .. math::
125
126 \\text{mode}(p) = \\frac{k+\\lambda-1}{n+2\\lambda-2}
127
128 Concretely, the prior may be flat (all rates are equally likely,
129 :math:`\\lambda=1`) or we may use Jeoffrey's prior
130 (:math:`\\lambda=0.5`), that is invariant through re-parameterisation.
131 Jeffrey's prior indicate that rates close to zero or one are more likely.
132
133 The mode above works if :math:`k+{\\alpha},n-k+{\\alpha} > 1`, which is
134 usually the case for a resonably well tunned system, with more than a few
135 samples for analysis. In the limit of the system performance, :math:`k`
136 may be 0, which will make the mode become zero.
137
138 For our purposes, it may be more suitable to represent :math:`n = k + l`,
139 with :math:`k`, the number of successes and :math:`l`, the number of
140 failures in the binomial experiment, and find this more suitable
141 representation:
142
143 .. math::
144
145 P(p|k,l) &= \\frac{1}{B(k+\\alpha, l+\\alpha)}p^{k+\\alpha-1}(1-p)^{l+\\alpha-1} \\\\
146 \\text{mode}(p) &= \\frac{k+\\lambda-1}{k+l+2\\lambda-2}
147
148 This can be mapped to most rates calculated in the context of binary
149 classification this way:
150
151 * Precision or Positive-Predictive Value (PPV): p = TP/(TP+FP), so k=TP, l=FP
152 * Recall, Sensitivity, or True Positive Rate: r = TP/(TP+FN), so k=TP, l=FN
153 * Specificity or True Negative Rage: s = TN/(TN+FP), so k=TN, l=FP
154 * F1-score: f1 = 2TP/(2TP+FP+FN), so k=2TP, l=FP+FN
155 * Accuracy: acc = TP+TN/(TP+TN+FP+FN), so k=TP+TN, l=FP+FN
156 * Jaccard: j = TP/(TP+FP+FN), so k=TP, l=FP+FN
157
158 Contrary to frequentist approaches, in which one can only
159 say that if the test were repeated an infinite number of times,
160 and one constructed a confidence interval each time, then X%
161 of the confidence intervals would contain the true rate, here
162 we can say that given our observed data, there is a X% probability
163 that the true value of :math:`k/n` falls within the provided
164 interval.
165
166
167 .. note::
168
169 For a disambiguation with Confidence Interval, read
170 https://en.wikipedia.org/wiki/Credible_interval.
171
172
173 Parameters
174 ==========
175
176 k : int
177 Number of successes observed on the experiment
178
179 i : int
180 Number of failures observed on the experiment
181
182 lambda__ : :py:class:`float`, Optional
183 The parameterisation of the Beta prior to consider. Use
184 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for
185 Jeffrey's prior (the default).
186
187 coverage : :py:class:`float`, Optional
188 A floating-point number between 0 and 1.0 indicating the
189 coverage you're expecting. A value of 0.95 will ensure 95%
190 of the area under the probability density of the posterior
191 is covered by the returned equal-tailed interval.
192
193
194 Returns
195 =======
196
197 mean : float
198 The mean of the posterior distribution
199
200 mode : float
201 The mode of the posterior distributA questão do volume eion
202
203 lower, upper: float
204 The lower and upper bounds of the credible region
205
206 """
207
208 # we return the equally-tailed range
209 right = (1.0 - coverage) / 2 # half-width in each side
210 lower = scipy.special.betaincinv(k + lambda_, i + lambda_, right)
211 upper = scipy.special.betaincinv(k + lambda_, i + lambda_, 1.0 - right)
212
213 # evaluate mean and mode (https://en.wikipedia.org/wiki/Beta_distribution)
214 alpha = k + lambda_
215 beta = i + lambda_
216
217 E = alpha / (alpha + beta)
218
219 # the mode of a beta distribution is a bit tricky
220 if alpha > 1 and beta > 1:
221 mode = (alpha - 1) / (alpha + beta - 2)
222 elif alpha == 1 and beta == 1:
223 # In the case of precision, if the threshold is close to 1.0, both TP
224 # and FP can be zero, which may cause this condition to be reached, if
225 # the prior is exactly 1 (flat prior). This is a weird situation,
226 # because effectively we are trying to compute the posterior when the
227 # total number of experiments is zero. So, only the prior counts - but
228 # the prior is flat, so we should just pick a value. We choose the
229 # middle of the range.
230 mode = 0.0 # any value would do, we just pick this one
231 elif alpha <= 1 and beta > 1:
232 mode = 0.0
233 elif alpha > 1 and beta <= 1:
234 mode = 1.0
235 else: # elif alpha < 1 and beta < 1:
236 # in the case of precision, if the threshold is close to 1.0, both TP
237 # and FP can be zero, which may cause this condition to be reached, if
238 # the prior is smaller than 1. This is a weird situation, because
239 # effectively we are trying to compute the posterior when the total
240 # number of experiments is zero. So, only the prior counts - but the
241 # prior is bimodal, so we should just pick a value. We choose the
242 # left of the range.
243 mode = 0.0 # could also be 1.0 as the prior is bimodal
244
245 return E, mode, lower, upper
246
247
248def bayesian_measures(tp, fp, tn, fn, lambda_, coverage):
249 """Calculates mean and mode from true/false positive and negative counts
250 with credible regions
251
252 This function can return bayesian estimates of standard machine learning
253 measures from true and false positive counts of positives and negatives.
254 For a thorough look into these and alternate names for the returned values,
255 please check Wikipedia's entry on `Precision and Recall
256 <https://en.wikipedia.org/wiki/Precision_and_recall>`_. See
257 :py:func:`beta_credible_region` for details on the calculation of returned
258 values.
259
260
261 Parameters
262 ----------
263
264 tp : int
265 True positive count, AKA "hit"
266
267 fp : int
268 False positive count, AKA "false alarm", or "Type I error"
269
270 tn : int
271 True negative count, AKA "correct rejection"
272
273 fn : int
274 False Negative count, AKA "miss", or "Type II error"
275
276 lambda_ : float
277 The parameterisation of the Beta prior to consider. Use
278 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for
279 Jeffrey's prior.
280
281 coverage : float
282 A floating-point number between 0 and 1.0 indicating the
283 coverage you're expecting. A value of 0.95 will ensure 95%
284 of the area under the probability density of the posterior
285 is covered by the returned equal-tailed interval.
286
287
288
289 Returns
290 -------
291
292 precision : (float, float, float, float)
293 P, AKA positive predictive value (PPV), mean, mode and credible
294 intervals (95% CI). It corresponds arithmetically
295 to ``tp/(tp+fp)``.
296
297 recall : (float, float, float, float)
298 R, AKA sensitivity, hit rate, or true positive rate (TPR), mean, mode
299 and credible intervals (95% CI). It corresponds arithmetically to
300 ``tp/(tp+fn)``.
301
302 specificity : (float, float, float, float)
303 S, AKA selectivity or true negative rate (TNR), mean, mode and credible
304 intervals (95% CI). It corresponds arithmetically to ``tn/(tn+fp)``.
305
306 accuracy : (float, float, float, float)
307 A, mean, mode and credible intervals (95% CI). See `Accuracy
308 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is
309 the proportion of correct predictions (both true positives and true
310 negatives) among the total number of pixels examined. It corresponds
311 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes
312 both true-negatives and positives in the numerator, what makes it
313 sensitive to data or regions without annotations.
314
315 jaccard : (float, float, float, float)
316 J, mean, mode and credible intervals (95% CI). See `Jaccard Index or
317 Similarity <https://en.wikipedia.org/wiki/Jaccard_index>`_. It
318 corresponds arithmetically to ``tp/(tp+fp+fn)``. The Jaccard index
319 depends on a TP-only numerator, similarly to the F1 score. For regions
320 where there are no annotations, the Jaccard index will always be zero,
321 irrespective of the model output. Accuracy may be a better proxy if
322 one needs to consider the true abscence of annotations in a region as
323 part of the measure.
324
325 f1_score : (float, float, float, float)
326 F1, mean, mode and credible intervals (95% CI). See `F1-score
327 <https://en.wikipedia.org/wiki/F1_score>`_. It corresponds
328 arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``. The F1 or
329 Dice score depends on a TP-only numerator, similarly to the Jaccard
330 index. For regions where there are no annotations, the F1-score will
331 always be zero, irrespective of the model output. Accuracy may be a
332 better proxy if one needs to consider the true abscence of annotations
333 in a region as part of the measure.
334 """
335
336 return (
337 beta_credible_region(tp, fp, lambda_, coverage), # precision
338 beta_credible_region(tp, fn, lambda_, coverage), # recall
339 beta_credible_region(tn, fp, lambda_, coverage), # specificity
340 beta_credible_region(tp + tn, fp + fn, lambda_, coverage), # accuracy
341 beta_credible_region(tp, fp + fn, lambda_, coverage), # jaccard index
342 beta_credible_region(2 * tp, fp + fn, lambda_, coverage), # f1-score
343 )
344
345
346def auc(x, y):
347 """Calculates the area under the precision-recall curve (AUC)
348
349 This function requires a minimum of 2 points and will use the trapezoidal
350 method to calculate the area under a curve bound between ``[0.0, 1.0]``.
351 It interpolates missing points if required. The input ``x`` should be
352 continuously increasing or decreasing.
353
354
355 Parameters
356 ----------
357
358 x : numpy.ndarray
359 A 1D numpy array containing continuously increasing or decreasing
360 values for the X coordinate.
361
362 y : numpy.ndarray
363 A 1D numpy array containing the Y coordinates of the X values provided
364 in ``x``.
365
366 """
367
368 x = numpy.array(x)
369 y = numpy.array(y)
370
371 assert len(x) == len(y), "x and y sequences must have the same length"
372
373 dx = numpy.diff(x)
374 if numpy.any(dx < 0):
375 if numpy.all(dx <= 0):
376 # invert direction
377 x = x[::-1]
378 y = y[::-1]
379 else:
380 raise ValueError(
381 "x is neither increasing nor decreasing " ": {}.".format(x)
382 )
383
384 y_interp = numpy.interp(
385 numpy.arange(0, 1, 0.001),
386 numpy.array(x),
387 numpy.array(y),
388 left=1.0,
389 right=0.0,
390 )
391 return y_interp.sum() * 0.001
392
393
394def get_intersection(pred_box, gt_box, multiplier):
395 """Calculate intersection of boxes.
396
397 Parameters
398 ----------
399 pred_box : torch.Tensor
400 A 1D numpy array containing predicted box coords.
401
402 gt_box : torch.Tensor
403 A 1D numpy array containing groud truth box coords.
404
405 multiplier: float
406 A number to increase the predicted bounding box by.
407 """
408 x1t, y1t, x2t, y2t = gt_box.numpy()
409 x1, y1, x2, y2 = pred_box.numpy()
410
411 m = numpy.sqrt(multiplier)
412 d_x = ((m * (x2 - x1)) - (x2 - x1)) / 2.0
413 d_y = ((m * (y2 - y1)) - (y2 - y1)) / 2.0
414 x1 = max(x1 - d_x, 0)
415 x2 = x2 + d_x
416 y1 = max(y1 - d_y, 0)
417 y2 = y2 + d_y
418
419 gt_tensor = gt_box.detach().clone().unsqueeze(0)
420 pred_tensor = torch.tensor([x1, y1, x2, y2]).unsqueeze(0)
421
422 lt = torch.max(pred_tensor[:, None, :2], gt_tensor[:, :2]) # [N,M,2]
423 rb = torch.min(pred_tensor[:, None, 2:], gt_tensor[:, 2:]) # [N,M,2]
424
425 wh = (rb - lt).clamp(min=0) # [N,M,2]
426 inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
427
428 gt_area = (x2t - x1t) * (y2t - y1t)
429
430 if gt_area > 0:
431 return inter.item() / gt_area
432
433 else:
434 return 0