Coverage for src/deepdraw/utils/measure.py: 67%

58 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-11-30 15:00 +0100

1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> 

2# 

3# SPDX-License-Identifier: GPL-3.0-or-later 

4 

5import numpy 

6import scipy.special 

7import torch 

8 

9 

10def tricky_division(n, d): 

11 """Divides n by d. 

12 

13 Returns 0.0 in case of a division by zero 

14 """ 

15 

16 return n / (d + (d == 0)) 

17 

18 

19def base_measures(tp, fp, tn, fn): 

20 """Calculates frequentist measures from true/false positive and negative 

21 counts. 

22 

23 This function can return (frequentist versions of) standard machine 

24 learning measures from true and false positive counts of positives and 

25 negatives. For a thorough look into these and alternate names for the 

26 returned values, please check Wikipedia's entry on `Precision and Recall 

27 <https://en.wikipedia.org/wiki/Precision_and_recall>`_. 

28 

29 

30 Parameters 

31 ---------- 

32 

33 tp : int 

34 True positive count, AKA "hit" 

35 

36 fp : int 

37 False positive count, AKA "false alarm", or "Type I error" 

38 

39 tn : int 

40 True negative count, AKA "correct rejection" 

41 

42 fn : int 

43 False Negative count, AKA "miss", or "Type II error" 

44 

45 

46 Returns 

47 ------- 

48 

49 precision : float 

50 P, AKA positive predictive value (PPV). It corresponds arithmetically 

51 to ``tp/(tp+fp)``. In the case ``tp+fp == 0``, this function returns 

52 zero for precision. 

53 

54 recall : float 

55 R, AKA sensitivity, hit rate, or true positive rate (TPR). It 

56 corresponds arithmetically to ``tp/(tp+fn)``. In the special case 

57 where ``tp+fn == 0``, this function returns zero for recall. 

58 

59 specificity : float 

60 S, AKA selectivity or true negative rate (TNR). It 

61 corresponds arithmetically to ``tn/(tn+fp)``. In the special case 

62 where ``tn+fp == 0``, this function returns zero for specificity. 

63 

64 accuracy : float 

65 A, see `Accuracy 

66 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is 

67 the proportion of correct predictions (both true positives and true 

68 negatives) among the total number of pixels examined. It corresponds 

69 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes 

70 both true-negatives and positives in the numerator, what makes it 

71 sensitive to data or regions without annotations. 

72 

73 jaccard : float 

74 J, see `Jaccard Index or Similarity 

75 <https://en.wikipedia.org/wiki/Jaccard_index>`_. It corresponds 

76 arithmetically to ``tp/(tp+fp+fn)``. In the special case where 

77 ``tn+fp+fn == 0``, this function returns zero for the Jaccard index. 

78 The Jaccard index depends on a TP-only numerator, similarly to the F1 

79 score. For regions where there are no annotations, the Jaccard index 

80 will always be zero, irrespective of the model output. Accuracy may be 

81 a better proxy if one needs to consider the true abscence of 

82 annotations in a region as part of the measure. 

83 

84 f1_score : float 

85 F1, see `F1-score <https://en.wikipedia.org/wiki/F1_score>`_. It 

86 corresponds arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``. 

87 In the special case where ``P+R == (2*tp+fp+fn) == 0``, this function 

88 returns zero for the Jaccard index. The F1 or Dice score depends on a 

89 TP-only numerator, similarly to the Jaccard index. For regions where 

90 there are no annotations, the F1-score will always be zero, 

91 irrespective of the model output. Accuracy may be a better proxy if 

92 one needs to consider the true abscence of annotations in a region as 

93 part of the measure. 

94 """ 

95 

96 return ( 

97 tricky_division(tp, tp + fp), # precision 

98 tricky_division(tp, tp + fn), # recall 

99 tricky_division(tn, fp + tn), # specificity 

100 tricky_division(tp + tn, tp + fp + fn + tn), # accuracy 

101 tricky_division(tp, tp + fp + fn), # jaccard index 

102 tricky_division(2 * tp, (2 * tp) + fp + fn), # f1-score 

103 ) 

104 

105 

106def beta_credible_region(k, i, lambda_, coverage): 

107 """Returns the mode, upper and lower bounds of the equal-tailed credible 

108 region of a probability estimate following Bernoulli trials. 

109 

110 This implemetnation is based on [GOUTTE-2005]_. It assumes :math:`k` 

111 successes and :math:`l` failures (:math:`n = k+l` total trials) are issued 

112 from a series of Bernoulli trials (likelihood is binomial). The posterior 

113 is derivated using the Bayes Theorem with a beta prior. As there is no 

114 reason to favour high vs. low precision, we use a symmetric Beta prior 

115 (:math:`\\alpha=\\beta`): 

116 

117 .. math:: 

118 

119 P(p|k,n) &= \\frac{P(k,n|p)P(p)}{P(k,n)} \\\\ 

120 P(p|k,n) &= \\frac{\\frac{n!}{k!(n-k)!}p^{k}(1-p)^{n-k}P(p)}{P(k)} \\\\ 

121 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\beta)}p^{k+\\alpha-1}(1-p)^{n-k+\\beta-1} \\\\ 

122 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\alpha)}p^{k+\\alpha-1}(1-p)^{n-k+\\alpha-1} 

123 

124 The mode for this posterior (also the maximum a posteriori) is: 

125 

126 .. math:: 

127 

128 \\text{mode}(p) = \\frac{k+\\lambda-1}{n+2\\lambda-2} 

129 

130 Concretely, the prior may be flat (all rates are equally likely, 

131 :math:`\\lambda=1`) or we may use Jeoffrey's prior 

132 (:math:`\\lambda=0.5`), that is invariant through re-parameterisation. 

133 Jeffrey's prior indicate that rates close to zero or one are more likely. 

134 

135 The mode above works if :math:`k+{\\alpha},n-k+{\\alpha} > 1`, which is 

136 usually the case for a resonably well tunned system, with more than a few 

137 samples for analysis. In the limit of the system performance, :math:`k` 

138 may be 0, which will make the mode become zero. 

139 

140 For our purposes, it may be more suitable to represent :math:`n = k + l`, 

141 with :math:`k`, the number of successes and :math:`l`, the number of 

142 failures in the binomial experiment, and find this more suitable 

143 representation: 

144 

145 .. math:: 

146 

147 P(p|k,l) &= \\frac{1}{B(k+\\alpha, l+\\alpha)}p^{k+\\alpha-1}(1-p)^{l+\\alpha-1} \\\\ 

148 \\text{mode}(p) &= \\frac{k+\\lambda-1}{k+l+2\\lambda-2} 

149 

150 This can be mapped to most rates calculated in the context of binary 

151 classification this way: 

152 

153 * Precision or Positive-Predictive Value (PPV): p = TP/(TP+FP), so k=TP, l=FP 

154 * Recall, Sensitivity, or True Positive Rate: r = TP/(TP+FN), so k=TP, l=FN 

155 * Specificity or True Negative Rage: s = TN/(TN+FP), so k=TN, l=FP 

156 * F1-score: f1 = 2TP/(2TP+FP+FN), so k=2TP, l=FP+FN 

157 * Accuracy: acc = TP+TN/(TP+TN+FP+FN), so k=TP+TN, l=FP+FN 

158 * Jaccard: j = TP/(TP+FP+FN), so k=TP, l=FP+FN 

159 

160 Contrary to frequentist approaches, in which one can only 

161 say that if the test were repeated an infinite number of times, 

162 and one constructed a confidence interval each time, then X% 

163 of the confidence intervals would contain the true rate, here 

164 we can say that given our observed data, there is a X% probability 

165 that the true value of :math:`k/n` falls within the provided 

166 interval. 

167 

168 

169 .. note:: 

170 

171 For a disambiguation with Confidence Interval, read 

172 https://en.wikipedia.org/wiki/Credible_interval. 

173 

174 

175 Parameters 

176 ========== 

177 

178 k : int 

179 Number of successes observed on the experiment 

180 

181 i : int 

182 Number of failures observed on the experiment 

183 

184 lambda__ : :py:class:`float`, Optional 

185 The parameterisation of the Beta prior to consider. Use 

186 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for 

187 Jeffrey's prior (the default). 

188 

189 coverage : :py:class:`float`, Optional 

190 A floating-point number between 0 and 1.0 indicating the 

191 coverage you're expecting. A value of 0.95 will ensure 95% 

192 of the area under the probability density of the posterior 

193 is covered by the returned equal-tailed interval. 

194 

195 

196 Returns 

197 ======= 

198 

199 mean : float 

200 The mean of the posterior distribution 

201 

202 mode : float 

203 The mode of the posterior distributA questão do volume eion 

204 

205 lower, upper: float 

206 The lower and upper bounds of the credible region 

207 """ 

208 

209 # we return the equally-tailed range 

210 right = (1.0 - coverage) / 2 # half-width in each side 

211 lower = scipy.special.betaincinv(k + lambda_, i + lambda_, right) 

212 upper = scipy.special.betaincinv(k + lambda_, i + lambda_, 1.0 - right) 

213 

214 # evaluate mean and mode (https://en.wikipedia.org/wiki/Beta_distribution) 

215 alpha = k + lambda_ 

216 beta = i + lambda_ 

217 

218 E = alpha / (alpha + beta) 

219 

220 # the mode of a beta distribution is a bit tricky 

221 if alpha > 1 and beta > 1: 

222 mode = (alpha - 1) / (alpha + beta - 2) 

223 elif alpha == 1 and beta == 1: 

224 # In the case of precision, if the threshold is close to 1.0, both TP 

225 # and FP can be zero, which may cause this condition to be reached, if 

226 # the prior is exactly 1 (flat prior). This is a weird situation, 

227 # because effectively we are trying to compute the posterior when the 

228 # total number of experiments is zero. So, only the prior counts - but 

229 # the prior is flat, so we should just pick a value. We choose the 

230 # middle of the range. 

231 mode = 0.0 # any value would do, we just pick this one 

232 elif alpha <= 1 and beta > 1: 

233 mode = 0.0 

234 elif alpha > 1 and beta <= 1: 

235 mode = 1.0 

236 else: # elif alpha < 1 and beta < 1: 

237 # in the case of precision, if the threshold is close to 1.0, both TP 

238 # and FP can be zero, which may cause this condition to be reached, if 

239 # the prior is smaller than 1. This is a weird situation, because 

240 # effectively we are trying to compute the posterior when the total 

241 # number of experiments is zero. So, only the prior counts - but the 

242 # prior is bimodal, so we should just pick a value. We choose the 

243 # left of the range. 

244 mode = 0.0 # could also be 1.0 as the prior is bimodal 

245 

246 return E, mode, lower, upper 

247 

248 

249def bayesian_measures(tp, fp, tn, fn, lambda_, coverage): 

250 """Calculates mean and mode from true/false positive and negative counts 

251 with credible regions. 

252 

253 This function can return bayesian estimates of standard machine learning 

254 measures from true and false positive counts of positives and negatives. 

255 For a thorough look into these and alternate names for the returned values, 

256 please check Wikipedia's entry on `Precision and Recall 

257 <https://en.wikipedia.org/wiki/Precision_and_recall>`_. See 

258 :py:func:`beta_credible_region` for details on the calculation of returned 

259 values. 

260 

261 

262 Parameters 

263 ---------- 

264 

265 tp : int 

266 True positive count, AKA "hit" 

267 

268 fp : int 

269 False positive count, AKA "false alarm", or "Type I error" 

270 

271 tn : int 

272 True negative count, AKA "correct rejection" 

273 

274 fn : int 

275 False Negative count, AKA "miss", or "Type II error" 

276 

277 lambda_ : float 

278 The parameterisation of the Beta prior to consider. Use 

279 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for 

280 Jeffrey's prior. 

281 

282 coverage : float 

283 A floating-point number between 0 and 1.0 indicating the 

284 coverage you're expecting. A value of 0.95 will ensure 95% 

285 of the area under the probability density of the posterior 

286 is covered by the returned equal-tailed interval. 

287 

288 

289 

290 Returns 

291 ------- 

292 

293 precision : (float, float, float, float) 

294 P, AKA positive predictive value (PPV), mean, mode and credible 

295 intervals (95% CI). It corresponds arithmetically 

296 to ``tp/(tp+fp)``. 

297 

298 recall : (float, float, float, float) 

299 R, AKA sensitivity, hit rate, or true positive rate (TPR), mean, mode 

300 and credible intervals (95% CI). It corresponds arithmetically to 

301 ``tp/(tp+fn)``. 

302 

303 specificity : (float, float, float, float) 

304 S, AKA selectivity or true negative rate (TNR), mean, mode and credible 

305 intervals (95% CI). It corresponds arithmetically to ``tn/(tn+fp)``. 

306 

307 accuracy : (float, float, float, float) 

308 A, mean, mode and credible intervals (95% CI). See `Accuracy 

309 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is 

310 the proportion of correct predictions (both true positives and true 

311 negatives) among the total number of pixels examined. It corresponds 

312 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes 

313 both true-negatives and positives in the numerator, what makes it 

314 sensitive to data or regions without annotations. 

315 

316 jaccard : (float, float, float, float) 

317 J, mean, mode and credible intervals (95% CI). See `Jaccard Index or 

318 Similarity <https://en.wikipedia.org/wiki/Jaccard_index>`_. It 

319 corresponds arithmetically to ``tp/(tp+fp+fn)``. The Jaccard index 

320 depends on a TP-only numerator, similarly to the F1 score. For regions 

321 where there are no annotations, the Jaccard index will always be zero, 

322 irrespective of the model output. Accuracy may be a better proxy if 

323 one needs to consider the true abscence of annotations in a region as 

324 part of the measure. 

325 

326 f1_score : (float, float, float, float) 

327 F1, mean, mode and credible intervals (95% CI). See `F1-score 

328 <https://en.wikipedia.org/wiki/F1_score>`_. It corresponds 

329 arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``. The F1 or 

330 Dice score depends on a TP-only numerator, similarly to the Jaccard 

331 index. For regions where there are no annotations, the F1-score will 

332 always be zero, irrespective of the model output. Accuracy may be a 

333 better proxy if one needs to consider the true abscence of annotations 

334 in a region as part of the measure. 

335 """ 

336 

337 return ( 

338 beta_credible_region(tp, fp, lambda_, coverage), # precision 

339 beta_credible_region(tp, fn, lambda_, coverage), # recall 

340 beta_credible_region(tn, fp, lambda_, coverage), # specificity 

341 beta_credible_region(tp + tn, fp + fn, lambda_, coverage), # accuracy 

342 beta_credible_region(tp, fp + fn, lambda_, coverage), # jaccard index 

343 beta_credible_region(2 * tp, fp + fn, lambda_, coverage), # f1-score 

344 ) 

345 

346 

347def auc(x, y): 

348 """Calculates the area under the precision-recall curve (AUC) 

349 

350 This function requires a minimum of 2 points and will use the trapezoidal 

351 method to calculate the area under a curve bound between ``[0.0, 1.0]``. 

352 It interpolates missing points if required. The input ``x`` should be 

353 continuously increasing or decreasing. 

354 

355 

356 Parameters 

357 ---------- 

358 

359 x : numpy.ndarray 

360 A 1D numpy array containing continuously increasing or decreasing 

361 values for the X coordinate. 

362 

363 y : numpy.ndarray 

364 A 1D numpy array containing the Y coordinates of the X values provided 

365 in ``x``. 

366 """ 

367 

368 x = numpy.array(x) 

369 y = numpy.array(y) 

370 

371 assert len(x) == len(y), "x and y sequences must have the same length" 

372 

373 dx = numpy.diff(x) 

374 if numpy.any(dx < 0): 

375 if numpy.all(dx <= 0): 

376 # invert direction 

377 x = x[::-1] 

378 y = y[::-1] 

379 else: 

380 raise ValueError( 

381 "x is neither increasing nor decreasing " ": {}.".format(x) 

382 ) 

383 

384 y_interp = numpy.interp( 

385 numpy.arange(0, 1, 0.001), 

386 numpy.array(x), 

387 numpy.array(y), 

388 left=1.0, 

389 right=0.0, 

390 ) 

391 return y_interp.sum() * 0.001 

392 

393 

394def get_intersection(pred_box, gt_box, multiplier): 

395 """Calculate intersection of boxes. 

396 

397 Parameters 

398 ---------- 

399 pred_box : torch.Tensor 

400 A 1D numpy array containing predicted box coords. 

401 

402 gt_box : torch.Tensor 

403 A 1D numpy array containing groud truth box coords. 

404 

405 multiplier: float 

406 A number to increase the predicted bounding box by. 

407 """ 

408 x1t, y1t, x2t, y2t = gt_box.numpy() 

409 x1, y1, x2, y2 = pred_box.numpy() 

410 

411 m = numpy.sqrt(multiplier) 

412 d_x = ((m * (x2 - x1)) - (x2 - x1)) / 2.0 

413 d_y = ((m * (y2 - y1)) - (y2 - y1)) / 2.0 

414 x1 = max(x1 - d_x, 0) 

415 x2 = x2 + d_x 

416 y1 = max(y1 - d_y, 0) 

417 y2 = y2 + d_y 

418 

419 gt_tensor = gt_box.detach().clone().unsqueeze(0) 

420 pred_tensor = torch.tensor([x1, y1, x2, y2]).unsqueeze(0) 

421 

422 lt = torch.max(pred_tensor[:, None, :2], gt_tensor[:, :2]) # [N,M,2] 

423 rb = torch.min(pred_tensor[:, None, 2:], gt_tensor[:, 2:]) # [N,M,2] 

424 

425 wh = (rb - lt).clamp(min=0) # [N,M,2] 

426 inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 

427 

428 gt_area = (x2t - x1t) * (y2t - y1t) 

429 

430 if gt_area > 0: 

431 return inter.item() / gt_area 

432 

433 else: 

434 return 0