Coverage for /scratch/builds/bob/bob.ip.binseg/miniconda/conda-bld/bob.ip.binseg_1673966692152/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_p/lib/python3.10/site-packages/bob/ip/common/utils/measure.py: 98%

58 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-01-17 15:03 +0000

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3 

4import numpy 

5import scipy.special 

6import torch 

7 

8 

def tricky_division(n, d):
    """Divides n by d. Returns 0.0 in case of a division by zero.

    Works on scalars and, element-wise, on numpy arrays.

    Parameters
    ----------

    n : int, float or numpy.ndarray
        Numerator

    d : int, float or numpy.ndarray
        Denominator

    Returns
    -------

    q : float or numpy.ndarray
        ``n/d``, or exactly ``0.0`` wherever ``d == 0``
    """

    # ``(d == 0)`` evaluates to 1 exactly where d is zero, avoiding the
    # division error; ``(d != 0)`` then masks the numerator so the result is
    # 0.0 there (previously the function returned ``n`` in that case, which
    # contradicted the documented contract).
    return n * (d != 0) / (d + (d == 0))

13 

14 

def base_measures(tp, fp, tn, fn):
    """Calculates frequentist measures from true/false positive and negative
    counts

    This function can return (frequentist versions of) standard machine
    learning measures from true and false positive counts of positives and
    negatives.  For a thorough look into these and alternate names for the
    returned values, please check Wikipedia's entry on `Precision and Recall
    <https://en.wikipedia.org/wiki/Precision_and_recall>`_.


    Parameters
    ----------

    tp : int
        True positive count, AKA "hit"

    fp : int
        False positive count, AKA "false alarm", or "Type I error"

    tn : int
        True negative count, AKA "correct rejection"

    fn : int
        False Negative count, AKA "miss", or "Type II error"


    Returns
    -------

    precision : float
        P, AKA positive predictive value (PPV).  It corresponds arithmetically
        to ``tp/(tp+fp)``.  In the case ``tp+fp == 0``, this function returns
        zero for precision.

    recall : float
        R, AKA sensitivity, hit rate, or true positive rate (TPR).  It
        corresponds arithmetically to ``tp/(tp+fn)``.  In the special case
        where ``tp+fn == 0``, this function returns zero for recall.

    specificity : float
        S, AKA selectivity or true negative rate (TNR).  It
        corresponds arithmetically to ``tn/(tn+fp)``.  In the special case
        where ``tn+fp == 0``, this function returns zero for specificity.

    accuracy : float
        A, see `Accuracy
        <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_.  is
        the proportion of correct predictions (both true positives and true
        negatives) among the total number of pixels examined.  It corresponds
        arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``.  This measure includes
        both true-negatives and positives in the numerator, which makes it
        sensitive to data or regions without annotations.

    jaccard : float
        J, see `Jaccard Index or Similarity
        <https://en.wikipedia.org/wiki/Jaccard_index>`_.  It corresponds
        arithmetically to ``tp/(tp+fp+fn)``.  In the special case where
        ``tp+fp+fn == 0``, this function returns zero for the Jaccard index.
        The Jaccard index depends on a TP-only numerator, similarly to the F1
        score.  For regions where there are no annotations, the Jaccard index
        will always be zero, irrespective of the model output.  Accuracy may be
        a better proxy if one needs to consider the true absence of
        annotations in a region as part of the measure.

    f1_score : float
        F1, see `F1-score <https://en.wikipedia.org/wiki/F1_score>`_.  It
        corresponds arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``.
        In the special case where ``P+R == (2*tp+fp+fn) == 0``, this function
        returns zero for the F1-score.  The F1 or Dice score depends on a
        TP-only numerator, similarly to the Jaccard index.  For regions where
        there are no annotations, the F1-score will always be zero,
        irrespective of the model output.  Accuracy may be a better proxy if
        one needs to consider the true absence of annotations in a region as
        part of the measure.

    """

    return (
        tricky_division(tp, tp + fp),  # precision
        tricky_division(tp, tp + fn),  # recall
        tricky_division(tn, fp + tn),  # specificity
        tricky_division(tp + tn, tp + fp + fn + tn),  # accuracy
        tricky_division(tp, tp + fp + fn),  # jaccard index
        tricky_division(2 * tp, (2 * tp) + fp + fn),  # f1-score
    )

101 

102 

def beta_credible_region(k, i, lambda_, coverage):
    """
    Returns the mean, mode, upper and lower bounds of the equal-tailed
    credible region of a probability estimate following Bernoulli trials.

    This implementation is based on [GOUTTE-2005]_.  It assumes :math:`k`
    successes and :math:`l` failures (:math:`n = k+l` total trials) are issued
    from a series of Bernoulli trials (likelihood is binomial).  The posterior
    is derivated using the Bayes Theorem with a beta prior.  As there is no
    reason to favour high vs. low precision, we use a symmetric Beta prior
    (:math:`\\alpha=\\beta`):

    .. math::

       P(p|k,n) &= \\frac{P(k,n|p)P(p)}{P(k,n)} \\\\
       P(p|k,n) &= \\frac{\\frac{n!}{k!(n-k)!}p^{k}(1-p)^{n-k}P(p)}{P(k)} \\\\
       P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\beta)}p^{k+\\alpha-1}(1-p)^{n-k+\\beta-1} \\\\
       P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\alpha)}p^{k+\\alpha-1}(1-p)^{n-k+\\alpha-1}

    The mode for this posterior (also the maximum a posteriori) is:

    .. math::

       \\text{mode}(p) = \\frac{k+\\lambda-1}{n+2\\lambda-2}

    Concretely, the prior may be flat (all rates are equally likely,
    :math:`\\lambda=1`) or we may use Jeffrey's prior
    (:math:`\\lambda=0.5`), that is invariant through re-parameterisation.
    Jeffrey's prior indicates that rates close to zero or one are more likely.

    The mode above works if :math:`k+{\\alpha},n-k+{\\alpha} > 1`, which is
    usually the case for a reasonably well tuned system, with more than a few
    samples for analysis.  In the limit of the system performance, :math:`k`
    may be 0, which will make the mode become zero.

    For our purposes, it may be more suitable to represent :math:`n = k + l`,
    with :math:`k`, the number of successes and :math:`l`, the number of
    failures in the binomial experiment, and find this more suitable
    representation:

    .. math::

       P(p|k,l) &= \\frac{1}{B(k+\\alpha, l+\\alpha)}p^{k+\\alpha-1}(1-p)^{l+\\alpha-1} \\\\
       \\text{mode}(p) &= \\frac{k+\\lambda-1}{k+l+2\\lambda-2}

    This can be mapped to most rates calculated in the context of binary
    classification this way:

    * Precision or Positive-Predictive Value (PPV): p = TP/(TP+FP), so k=TP, l=FP
    * Recall, Sensitivity, or True Positive Rate: r = TP/(TP+FN), so k=TP, l=FN
    * Specificity or True Negative Rate: s = TN/(TN+FP), so k=TN, l=FP
    * F1-score: f1 = 2TP/(2TP+FP+FN), so k=2TP, l=FP+FN
    * Accuracy: acc = TP+TN/(TP+TN+FP+FN), so k=TP+TN, l=FP+FN
    * Jaccard: j = TP/(TP+FP+FN), so k=TP, l=FP+FN

    Contrary to frequentist approaches, in which one can only
    say that if the test were repeated an infinite number of times,
    and one constructed a confidence interval each time, then X%
    of the confidence intervals would contain the true rate, here
    we can say that given our observed data, there is a X% probability
    that the true value of :math:`k/n` falls within the provided
    interval.


    .. note::

       For a disambiguation with Confidence Interval, read
       https://en.wikipedia.org/wiki/Credible_interval.


    Parameters
    ==========

    k : int
        Number of successes observed on the experiment

    i : int
        Number of failures observed on the experiment

    lambda_ : float
        The parameterisation of the Beta prior to consider.  Use
        :math:`\\lambda=1` for a flat prior.  Use :math:`\\lambda=0.5` for
        Jeffrey's prior.

    coverage : float
        A floating-point number between 0 and 1.0 indicating the
        coverage you're expecting.  A value of 0.95 will ensure 95%
        of the area under the probability density of the posterior
        is covered by the returned equal-tailed interval.


    Returns
    =======

    mean : float
        The mean of the posterior distribution

    mode : float
        The mode of the posterior distribution

    lower, upper: float
        The lower and upper bounds of the credible region

    """

    # we return the equally-tailed range
    right = (1.0 - coverage) / 2  # half-width in each side
    lower = scipy.special.betaincinv(k + lambda_, i + lambda_, right)
    upper = scipy.special.betaincinv(k + lambda_, i + lambda_, 1.0 - right)

    # evaluate mean and mode (https://en.wikipedia.org/wiki/Beta_distribution)
    alpha = k + lambda_
    beta = i + lambda_

    E = alpha / (alpha + beta)

    # the mode of a beta distribution is a bit tricky
    if alpha > 1 and beta > 1:
        mode = (alpha - 1) / (alpha + beta - 2)
    elif alpha == 1 and beta == 1:
        # In the case of precision, if the threshold is close to 1.0, both TP
        # and FP can be zero, which may cause this condition to be reached, if
        # the prior is exactly 1 (flat prior).  This is a weird situation,
        # because effectively we are trying to compute the posterior when the
        # total number of experiments is zero.  So, only the prior counts - but
        # the prior is flat, so any value is as likely as any other; we pick
        # the left of the range for consistency with the other degenerate
        # cases below.
        mode = 0.0  # any value would do, we just pick this one
    elif alpha <= 1 and beta > 1:
        mode = 0.0
    elif alpha > 1 and beta <= 1:
        mode = 1.0
    else:  # elif alpha < 1 and beta < 1:
        # in the case of precision, if the threshold is close to 1.0, both TP
        # and FP can be zero, which may cause this condition to be reached, if
        # the prior is smaller than 1.  This is a weird situation, because
        # effectively we are trying to compute the posterior when the total
        # number of experiments is zero.  So, only the prior counts - but the
        # prior is bimodal, so we should just pick a value.  We choose the
        # left of the range.
        mode = 0.0  # could also be 1.0 as the prior is bimodal

    return E, mode, lower, upper

246 

247 

def bayesian_measures(tp, fp, tn, fn, lambda_, coverage):
    """Calculates mean and mode from true/false positive and negative counts
    with credible regions

    This function can return bayesian estimates of standard machine learning
    measures from true and false positive counts of positives and negatives.
    For a thorough look into these and alternate names for the returned values,
    please check Wikipedia's entry on `Precision and Recall
    <https://en.wikipedia.org/wiki/Precision_and_recall>`_.  See
    :py:func:`beta_credible_region` for details on the calculation of returned
    values.


    Parameters
    ----------

    tp : int
        True positive count, AKA "hit"

    fp : int
        False positive count, AKA "false alarm", or "Type I error"

    tn : int
        True negative count, AKA "correct rejection"

    fn : int
        False Negative count, AKA "miss", or "Type II error"

    lambda_ : float
        The parameterisation of the Beta prior to consider. Use
        :math:`\\lambda=1` for a flat prior.  Use :math:`\\lambda=0.5` for
        Jeffrey's prior.

    coverage : float
        A floating-point number between 0 and 1.0 indicating the
        coverage you're expecting.  A value of 0.95 will ensure 95%
        of the area under the probability density of the posterior
        is covered by the returned equal-tailed interval.



    Returns
    -------

    precision : (float, float, float, float)
        P, AKA positive predictive value (PPV), mean, mode and credible
        intervals (95% CI).  It corresponds arithmetically
        to ``tp/(tp+fp)``.

    recall : (float, float, float, float)
        R, AKA sensitivity, hit rate, or true positive rate (TPR), mean, mode
        and credible intervals (95% CI).  It corresponds arithmetically to
        ``tp/(tp+fn)``.

    specificity : (float, float, float, float)
        S, AKA selectivity or true negative rate (TNR), mean, mode and credible
        intervals (95% CI).  It corresponds arithmetically to ``tn/(tn+fp)``.

    accuracy : (float, float, float, float)
        A, mean, mode and credible intervals (95% CI).  See `Accuracy
        <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_.  is
        the proportion of correct predictions (both true positives and true
        negatives) among the total number of pixels examined.  It corresponds
        arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``.  This measure includes
        both true-negatives and positives in the numerator, which makes it
        sensitive to data or regions without annotations.

    jaccard : (float, float, float, float)
        J, mean, mode and credible intervals (95% CI).  See `Jaccard Index or
        Similarity <https://en.wikipedia.org/wiki/Jaccard_index>`_.  It
        corresponds arithmetically to ``tp/(tp+fp+fn)``.  The Jaccard index
        depends on a TP-only numerator, similarly to the F1 score.  For regions
        where there are no annotations, the Jaccard index will always be zero,
        irrespective of the model output.  Accuracy may be a better proxy if
        one needs to consider the true absence of annotations in a region as
        part of the measure.

    f1_score : (float, float, float, float)
        F1, mean, mode and credible intervals (95% CI). See `F1-score
        <https://en.wikipedia.org/wiki/F1_score>`_.  It corresponds
        arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``.  The F1 or
        Dice score depends on a TP-only numerator, similarly to the Jaccard
        index.  For regions where there are no annotations, the F1-score will
        always be zero, irrespective of the model output.  Accuracy may be a
        better proxy if one needs to consider the true absence of annotations
        in a region as part of the measure.
    """

    return (
        beta_credible_region(tp, fp, lambda_, coverage),  # precision
        beta_credible_region(tp, fn, lambda_, coverage),  # recall
        beta_credible_region(tn, fp, lambda_, coverage),  # specificity
        beta_credible_region(tp + tn, fp + fn, lambda_, coverage),  # accuracy
        beta_credible_region(tp, fp + fn, lambda_, coverage),  # jaccard index
        beta_credible_region(2 * tp, fp + fn, lambda_, coverage),  # f1-score
    )

344 

345 

def auc(x, y):
    """Calculates the area under the precision-recall curve (AUC)

    This function requires a minimum of 2 points and will use the trapezoidal
    method to calculate the area under a curve bound between ``[0.0, 1.0]``.
    It interpolates missing points if required.  The input ``x`` should be
    continuously increasing or decreasing.


    Parameters
    ----------

    x : numpy.ndarray
        A 1D numpy array containing continuously increasing or decreasing
        values for the X coordinate.

    y : numpy.ndarray
        A 1D numpy array containing the Y coordinates of the X values provided
        in ``x``.


    Returns
    -------

    area : float
        The area under the curve, approximated on a fine regular grid.


    Raises
    ------

    ValueError
        If ``x`` and ``y`` have different lengths, or if ``x`` is neither
        monotonically increasing nor monotonically decreasing.

    """

    x = numpy.array(x)
    y = numpy.array(y)

    # raise (instead of assert) so the check survives python -O
    if len(x) != len(y):
        raise ValueError("x and y sequences must have the same length")

    dx = numpy.diff(x)
    if numpy.any(dx < 0):
        if numpy.all(dx <= 0):
            # monotonically decreasing input: flip so x increases, as
            # required by numpy.interp
            x = x[::-1]
            y = y[::-1]
        else:
            raise ValueError(
                "x is neither increasing nor decreasing: {}".format(x)
            )

    # resample on a 1000-point regular grid over [0, 1) and integrate with a
    # rectangle rule; points outside the observed x-range get y=1 (left) and
    # y=0 (right), matching precision-recall curve conventions
    y_interp = numpy.interp(
        numpy.arange(0, 1, 0.001),
        x,
        y,
        left=1.0,
        right=0.0,
    )
    return y_interp.sum() * 0.001

392 

393 

def get_intersection(pred_box, gt_box, multiplier):
    """Calculate the intersection of an (enlarged) predicted box with the
    ground-truth box, normalized by the ground-truth area.

    The predicted box has its area scaled by ``multiplier`` (its width and
    height each grow by a factor of ``sqrt(multiplier)``, centered on the
    original box, clipped at zero on the low side) before the intersection is
    computed.

    Parameters
    ----------
    pred_box : torch.Tensor
        A 1D tensor containing predicted box coords ``[x1, y1, x2, y2]``.

    gt_box : torch.Tensor
        A 1D tensor containing ground truth box coords ``[x1, y1, x2, y2]``.

    multiplier: float
        A number to increase the predicted bounding box area by.

    Returns
    -------
    ratio : float
        Intersection area divided by the ground-truth box area, or ``0.0``
        if the ground-truth box has no area.
    """
    x1t, y1t, x2t, y2t = gt_box.numpy()
    x1, y1, x2, y2 = pred_box.numpy()

    # grow each side of the predicted box so its area scales by `multiplier`
    m = numpy.sqrt(multiplier)
    d_x = ((m * (x2 - x1)) - (x2 - x1)) / 2.0
    d_y = ((m * (y2 - y1)) - (y2 - y1)) / 2.0
    x1 = max(x1 - d_x, 0)
    x2 = x2 + d_x
    y1 = max(y1 - d_y, 0)
    y2 = y2 + d_y

    gt_tensor = gt_box.detach().clone().unsqueeze(0)
    pred_tensor = torch.tensor([x1, y1, x2, y2]).unsqueeze(0)

    # standard box-intersection: top-left is the max of the two top-lefts,
    # bottom-right is the min of the two bottom-rights
    lt = torch.max(pred_tensor[:, None, :2], gt_tensor[:, :2])  # [N,M,2]
    rb = torch.min(pred_tensor[:, None, 2:], gt_tensor[:, 2:])  # [N,M,2]

    wh = (rb - lt).clamp(min=0)  # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

    gt_area = (x2t - x1t) * (y2t - y1t)

    if gt_area > 0:
        return inter.item() / gt_area

    else:
        # return a float (previously the int 0) for type consistency with
        # the branch above
        return 0.0