Coverage for /scratch/builds/bob/bob.ip.binseg/miniconda/conda-bld/bob.ip.binseg_1673966692152/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_p/lib/python3.10/site-packages/bob/ip/common/utils/measure.py: 98%

1#!/usr/bin/env python

2# -*- coding: utf-8 -*-

4import numpy

5import scipy.special

6import torch

9def tricky_division(n, d):

10 """Divides n by d. Returns 0.0 in case of a division by zero"""

12 return n / (d + (d == 0))

15def base_measures(tp, fp, tn, fn):

16 """Calculates frequentist measures from true/false positive and negative

17 counts

19 This function can return (frequentist versions of) standard machine

20 learning measures from true and false positive counts of positives and

21 negatives. For a thorough look into these and alternate names for the

22 returned values, please check Wikipedia's entry on `Precision and Recall

23 <https://en.wikipedia.org/wiki/Precision_and_recall>`_.

26 Parameters

27 ----------

29 tp : int

30 True positive count, AKA "hit"

32 fp : int

33 False positive count, AKA "false alarm", or "Type I error"

35 tn : int

36 True negative count, AKA "correct rejection"

38 fn : int

39 False Negative count, AKA "miss", or "Type II error"

42 Returns

43 -------

45 precision : float

46 P, AKA positive predictive value (PPV). It corresponds arithmetically

47 to ``tp/(tp+fp)``. In the case ``tp+fp == 0``, this function returns

48 zero for precision.

50 recall : float

51 R, AKA sensitivity, hit rate, or true positive rate (TPR). It

52 corresponds arithmetically to ``tp/(tp+fn)``. In the special case

53 where ``tp+fn == 0``, this function returns zero for recall.

55 specificity : float

56 S, AKA selectivity or true negative rate (TNR). It

57 corresponds arithmetically to ``tn/(tn+fp)``. In the special case

58 where ``tn+fp == 0``, this function returns zero for specificity.

60 accuracy : float

61 A, see `Accuracy

62 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is

63 the proportion of correct predictions (both true positives and true

64 negatives) among the total number of pixels examined. It corresponds

65 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes

66 both true-negatives and positives in the numerator, what makes it

67 sensitive to data or regions without annotations.

69 jaccard : float

70 J, see `Jaccard Index or Similarity

71 <https://en.wikipedia.org/wiki/Jaccard_index>`_. It corresponds

72 arithmetically to ``tp/(tp+fp+fn)``. In the special case where

73 ``tn+fp+fn == 0``, this function returns zero for the Jaccard index.

74 The Jaccard index depends on a TP-only numerator, similarly to the F1

75 score. For regions where there are no annotations, the Jaccard index

76 will always be zero, irrespective of the model output. Accuracy may be

77 a better proxy if one needs to consider the true abscence of

78 annotations in a region as part of the measure.

80 f1_score : float

81 F1, see `F1-score <https://en.wikipedia.org/wiki/F1_score>`_. It

82 corresponds arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``.

83 In the special case where ``P+R == (2*tp+fp+fn) == 0``, this function

84 returns zero for the Jaccard index. The F1 or Dice score depends on a

85 TP-only numerator, similarly to the Jaccard index. For regions where

86 there are no annotations, the F1-score will always be zero,

87 irrespective of the model output. Accuracy may be a better proxy if

88 one needs to consider the true abscence of annotations in a region as

89 part of the measure.

91 """

93 return (

94 tricky_division(tp, tp + fp), # precision

95 tricky_division(tp, tp + fn), # recall

96 tricky_division(tn, fp + tn), # specificity

97 tricky_division(tp + tn, tp + fp + fn + tn), # accuracy

98 tricky_division(tp, tp + fp + fn), # jaccard index

99 tricky_division(2 * tp, (2 * tp) + fp + fn), # f1-score

100 )

101

102

103def beta_credible_region(k, i, lambda_, coverage):

104 """

105 Returns the mode, upper and lower bounds of the equal-tailed credible

106 region of a probability estimate following Bernoulli trials.

107

108 This implemetnation is based on [GOUTTE-2005]_. It assumes :math:`k`

109 successes and :math:`l` failures (:math:`n = k+l` total trials) are issued

110 from a series of Bernoulli trials (likelihood is binomial). The posterior

111 is derivated using the Bayes Theorem with a beta prior. As there is no

112 reason to favour high vs. low precision, we use a symmetric Beta prior

113 (:math:`\\alpha=\\beta`):

114

115 .. math::

116

117 P(p|k,n) &= \\frac{P(k,n|p)P(p)}{P(k,n)} \\\\

118 P(p|k,n) &= \\frac{\\frac{n!}{k!(n-k)!}p^{k}(1-p)^{n-k}P(p)}{P(k)} \\\\

119 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\beta)}p^{k+\\alpha-1}(1-p)^{n-k+\\beta-1} \\\\

120 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\alpha)}p^{k+\\alpha-1}(1-p)^{n-k+\\alpha-1}

121

122 The mode for this posterior (also the maximum a posteriori) is:

123

124 .. math::

125

126 \\text{mode}(p) = \\frac{k+\\lambda-1}{n+2\\lambda-2}

127

128 Concretely, the prior may be flat (all rates are equally likely,

129 :math:`\\lambda=1`) or we may use Jeoffrey's prior

130 (:math:`\\lambda=0.5`), that is invariant through re-parameterisation.

131 Jeffrey's prior indicate that rates close to zero or one are more likely.

132

133 The mode above works if :math:`k+{\\alpha},n-k+{\\alpha} > 1`, which is

134 usually the case for a resonably well tunned system, with more than a few

135 samples for analysis. In the limit of the system performance, :math:`k`

136 may be 0, which will make the mode become zero.

137

138 For our purposes, it may be more suitable to represent :math:`n = k + l`,

139 with :math:`k`, the number of successes and :math:`l`, the number of

140 failures in the binomial experiment, and find this more suitable

141 representation:

142

143 .. math::

144

145 P(p|k,l) &= \\frac{1}{B(k+\\alpha, l+\\alpha)}p^{k+\\alpha-1}(1-p)^{l+\\alpha-1} \\\\

146 \\text{mode}(p) &= \\frac{k+\\lambda-1}{k+l+2\\lambda-2}

147

148 This can be mapped to most rates calculated in the context of binary

149 classification this way:

150

151 * Precision or Positive-Predictive Value (PPV): p = TP/(TP+FP), so k=TP, l=FP

152 * Recall, Sensitivity, or True Positive Rate: r = TP/(TP+FN), so k=TP, l=FN

153 * Specificity or True Negative Rage: s = TN/(TN+FP), so k=TN, l=FP

154 * F1-score: f1 = 2TP/(2TP+FP+FN), so k=2TP, l=FP+FN

155 * Accuracy: acc = TP+TN/(TP+TN+FP+FN), so k=TP+TN, l=FP+FN

156 * Jaccard: j = TP/(TP+FP+FN), so k=TP, l=FP+FN

157

158 Contrary to frequentist approaches, in which one can only

159 say that if the test were repeated an infinite number of times,

160 and one constructed a confidence interval each time, then X%

161 of the confidence intervals would contain the true rate, here

162 we can say that given our observed data, there is a X% probability

163 that the true value of :math:`k/n` falls within the provided

164 interval.

165

166

167 .. note::

168

169 For a disambiguation with Confidence Interval, read

170 https://en.wikipedia.org/wiki/Credible_interval.

171

172

173 Parameters

174 ==========

175

176 k : int

177 Number of successes observed on the experiment

178

179 i : int

180 Number of failures observed on the experiment

181

182 lambda__ : :py:class:`float`, Optional

183 The parameterisation of the Beta prior to consider. Use

184 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for

185 Jeffrey's prior (the default).

186

187 coverage : :py:class:`float`, Optional

188 A floating-point number between 0 and 1.0 indicating the

189 coverage you're expecting. A value of 0.95 will ensure 95%

190 of the area under the probability density of the posterior

191 is covered by the returned equal-tailed interval.

192

193

194 Returns

195 =======

196

197 mean : float

198 The mean of the posterior distribution

199

200 mode : float

201 The mode of the posterior distributA questão do volume eion

202

203 lower, upper: float

204 The lower and upper bounds of the credible region

205

206 """

207

208 # we return the equally-tailed range

209 right = (1.0 - coverage) / 2 # half-width in each side

210 lower = scipy.special.betaincinv(k + lambda_, i + lambda_, right)

211 upper = scipy.special.betaincinv(k + lambda_, i + lambda_, 1.0 - right)

212

213 # evaluate mean and mode (https://en.wikipedia.org/wiki/Beta_distribution)

214 alpha = k + lambda_

215 beta = i + lambda_

216

217 E = alpha / (alpha + beta)

218

219 # the mode of a beta distribution is a bit tricky

220 if alpha > 1 and beta > 1:

221 mode = (alpha - 1) / (alpha + beta - 2)

222 elif alpha == 1 and beta == 1:

223 # In the case of precision, if the threshold is close to 1.0, both TP

224 # and FP can be zero, which may cause this condition to be reached, if

225 # the prior is exactly 1 (flat prior). This is a weird situation,

226 # because effectively we are trying to compute the posterior when the

227 # total number of experiments is zero. So, only the prior counts - but

228 # the prior is flat, so we should just pick a value. We choose the

229 # middle of the range.

230 mode = 0.0 # any value would do, we just pick this one

231 elif alpha <= 1 and beta > 1:

232 mode = 0.0

233 elif alpha > 1 and beta <= 1:

234 mode = 1.0

235 else: # elif alpha < 1 and beta < 1:

236 # in the case of precision, if the threshold is close to 1.0, both TP

237 # and FP can be zero, which may cause this condition to be reached, if

238 # the prior is smaller than 1. This is a weird situation, because

239 # effectively we are trying to compute the posterior when the total

240 # number of experiments is zero. So, only the prior counts - but the

241 # prior is bimodal, so we should just pick a value. We choose the

242 # left of the range.

243 mode = 0.0 # could also be 1.0 as the prior is bimodal

244

245 return E, mode, lower, upper

246

247

248def bayesian_measures(tp, fp, tn, fn, lambda_, coverage):

249 """Calculates mean and mode from true/false positive and negative counts

250 with credible regions

251

252 This function can return bayesian estimates of standard machine learning

253 measures from true and false positive counts of positives and negatives.

254 For a thorough look into these and alternate names for the returned values,

255 please check Wikipedia's entry on `Precision and Recall

256 <https://en.wikipedia.org/wiki/Precision_and_recall>`_. See

257 :py:func:`beta_credible_region` for details on the calculation of returned

258 values.

259

260

261 Parameters

262 ----------

263

264 tp : int

265 True positive count, AKA "hit"

266

267 fp : int

268 False positive count, AKA "false alarm", or "Type I error"

269

270 tn : int

271 True negative count, AKA "correct rejection"

272

273 fn : int

274 False Negative count, AKA "miss", or "Type II error"

275

276 lambda_ : float

277 The parameterisation of the Beta prior to consider. Use

278 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for

279 Jeffrey's prior.

280

281 coverage : float

282 A floating-point number between 0 and 1.0 indicating the

283 coverage you're expecting. A value of 0.95 will ensure 95%

284 of the area under the probability density of the posterior

285 is covered by the returned equal-tailed interval.

286

287

288

289 Returns

290 -------

291

292 precision : (float, float, float, float)

293 P, AKA positive predictive value (PPV), mean, mode and credible

294 intervals (95% CI). It corresponds arithmetically

295 to ``tp/(tp+fp)``.

296

297 recall : (float, float, float, float)

298 R, AKA sensitivity, hit rate, or true positive rate (TPR), mean, mode

299 and credible intervals (95% CI). It corresponds arithmetically to

300 ``tp/(tp+fn)``.

301

302 specificity : (float, float, float, float)

303 S, AKA selectivity or true negative rate (TNR), mean, mode and credible

304 intervals (95% CI). It corresponds arithmetically to ``tn/(tn+fp)``.

305

306 accuracy : (float, float, float, float)

307 A, mean, mode and credible intervals (95% CI). See `Accuracy

308 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is

309 the proportion of correct predictions (both true positives and true

310 negatives) among the total number of pixels examined. It corresponds

311 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes

312 both true-negatives and positives in the numerator, what makes it

313 sensitive to data or regions without annotations.

314

315 jaccard : (float, float, float, float)

316 J, mean, mode and credible intervals (95% CI). See `Jaccard Index or

317 Similarity <https://en.wikipedia.org/wiki/Jaccard_index>`_. It

318 corresponds arithmetically to ``tp/(tp+fp+fn)``. The Jaccard index

319 depends on a TP-only numerator, similarly to the F1 score. For regions

320 where there are no annotations, the Jaccard index will always be zero,

321 irrespective of the model output. Accuracy may be a better proxy if

322 one needs to consider the true abscence of annotations in a region as

323 part of the measure.

324

325 f1_score : (float, float, float, float)

326 F1, mean, mode and credible intervals (95% CI). See `F1-score

327 <https://en.wikipedia.org/wiki/F1_score>`_. It corresponds

328 arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``. The F1 or

329 Dice score depends on a TP-only numerator, similarly to the Jaccard

330 index. For regions where there are no annotations, the F1-score will

331 always be zero, irrespective of the model output. Accuracy may be a

332 better proxy if one needs to consider the true abscence of annotations

333 in a region as part of the measure.

334 """

335

336 return (

337 beta_credible_region(tp, fp, lambda_, coverage), # precision

338 beta_credible_region(tp, fn, lambda_, coverage), # recall

339 beta_credible_region(tn, fp, lambda_, coverage), # specificity

340 beta_credible_region(tp + tn, fp + fn, lambda_, coverage), # accuracy

341 beta_credible_region(tp, fp + fn, lambda_, coverage), # jaccard index

342 beta_credible_region(2 * tp, fp + fn, lambda_, coverage), # f1-score

343 )

344

345

346def auc(x, y):

347 """Calculates the area under the precision-recall curve (AUC)

348

349 This function requires a minimum of 2 points and will use the trapezoidal

350 method to calculate the area under a curve bound between ``[0.0, 1.0]``.

351 It interpolates missing points if required. The input ``x`` should be

352 continuously increasing or decreasing.

353

354

355 Parameters

356 ----------

357

358 x : numpy.ndarray

359 A 1D numpy array containing continuously increasing or decreasing

360 values for the X coordinate.

361

362 y : numpy.ndarray

363 A 1D numpy array containing the Y coordinates of the X values provided

364 in ``x``.

365

366 """

367

368 x = numpy.array(x)

369 y = numpy.array(y)

370

371 assert len(x) == len(y), "x and y sequences must have the same length"

372

373 dx = numpy.diff(x)

374 if numpy.any(dx < 0):

375 if numpy.all(dx <= 0):

376 # invert direction

377 x = x[::-1]

378 y = y[::-1]

379 else:

380 raise ValueError(

381 "x is neither increasing nor decreasing " ": {}.".format(x)

382 )

383

384 y_interp = numpy.interp(

385 numpy.arange(0, 1, 0.001),

386 numpy.array(x),

387 numpy.array(y),

388 left=1.0,

389 right=0.0,

390 )

391 return y_interp.sum() * 0.001

392

393

394def get_intersection(pred_box, gt_box, multiplier):

395 """Calculate intersection of boxes.

396

397 Parameters

398 ----------

399 pred_box : torch.Tensor

400 A 1D numpy array containing predicted box coords.

401

402 gt_box : torch.Tensor

403 A 1D numpy array containing groud truth box coords.

404

405 multiplier: float

406 A number to increase the predicted bounding box by.

407 """

408 x1t, y1t, x2t, y2t = gt_box.numpy()

409 x1, y1, x2, y2 = pred_box.numpy()

410

411 m = numpy.sqrt(multiplier)

412 d_x = ((m * (x2 - x1)) - (x2 - x1)) / 2.0

413 d_y = ((m * (y2 - y1)) - (y2 - y1)) / 2.0

414 x1 = max(x1 - d_x, 0)

415 x2 = x2 + d_x

416 y1 = max(y1 - d_y, 0)

417 y2 = y2 + d_y

418

419 gt_tensor = gt_box.detach().clone().unsqueeze(0)

420 pred_tensor = torch.tensor([x1, y1, x2, y2]).unsqueeze(0)

421

422 lt = torch.max(pred_tensor[:, None, :2], gt_tensor[:, :2]) # [N,M,2]

423 rb = torch.min(pred_tensor[:, None, 2:], gt_tensor[:, 2:]) # [N,M,2]

424

425 wh = (rb - lt).clamp(min=0) # [N,M,2]

426 inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]

427

428 gt_area = (x2t - x1t) * (y2t - y1t)

429

430 if gt_area > 0:

431 return inter.item() / gt_area

432

433 else:

434 return 0