Coverage for src/deepdraw/utils/measure.py: 67%

3# SPDX-License-Identifier: GPL-3.0-or-later

5import numpy

6import scipy.special

7import torch

10def tricky_division(n, d):

11 """Divides n by d.

13 Returns 0.0 in case of a division by zero

14 """

16 return n / (d + (d == 0))

19def base_measures(tp, fp, tn, fn):

20 """Calculates frequentist measures from true/false positive and negative

21 counts.

23 This function can return (frequentist versions of) standard machine

24 learning measures from true and false positive counts of positives and

25 negatives. For a thorough look into these and alternate names for the

26 returned values, please check Wikipedia's entry on `Precision and Recall

27 <https://en.wikipedia.org/wiki/Precision_and_recall>`_.

30 Parameters

31 ----------

33 tp : int

34 True positive count, AKA "hit"

36 fp : int

37 False positive count, AKA "false alarm", or "Type I error"

39 tn : int

40 True negative count, AKA "correct rejection"

42 fn : int

43 False Negative count, AKA "miss", or "Type II error"

46 Returns

47 -------

49 precision : float

50 P, AKA positive predictive value (PPV). It corresponds arithmetically

51 to ``tp/(tp+fp)``. In the case ``tp+fp == 0``, this function returns

52 zero for precision.

54 recall : float

55 R, AKA sensitivity, hit rate, or true positive rate (TPR). It

56 corresponds arithmetically to ``tp/(tp+fn)``. In the special case

57 where ``tp+fn == 0``, this function returns zero for recall.

59 specificity : float

60 S, AKA selectivity or true negative rate (TNR). It

61 corresponds arithmetically to ``tn/(tn+fp)``. In the special case

62 where ``tn+fp == 0``, this function returns zero for specificity.

64 accuracy : float

65 A, see `Accuracy

66 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is

67 the proportion of correct predictions (both true positives and true

68 negatives) among the total number of pixels examined. It corresponds

69 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes

70 both true-negatives and positives in the numerator, what makes it

71 sensitive to data or regions without annotations.

73 jaccard : float

74 J, see `Jaccard Index or Similarity

75 <https://en.wikipedia.org/wiki/Jaccard_index>`_. It corresponds

76 arithmetically to ``tp/(tp+fp+fn)``. In the special case where

77 ``tn+fp+fn == 0``, this function returns zero for the Jaccard index.

78 The Jaccard index depends on a TP-only numerator, similarly to the F1

79 score. For regions where there are no annotations, the Jaccard index

80 will always be zero, irrespective of the model output. Accuracy may be

81 a better proxy if one needs to consider the true abscence of

82 annotations in a region as part of the measure.

84 f1_score : float

85 F1, see `F1-score <https://en.wikipedia.org/wiki/F1_score>`_. It

86 corresponds arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``.

87 In the special case where ``P+R == (2*tp+fp+fn) == 0``, this function

88 returns zero for the Jaccard index. The F1 or Dice score depends on a

89 TP-only numerator, similarly to the Jaccard index. For regions where

90 there are no annotations, the F1-score will always be zero,

91 irrespective of the model output. Accuracy may be a better proxy if

92 one needs to consider the true abscence of annotations in a region as

93 part of the measure.

94 """

96 return (

97 tricky_division(tp, tp + fp), # precision

98 tricky_division(tp, tp + fn), # recall

99 tricky_division(tn, fp + tn), # specificity

100 tricky_division(tp + tn, tp + fp + fn + tn), # accuracy

101 tricky_division(tp, tp + fp + fn), # jaccard index

102 tricky_division(2 * tp, (2 * tp) + fp + fn), # f1-score

103 )

104

105

106def beta_credible_region(k, i, lambda_, coverage):

107 """Returns the mode, upper and lower bounds of the equal-tailed credible

108 region of a probability estimate following Bernoulli trials.

109

110 This implemetnation is based on [GOUTTE-2005]_. It assumes :math:`k`

111 successes and :math:`l` failures (:math:`n = k+l` total trials) are issued

112 from a series of Bernoulli trials (likelihood is binomial). The posterior

113 is derivated using the Bayes Theorem with a beta prior. As there is no

114 reason to favour high vs. low precision, we use a symmetric Beta prior

115 (:math:`\\alpha=\\beta`):

116

117 .. math::

118

119 P(p|k,n) &= \\frac{P(k,n|p)P(p)}{P(k,n)} \\\\

120 P(p|k,n) &= \\frac{\\frac{n!}{k!(n-k)!}p^{k}(1-p)^{n-k}P(p)}{P(k)} \\\\

121 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\beta)}p^{k+\\alpha-1}(1-p)^{n-k+\\beta-1} \\\\

122 P(p|k,n) &= \\frac{1}{B(k+\\alpha, n-k+\\alpha)}p^{k+\\alpha-1}(1-p)^{n-k+\\alpha-1}

123

124 The mode for this posterior (also the maximum a posteriori) is:

125

126 .. math::

127

128 \\text{mode}(p) = \\frac{k+\\lambda-1}{n+2\\lambda-2}

129

130 Concretely, the prior may be flat (all rates are equally likely,

131 :math:`\\lambda=1`) or we may use Jeoffrey's prior

132 (:math:`\\lambda=0.5`), that is invariant through re-parameterisation.

133 Jeffrey's prior indicate that rates close to zero or one are more likely.

134

135 The mode above works if :math:`k+{\\alpha},n-k+{\\alpha} > 1`, which is

136 usually the case for a resonably well tunned system, with more than a few

137 samples for analysis. In the limit of the system performance, :math:`k`

138 may be 0, which will make the mode become zero.

139

140 For our purposes, it may be more suitable to represent :math:`n = k + l`,

141 with :math:`k`, the number of successes and :math:`l`, the number of

142 failures in the binomial experiment, and find this more suitable

143 representation:

144

145 .. math::

146

147 P(p|k,l) &= \\frac{1}{B(k+\\alpha, l+\\alpha)}p^{k+\\alpha-1}(1-p)^{l+\\alpha-1} \\\\

148 \\text{mode}(p) &= \\frac{k+\\lambda-1}{k+l+2\\lambda-2}

149

150 This can be mapped to most rates calculated in the context of binary

151 classification this way:

152

153 * Precision or Positive-Predictive Value (PPV): p = TP/(TP+FP), so k=TP, l=FP

154 * Recall, Sensitivity, or True Positive Rate: r = TP/(TP+FN), so k=TP, l=FN

155 * Specificity or True Negative Rage: s = TN/(TN+FP), so k=TN, l=FP

156 * F1-score: f1 = 2TP/(2TP+FP+FN), so k=2TP, l=FP+FN

157 * Accuracy: acc = TP+TN/(TP+TN+FP+FN), so k=TP+TN, l=FP+FN

158 * Jaccard: j = TP/(TP+FP+FN), so k=TP, l=FP+FN

159

160 Contrary to frequentist approaches, in which one can only

161 say that if the test were repeated an infinite number of times,

162 and one constructed a confidence interval each time, then X%

163 of the confidence intervals would contain the true rate, here

164 we can say that given our observed data, there is a X% probability

165 that the true value of :math:`k/n` falls within the provided

166 interval.

167

168

169 .. note::

170

171 For a disambiguation with Confidence Interval, read

172 https://en.wikipedia.org/wiki/Credible_interval.

173

174

175 Parameters

176 ==========

177

178 k : int

179 Number of successes observed on the experiment

180

181 i : int

182 Number of failures observed on the experiment

183

184 lambda__ : :py:class:`float`, Optional

185 The parameterisation of the Beta prior to consider. Use

186 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for

187 Jeffrey's prior (the default).

188

189 coverage : :py:class:`float`, Optional

190 A floating-point number between 0 and 1.0 indicating the

191 coverage you're expecting. A value of 0.95 will ensure 95%

192 of the area under the probability density of the posterior

193 is covered by the returned equal-tailed interval.

194

195

196 Returns

197 =======

198

199 mean : float

200 The mean of the posterior distribution

201

202 mode : float

203 The mode of the posterior distributA questão do volume eion

204

205 lower, upper: float

206 The lower and upper bounds of the credible region

207 """

208

209 # we return the equally-tailed range

210 right = (1.0 - coverage) / 2 # half-width in each side

211 lower = scipy.special.betaincinv(k + lambda_, i + lambda_, right)

212 upper = scipy.special.betaincinv(k + lambda_, i + lambda_, 1.0 - right)

213

214 # evaluate mean and mode (https://en.wikipedia.org/wiki/Beta_distribution)

215 alpha = k + lambda_

216 beta = i + lambda_

217

218 E = alpha / (alpha + beta)

219

220 # the mode of a beta distribution is a bit tricky

221 if alpha > 1 and beta > 1:

222 mode = (alpha - 1) / (alpha + beta - 2)

223 elif alpha == 1 and beta == 1:

224 # In the case of precision, if the threshold is close to 1.0, both TP

225 # and FP can be zero, which may cause this condition to be reached, if

226 # the prior is exactly 1 (flat prior). This is a weird situation,

227 # because effectively we are trying to compute the posterior when the

228 # total number of experiments is zero. So, only the prior counts - but

229 # the prior is flat, so we should just pick a value. We choose the

230 # middle of the range.

231 mode = 0.0 # any value would do, we just pick this one

232 elif alpha <= 1 and beta > 1:

233 mode = 0.0

234 elif alpha > 1 and beta <= 1:

235 mode = 1.0

236 else: # elif alpha < 1 and beta < 1:

237 # in the case of precision, if the threshold is close to 1.0, both TP

238 # and FP can be zero, which may cause this condition to be reached, if

239 # the prior is smaller than 1. This is a weird situation, because

240 # effectively we are trying to compute the posterior when the total

241 # number of experiments is zero. So, only the prior counts - but the

242 # prior is bimodal, so we should just pick a value. We choose the

243 # left of the range.

244 mode = 0.0 # could also be 1.0 as the prior is bimodal

245

246 return E, mode, lower, upper

247

248

249def bayesian_measures(tp, fp, tn, fn, lambda_, coverage):

250 """Calculates mean and mode from true/false positive and negative counts

251 with credible regions.

252

253 This function can return bayesian estimates of standard machine learning

254 measures from true and false positive counts of positives and negatives.

255 For a thorough look into these and alternate names for the returned values,

256 please check Wikipedia's entry on `Precision and Recall

257 <https://en.wikipedia.org/wiki/Precision_and_recall>`_. See

258 :py:func:`beta_credible_region` for details on the calculation of returned

259 values.

260

261

262 Parameters

263 ----------

264

265 tp : int

266 True positive count, AKA "hit"

267

268 fp : int

269 False positive count, AKA "false alarm", or "Type I error"

270

271 tn : int

272 True negative count, AKA "correct rejection"

273

274 fn : int

275 False Negative count, AKA "miss", or "Type II error"

276

277 lambda_ : float

278 The parameterisation of the Beta prior to consider. Use

279 :math:`\\lambda=1` for a flat prior. Use :math:`\\lambda=0.5` for

280 Jeffrey's prior.

281

282 coverage : float

283 A floating-point number between 0 and 1.0 indicating the

284 coverage you're expecting. A value of 0.95 will ensure 95%

285 of the area under the probability density of the posterior

286 is covered by the returned equal-tailed interval.

287

288

289

290 Returns

291 -------

292

293 precision : (float, float, float, float)

294 P, AKA positive predictive value (PPV), mean, mode and credible

295 intervals (95% CI). It corresponds arithmetically

296 to ``tp/(tp+fp)``.

297

298 recall : (float, float, float, float)

299 R, AKA sensitivity, hit rate, or true positive rate (TPR), mean, mode

300 and credible intervals (95% CI). It corresponds arithmetically to

301 ``tp/(tp+fn)``.

302

303 specificity : (float, float, float, float)

304 S, AKA selectivity or true negative rate (TNR), mean, mode and credible

305 intervals (95% CI). It corresponds arithmetically to ``tn/(tn+fp)``.

306

307 accuracy : (float, float, float, float)

308 A, mean, mode and credible intervals (95% CI). See `Accuracy

309 <https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers>`_. is

310 the proportion of correct predictions (both true positives and true

311 negatives) among the total number of pixels examined. It corresponds

312 arithmetically to ``(tp+tn)/(tp+tn+fp+fn)``. This measure includes

313 both true-negatives and positives in the numerator, what makes it

314 sensitive to data or regions without annotations.

315

316 jaccard : (float, float, float, float)

317 J, mean, mode and credible intervals (95% CI). See `Jaccard Index or

318 Similarity <https://en.wikipedia.org/wiki/Jaccard_index>`_. It

319 corresponds arithmetically to ``tp/(tp+fp+fn)``. The Jaccard index

320 depends on a TP-only numerator, similarly to the F1 score. For regions

321 where there are no annotations, the Jaccard index will always be zero,

322 irrespective of the model output. Accuracy may be a better proxy if

323 one needs to consider the true abscence of annotations in a region as

324 part of the measure.

325

326 f1_score : (float, float, float, float)

327 F1, mean, mode and credible intervals (95% CI). See `F1-score

328 <https://en.wikipedia.org/wiki/F1_score>`_. It corresponds

329 arithmetically to ``2*P*R/(P+R)`` or ``2*tp/(2*tp+fp+fn)``. The F1 or

330 Dice score depends on a TP-only numerator, similarly to the Jaccard

331 index. For regions where there are no annotations, the F1-score will

332 always be zero, irrespective of the model output. Accuracy may be a

333 better proxy if one needs to consider the true abscence of annotations

334 in a region as part of the measure.

335 """

336

337 return (

338 beta_credible_region(tp, fp, lambda_, coverage), # precision

339 beta_credible_region(tp, fn, lambda_, coverage), # recall

340 beta_credible_region(tn, fp, lambda_, coverage), # specificity

341 beta_credible_region(tp + tn, fp + fn, lambda_, coverage), # accuracy

342 beta_credible_region(tp, fp + fn, lambda_, coverage), # jaccard index

343 beta_credible_region(2 * tp, fp + fn, lambda_, coverage), # f1-score

344 )

345

346

347def auc(x, y):

348 """Calculates the area under the precision-recall curve (AUC)

349

350 This function requires a minimum of 2 points and will use the trapezoidal

351 method to calculate the area under a curve bound between ``[0.0, 1.0]``.

352 It interpolates missing points if required. The input ``x`` should be

353 continuously increasing or decreasing.

354

355

356 Parameters

357 ----------

358

359 x : numpy.ndarray

360 A 1D numpy array containing continuously increasing or decreasing

361 values for the X coordinate.

362

363 y : numpy.ndarray

364 A 1D numpy array containing the Y coordinates of the X values provided

365 in ``x``.

366 """

367

368 x = numpy.array(x)

369 y = numpy.array(y)

370

371 assert len(x) == len(y), "x and y sequences must have the same length"

372

373 dx = numpy.diff(x)

374 if numpy.any(dx < 0):

375 if numpy.all(dx <= 0):

376 # invert direction

377 x = x[::-1]

378 y = y[::-1]

379 else:

380 raise ValueError(

381 "x is neither increasing nor decreasing " ": {}.".format(x)

382 )

383

384 y_interp = numpy.interp(

385 numpy.arange(0, 1, 0.001),

386 numpy.array(x),

387 numpy.array(y),

388 left=1.0,

389 right=0.0,

390 )

391 return y_interp.sum() * 0.001

392

393

394def get_intersection(pred_box, gt_box, multiplier):

395 """Calculate intersection of boxes.

396

397 Parameters

398 ----------

399 pred_box : torch.Tensor

400 A 1D numpy array containing predicted box coords.

401

402 gt_box : torch.Tensor

403 A 1D numpy array containing groud truth box coords.

404

405 multiplier: float

406 A number to increase the predicted bounding box by.

407 """

408 x1t, y1t, x2t, y2t = gt_box.numpy()

409 x1, y1, x2, y2 = pred_box.numpy()

410

411 m = numpy.sqrt(multiplier)

412 d_x = ((m * (x2 - x1)) - (x2 - x1)) / 2.0

413 d_y = ((m * (y2 - y1)) - (y2 - y1)) / 2.0

414 x1 = max(x1 - d_x, 0)

415 x2 = x2 + d_x

416 y1 = max(y1 - d_y, 0)

417 y2 = y2 + d_y

418

419 gt_tensor = gt_box.detach().clone().unsqueeze(0)

420 pred_tensor = torch.tensor([x1, y1, x2, y2]).unsqueeze(0)

421

422 lt = torch.max(pred_tensor[:, None, :2], gt_tensor[:, :2]) # [N,M,2]

423 rb = torch.min(pred_tensor[:, None, 2:], gt_tensor[:, 2:]) # [N,M,2]

424

425 wh = (rb - lt).clamp(min=0) # [N,M,2]

426 inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]

427

428 gt_area = (x2t - x1t) * (y2t - y1t)

429

430 if gt_area > 0:

431 return inter.item() / gt_area

432

433 else:

434 return 0