Coverage for src/bob/measure/__init__.py: 73%

112 statements  

coverage.py v7.3.2, created at 2023-12-06 21:23 +0100

# import Libraries of other lib packages
import numpy

from . import calibration, load, plot  # noqa: F401
from ._library import *  # noqa: F401, F403
from ._library import eer_threshold, farfrr, logger, roc


def fprfnr(negatives, positives, threshold):
    """Alias for :py:func:`bob.measure.farfrr`"""
    return farfrr(negatives, positives, threshold)


def mse(estimation, target):
    r"""Mean square error between a set of outputs and target values

    Uses the formula:

    .. math::

       MSE(\hat{\Theta}) = E[(\hat{\Theta} - \Theta)^2]

    Estimation (:math:`\hat{\Theta}`) and target (:math:`\Theta`) are supposed
    to have 2 dimensions. Different examples are organized as rows while
    different features in the estimated values or targets are organized as
    different columns.


    Parameters:

      estimation (array): an N-dimensional array that corresponds to the value
        estimated by your procedure

      target (array): an N-dimensional array that corresponds to the expected
        value


    Returns:

      array: A 1D float array with the average squared error between the
      estimated values and the targets, one value per feature (column)

    """
    return numpy.mean((estimation - target) ** 2, 0)


def rmse(estimation, target):
    r"""Calculates the root mean square error between a set of outputs and target values

    Uses the formula:

    .. math::

       RMSE(\hat{\Theta}) = \sqrt{E[(\hat{\Theta} - \Theta)^2]}

    Estimation (:math:`\hat{\Theta}`) and target (:math:`\Theta`) are supposed
    to have 2 dimensions. Different examples are organized as rows while
    different features in the estimated values or targets are organized as
    different columns.


    Parameters:

      estimation (array): an N-dimensional array that corresponds to the value
        estimated by your procedure

      target (array): an N-dimensional array that corresponds to the expected
        value


    Returns:

      array: A 1D float array with the square root of the average squared
      error between the estimated values and the targets, one value per
      feature (column)

    """
    return numpy.sqrt(mse(estimation, target))

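
# Illustrative usage sketch (hypothetical helper, not part of the public
# API): exercises ``mse`` and ``rmse`` on small hand-made 2D arrays with
# examples as rows and features as columns, as the docstrings above assume.
def _example_mse_rmse():  # pragma: no cover
    estimation = numpy.array([[1.0, 2.0], [3.0, 4.0]])
    target = numpy.array([[1.0, 2.5], [2.0, 4.0]])
    per_feature_mse = mse(estimation, target)  # -> array([0.5, 0.125])
    per_feature_rmse = rmse(estimation, target)  # element-wise sqrt of the MSE
    return per_feature_mse, per_feature_rmse
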

def relevance(input, machine):
    """Calculates the relevance of every input feature to the estimation process

    Uses the formula described in:

      Anjos et al., "Neural Triggering System Operating on High Resolution
      Calorimetry Information", Nuclear Instruments and Methods in Physics
      Research, volume 559, pages 134-138, April 2006

    .. math::

       R(x_{i}) = |E[(o(x) - o(x|x_{i}=E[x_{i}]))^2]|

    In other words, the relevance of a certain input feature **i** is the change
    on the machine output value when such feature is replaced by its mean for all
    input vectors. For this to work, the `input` parameter has to be a 2D array
    with features arranged column-wise while different examples are arranged
    row-wise.


    Parameters:

      input (array): a 2D array containing the input vectors fed to the
        ``machine``, with examples arranged row-wise and features column-wise

      machine (object): A machine that can be called to "process" your input


    Returns:

      array: A 1D float array as large as the number of columns (second
      dimension) of your input array, estimating the "relevance" of each input
      column (or feature) to the score provided by the machine.

    """

    o = machine(input)
    i2 = input.copy()
    retval = numpy.ndarray((input.shape[1],), "float64")
    retval.fill(0)
    for k in range(input.shape[1]):
        i2[:, :] = input  # reset
        i2[:, k] = numpy.mean(input[:, k])
        retval[k] = (mse(machine(i2), o).sum()) ** 0.5

    return retval

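
# Illustrative usage sketch (hypothetical helper and machine, not part of the
# public API): ``relevance`` only needs a callable "machine"; a simple linear
# model with a large weight on the second feature serves as a stand-in, so
# that feature should come out as the more relevant column.
def _example_relevance():  # pragma: no cover
    rng = numpy.random.RandomState(0)
    data = rng.randn(100, 2)

    def linear_machine(x):
        # arbitrary weights chosen for the illustration
        return x @ numpy.array([[0.1], [2.0]])

    # returns a length-2 array; the second entry should be the largest
    return relevance(data, linear_machine)
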

def recognition_rate(cmc_scores, threshold=None, rank=1):
    """Calculates the recognition rate from the given input

    It is identical to the CMC value for the given ``rank``.

    The input has a specific format, which is a list of two-element tuples. Each
    of the tuples contains the negative :math:`\\{S_p^-\\}` and the positive
    :math:`\\{S_p^+\\}` scores for one probe item :math:`p`, or ``None`` in case
    of open set recognition.

    If ``threshold`` is set to ``None``, the rank 1 recognition rate is defined
    as the number of test items for which the highest positive score
    :math:`\\max\\{S_p^+\\}` is greater than or equal to all negative scores,
    divided by the number of all probe items :math:`P`:

    .. math::

       \\mathrm{RR} = \\frac{1}{P} \\sum_{p=1}^{P} \\begin{cases} 1 & \\mathrm{if } \\max\\{S_p^+\\} \\geq \\max\\{S_p^-\\}\\\\ 0 & \\mathrm{otherwise} \\end{cases}

    For a given rank :math:`r>1`, up to :math:`r-1` negative scores may be
    higher than the highest positive score for the probe to still count as
    correctly classified within the top :math:`r` ranks.

    If a ``threshold`` :math:`\\theta` is given, **all** scores at or below the
    threshold will be filtered out. Hence, if all positive scores are below the
    threshold, :math:`\\max\\{S_p^+\\} < \\theta`, the probe will be
    misclassified **at any rank**.

    For open set recognition, i.e., when there exists a tuple including
    negative scores without corresponding positive scores (``None``), and
    **all** negative scores are below the ``threshold``,
    :math:`\\max\\{S_p^-\\} < \\theta`, the probe item is correctly rejected,
    **and it does not count into the denominator** :math:`P`. When no
    ``threshold`` is provided, the open set probes will **always** count as
    misclassified, regardless of the ``rank``.

    .. warning::

       For open set tests, this rate does not correspond to a standard rate.
       Please use :py:func:`detection_identification_rate` and
       :py:func:`false_alarm_rate` instead.


    Parameters:

      cmc_scores (:py:class:`list`): A list of tuples in the format
        ``[(negatives, positives), ...]`` containing the CMC scores.

        Each pair contains the ``negative`` and the ``positive`` scores for **one
        probe item**. Each pair can contain up to one empty array (or ``None``),
        i.e., in case of open set recognition.

      threshold (:obj:`float`, optional): Decision threshold. If not ``None``, **all**
        scores will be filtered by the threshold. In an open set recognition
        problem, all open set scores (negatives with no corresponding positive)
        for which all scores are below the threshold will be counted as correctly
        rejected and **removed** from the probe list (i.e., the denominator).

      rank (:obj:`int`, optional):
        The rank for which the recognition rate should be computed, 1 by default.


    Returns:

      float: The (open set) recognition rate for the given rank, a value between
      0 and 1.

    """
    # If no scores are given, the recognition rate is exactly 0.
    if not cmc_scores:
        return 0.0

    correct = 0
    counter = 0
    for neg, pos in cmc_scores:
        # set all values that are empty to None
        if pos is not None and not numpy.array(pos).size:
            pos = None
        if neg is not None and not numpy.array(neg).size:
            neg = None

        if pos is None and neg is None:
            raise ValueError(
                "One pair of the CMC scores has neither positive nor negative values"
            )

        # filter out any negative or positive scores at or below the threshold;
        # from here on, None and an empty array have different meanings
        if threshold is not None:
            if neg is not None:
                neg = numpy.array(neg)[neg > threshold]
            if pos is not None:
                pos = numpy.array(pos)[pos > threshold]

        if pos is None:
            # no positives, so we definitely do not have a match;
            # check if we have negatives above the threshold
            if not neg.size:
                # we have no negative scores over the threshold, so we have
                # correctly rejected the probe; don't increase either counter
                continue
            # we have negatives over the threshold, so we have an incorrect
            # classification, independent of the actual rank
            counter += 1
        else:
            # we have a positive, so we need to count the probe
            counter += 1

            if not numpy.array(pos).size:
                # all positive scores have been filtered out by the threshold,
                # so we definitely have a mis-match
                continue

            # get the maximum positive score for the current probe item
            # (usually, there is only one positive score, but just in case...)
            max_pos = numpy.max(pos)

            if neg is None or not numpy.array(neg).size:
                # if we had no negatives, or all negatives were below the
                # threshold, we have a match at rank 1
                correct += 1
            else:
                # count the number of negative scores that are higher than
                # the best positive score
                index = numpy.sum(neg >= max_pos)
                if index < rank:
                    correct += 1

    return float(correct) / float(counter)

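
# Illustrative usage sketch (hand-made scores, hypothetical helper, not part
# of the public API): three closed-set probes plus one open-set probe
# (``None`` positives), scored with and without a decision threshold.
def _example_recognition_rate():  # pragma: no cover
    scores = [
        (numpy.array([0.1, 0.2]), numpy.array([0.8])),  # correct at rank 1
        (numpy.array([0.9, 0.2]), numpy.array([0.8])),  # correct only at rank 2
        (numpy.array([0.4, 0.5]), numpy.array([0.3])),  # outside the top 2 ranks
        (numpy.array([0.1, 0.3]), None),  # open-set probe
    ]
    rr_rank1 = recognition_rate(scores, rank=1)  # -> 0.25
    rr_rank2 = recognition_rate(scores, rank=2)  # -> 0.5
    # with a threshold, the open-set probe is correctly rejected and dropped
    # from the denominator
    rr_thresholded = recognition_rate(scores, threshold=0.35, rank=1)  # -> 1/3
    return rr_rank1, rr_rank2, rr_thresholded
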

def cmc(cmc_scores):
    """Calculates the cumulative match characteristic (CMC) from the given input.

    The input has a specific format, which is a list of two-element tuples. Each
    of the tuples contains the negative and the positive scores for one probe
    item.

    For each probe item, the rank of the positive score is computed as the
    number of negative scores that are higher than the positive score. If
    several positive scores for one test item exist, the **highest** positive
    score is taken. The CMC finally computes, for each rank :math:`r`, how many
    test items have rank :math:`r` or better, divided by the total number of
    test items.

    .. note::

       The CMC is not available for open set classification. Please use
       :py:func:`detection_identification_rate` and :py:func:`false_alarm_rate`
       instead.


    Parameters
    ----------

    cmc_scores : :py:class:`list`
        A list in the format ``[(negatives, positives), ...]`` containing the CMC
        scores.

        Each pair contains the ``negative`` and the ``positive`` scores for **one
        probe item**. Each pair can contain up to one empty array (or ``None``),
        i.e., in case of open set recognition.


    Returns
    -------

    1D :py:class:`numpy.ndarray` of `float`
        A 1D float array representing the CMC curve.
        The rank 1 recognition rate can be found in ``array[0]``, rank 2 rate in
        ``array[1]``, and so on. The number of ranks (``array.shape[0]``) is the
        number of gallery items. Values are in range ``[0,1]``.
    """

    # If no scores are given, we cannot compute anything
    probe_count = float(len(cmc_scores))
    if not probe_count:
        raise ValueError("The given set of scores is empty")

    # compute the match characteristic (MC)
    match_characteristic = numpy.zeros(
        (max([len(neg) for neg, _ in cmc_scores if neg is not None]) + 1,),
        int,
    )

    for neg, pos in cmc_scores:
        if pos is None or not numpy.array(pos).size:
            raise ValueError(
                "For the CMC computation at least one positive score per pair is necessary."
            )
        if neg is None:
            neg = []

        # get the maximum positive score for the current probe item
        # (usually, there is only one positive score, but just in case...)
        max_pos = numpy.max(pos)

        # count the number of negative scores that are higher than the best
        # positive score
        index = numpy.sum(neg >= max_pos)
        match_characteristic[index] += 1

    # cumulate
    cumulative_match_characteristic = numpy.cumsum(
        match_characteristic, dtype=numpy.float64
    )
    return cumulative_match_characteristic / probe_count

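
# Illustrative usage sketch (hand-made closed-set scores, hypothetical helper,
# not part of the public API): ``curve[0]`` is the rank-1 recognition rate and
# the curve grows monotonically towards 1.0.
def _example_cmc():  # pragma: no cover
    scores = [
        (numpy.array([0.1, 0.2]), numpy.array([0.8])),  # rank 1
        (numpy.array([0.9, 0.2]), numpy.array([0.8])),  # rank 2
        (numpy.array([0.4, 0.5]), numpy.array([0.3])),  # rank 3
    ]
    curve = cmc(scores)  # -> approximately array([0.33, 0.67, 1.0])
    return curve
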

def detection_identification_rate(cmc_scores, threshold, rank=1):
    """Computes the `detection and identification rate` for the given threshold.

    This value is designed to be used in an open set identification protocol,
    and is defined in Chapter 14.1 of [LiJain2005]_.

    Although the detection and identification rate is designed to be computed on
    an open set protocol, it uses only the probe elements for which a
    corresponding gallery element exists. For closed set identification
    protocols, this function is identical to :py:func:`recognition_rate`. The
    only difference is that for this function, a ``threshold`` for the scores
    needs to be defined, while for :py:func:`recognition_rate` it is optional.


    Parameters:

      cmc_scores (:py:class:`list`): A list in the format ``[(negatives,
        positives), ...]`` containing the CMC scores.

        Each pair contains the ``negative`` and the ``positive`` scores for **one
        probe item**. Each pair can contain up to one empty array (or ``None``),
        i.e., in case of open set recognition.

      threshold (float): The decision threshold :math:`\\tau`.

      rank (:obj:`int`, optional): The rank for which the detection and
        identification rate should be computed, 1 by default.


    Returns:

      float: The detection and identification rate for the given threshold.

    """

    # count the correctly classified probes
    correct = 0
    counter = 0
    for neg, pos in cmc_scores:
        if pos is None or not numpy.array(pos).size:
            # we only consider probes with corresponding gallery items
            continue
        # we have an in-gallery probe
        counter += 1
        # check if it is correctly classified
        if neg is None:
            neg = []

        # get the maximum positive score for the current probe item
        # (usually, there is only one positive score, but just in case...)
        max_pos = numpy.max(pos)

        # compute the rank (in fact, rank - 1)
        index = numpy.sum(neg >= max_pos)
        if max_pos >= threshold and index < rank:
            correct += 1

    if not counter:
        logger.warning("No in-gallery probe was found")
        return 0.0

    return float(correct) / float(counter)

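
# Illustrative usage sketch (hand-made open-set scores, hypothetical helper,
# not part of the public API): only the two in-gallery probes enter the
# denominator; the out-of-gallery probe is ignored by this rate.
def _example_detection_identification_rate():  # pragma: no cover
    scores = [
        (numpy.array([0.1, 0.2]), numpy.array([0.8])),  # detected and identified
        (numpy.array([0.9, 0.2]), numpy.array([0.5])),  # detected, but rank > 1
        (numpy.array([0.1, 0.3]), None),  # out-of-gallery, not counted here
    ]
    return detection_identification_rate(scores, threshold=0.4, rank=1)  # -> 0.5
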

def false_alarm_rate(cmc_scores, threshold):
    """Computes the `false alarm rate` for the given threshold.

    This value is designed to be used in an open set identification protocol,
    and is defined in Chapter 14.1 of [LiJain2005]_.

    The false alarm rate is designed to be computed on an open set protocol;
    it uses only the probe elements for which **no** corresponding gallery
    element exists.


    Parameters:

      cmc_scores (:py:class:`list`): A list in the format ``[(negatives,
        positives), ...]`` containing the CMC scores.

        Each pair contains the ``negative`` and the ``positive`` scores for **one
        probe item**. Each pair can contain up to one empty array (or ``None``),
        i.e., in case of open set recognition.

      threshold (float): The decision threshold :math:`\\tau`.


    Returns:

      float: The false alarm rate.

    """
    incorrect = 0
    counter = 0
    for neg, pos in cmc_scores:
        # we only consider the out-of-gallery probes, i.e., with no positive scores
        if pos is None or not numpy.array(pos).size:
            counter += 1

            # check if the probe is above threshold
            if neg is None or not numpy.array(neg).size:
                raise ValueError(
                    "One pair of the CMC scores has neither positive nor negative values"
                )
            if numpy.max(neg) >= threshold:
                incorrect += 1

    if not counter:
        logger.warning("No out-of-gallery probe was found")
        return 0.0

    return float(incorrect) / float(counter)

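
# Illustrative usage sketch (hand-made open-set scores, hypothetical helper,
# not part of the public API): only out-of-gallery probes (no positives) are
# considered; a false alarm is such a probe whose best negative score reaches
# the threshold.
def _example_false_alarm_rate():  # pragma: no cover
    scores = [
        (numpy.array([0.1, 0.2]), numpy.array([0.8])),  # in-gallery, ignored here
        (numpy.array([0.1, 0.3]), None),  # correctly rejected at threshold 0.4
        (numpy.array([0.7, 0.2]), None),  # false alarm at threshold 0.4
    ]
    return false_alarm_rate(scores, threshold=0.4)  # -> 0.5
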

def eer(negatives, positives, is_sorted=False, also_farfrr=False):
    """Calculates the Equal Error Rate (EER).

    Please note that it is possible that ``eer != fpr != fnr``; this function
    returns ``(fpr + fnr) / 2`` as the EER. If you also need the FPR and FNR
    values, set ``also_farfrr`` to ``True``.

    Parameters
    ----------
    negatives : ``array_like (1D, float)``
        The scores for comparisons of objects of different classes.
    positives : ``array_like (1D, float)``
        The scores for comparisons of objects of the same class.
    is_sorted : bool
        Set to ``True`` if both sets of scores are already sorted in ascending
        order.
    also_farfrr : bool
        If ``True``, the FPR and FNR are also returned.

    Returns
    -------
    eer : float
        The Equal Error Rate (EER).
    fpr : float
        The False Positive Rate (FPR). Returned only when ``also_farfrr`` is
        ``True``.
    fnr : float
        The False Negative Rate (FNR). Returned only when ``also_farfrr`` is
        ``True``.
    """
    threshold = eer_threshold(negatives, positives, is_sorted)
    fpr, fnr = farfrr(negatives, positives, threshold)
    if also_farfrr:
        return (fpr + fnr) / 2.0, fpr, fnr
    return (fpr + fnr) / 2.0

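
# Illustrative usage sketch (synthetic Gaussian scores, hypothetical helper,
# not part of the public API): with well-separated score distributions the
# EER should come out small.
def _example_eer():  # pragma: no cover
    rng = numpy.random.RandomState(0)
    negatives = rng.normal(loc=-1.0, scale=0.5, size=1000)
    positives = rng.normal(loc=+1.0, scale=0.5, size=1000)
    eer_value, fpr, fnr = eer(negatives, positives, also_farfrr=True)
    return eer_value, fpr, fnr
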

def roc_auc_score(
    negatives, positives, npoints=2000, min_far=-8, log_scale=False
):
    """Area Under the ROC Curve.

    Computes the area under the ROC curve. This is useful when you want to
    report one number that represents an ROC curve. This implementation uses
    the trapezoidal rule for the integration of the ROC curve. For more
    information, see:
    https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve


    Parameters
    ----------
    negatives : array_like
        The negative scores.
    positives : array_like
        The positive scores.
    npoints : int, optional
        Number of points in the ROC curve. Higher numbers lead to a more
        accurate ROC curve.
    min_far : float, optional
        Minimum FAR and FRR values to consider when calculating the ROC.
    log_scale : bool, optional
        If ``True``, converts the x axis (FPR) to log10 scale before calculating
        the AUC. This is useful in cases where ``len(negatives) >> len(positives)``.

    Returns
    -------
    float
        The ROC AUC. If ``log_scale`` is ``False``, the value should be between
        0 and 1.
    """
    fpr, fnr = roc(negatives, positives, npoints, min_far=min_far)
    tpr = 1 - fnr

    if log_scale:
        fpr_pos = fpr > 0
        fpr, tpr = fpr[fpr_pos], tpr[fpr_pos]
        fpr = numpy.log10(fpr)

    area = -1 * numpy.trapz(tpr, fpr)
    return area
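

# Illustrative usage sketch (synthetic scores, hypothetical helper, not part
# of the public API): a reasonably separated system should give an AUC well
# above 0.5; the log-scale variant emphasizes the low-FPR region.
def _example_roc_auc_score():  # pragma: no cover
    rng = numpy.random.RandomState(0)
    negatives = rng.normal(loc=-1.0, scale=0.5, size=5000)
    positives = rng.normal(loc=+1.0, scale=0.5, size=500)
    auc = roc_auc_score(negatives, positives)
    auc_log = roc_auc_score(negatives, positives, log_scale=True)
    return auc, auc_log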