Coverage for src/bob/bio/base/script/error_utils.py: 72%

108 statements  

« prev     ^ index     » next       coverage.py v7.6.5, created at 2024-11-14 21:41 +0100

1#!/usr/bin/env python 

2# Ivana Chingovska <ivana.chingovska@idiap.ch> 

3# Fri Dec 7 12:33:37 CET 2012 

4"""Utility functions for computation of EPSC curve and related measurement""" 

5 

6import numpy 

7 

8from bob.measure import farfrr 

9 

10 

def calc_pass_rate(threshold, attacks):
    """Calculates the rate of successful spoofing attacks

    An attack counts as successful when its score is greater than or equal
    to the threshold.

    Parameters
    ----------
    threshold :
        the threshold used for classification
    attacks : array_like
        scores of the spoofing attacks (a numpy array, or any sequence that
        ``numpy.asarray`` can convert, such as a list of floats)

    Returns
    -------
    float
        rate of successful spoofing attacks, i.e. the fraction of
        ``attacks`` that is greater than or equal to ``threshold``
    """
    # asarray leaves arrays untouched and lets plain sequences be compared
    # element-wise instead of raising a TypeError
    return (numpy.asarray(attacks) >= threshold).mean()

27 

28 

def weighted_neg_error_rate_criteria(
    data, weight, thres, beta=0.5, criteria="eer"
):
    """Evaluates, at a single threshold, the quantity that the threshold
    search minimizes for the selected criteria.

    The false acceptance rates of the licit and spoof scenarios are first
    combined into ``far_w = (1 - weight) * far_licit + weight * far_spoof``.
    The returned value then depends on ``criteria``:

    - 'eer': ``abs(far_w - frr)`` when ``beta == 0.5``, otherwise
      ``abs((1 - beta) * frr - beta * far_w)``
    - 'min-hter': ``(far_w + frr) / 2`` (``beta`` is not used)
    - anything else (documented as 'wer'): ``(1 - beta) * frr + beta * far_w``

    Keyword parameters:

    - data - the development data used to determine the threshold. Sequence
      of 4 numpy.arrays containing: negatives (licit), positives (licit),
      negatives (spoof), positives (spoof)
    - weight - the weight parameter balancing between impostors and spoofing
      attacks
    - thres - the given threshold
    - beta - the weight parameter balancing between real accesses and all the
      negative samples (impostors and spoofing attacks). Ignored when the
      selected criteria is 'min-hter'.
    - criteria - 'eer', 'wer' or 'min-hter' criteria for decision threshold
    """
    negatives_licit, positives_licit, negatives_spoof, positives_spoof = (
        data[position] for position in range(4)
    )

    licit_rates = farfrr(negatives_licit, positives_licit, thres)
    spoof_rates = farfrr(negatives_spoof, positives_spoof, thres)

    # index 0 is read as the false acceptance rate and index 1 as the false
    # rejection rate; the rejection rate is taken from the licit scenario
    false_rejection = licit_rates[1]
    weighted_false_acceptance = (1 - weight) * licit_rates[
        0
    ] + weight * spoof_rates[0]

    if criteria == "min-hter":
        return (weighted_false_acceptance + false_rejection) / 2

    if criteria != "eer":
        return (1 - beta) * false_rejection + beta * weighted_false_acceptance

    if beta == 0.5:
        return abs(weighted_false_acceptance - false_rejection)
    return abs((1 - beta) * false_rejection - beta * weighted_false_acceptance)

76 

77 

def epsc_weights(licit_neg, licit_pos, spoof_neg, spoof_pos, points=100):
    """Builds the evenly spaced grid of weights used for EPSC

    The four score arguments are accepted but not used; only ``points``
    determines the result.

    Keyword arguments:

    - points - number of points to calculate EPSC. The returned array holds
      the ``points + 1`` values ``i * (1 / points)`` for ``i = 0 .. points``
    """
    spacing = 1 / float(points)
    return numpy.array(range(points + 1)) * spacing

88 

89 

def recursive_thr_search(
    data, span_min, span_max, weight, beta=0.5, criteria="eer"
):
    """Search for the optimal threshold given a criteria, by successive grid
    refinement. It evaluates the full range of thresholds at 101 evenly
    spaced points (100 intervals), and finds the ones which minimize the
    value returned by weighted_neg_error_rate_criteria(). In the next
    iteration, it examines the region of one grid step on either side of the
    centrally positioned minimizer. The procedure stops when the width of
    the search range, relative to span_max, is smaller than 1e-10, or when
    the range has collapsed to zero width.

    The refinement is carried out in a loop (the name is kept for
    compatibility with existing callers).

    Keyword arguments:
    - data - the development data used to determine the threshold. Sequence
      of 4 numpy.arrays containing: negatives (licit), positives (licit),
      negatives (spoof), positives (spoof)
    - span_min - the minimum of the search range
    - span_max - the maximum of the search range
    - weight - the weight parameter balancing between impostors and spoofing
      attacks
    - beta - the weight parameter balancing between real accesses and all the
      negative samples (impostors and spoofing attacks). It is passed on to
      weighted_neg_error_rate_criteria(), which does not use it when the
      selected criteria is 'min-hter'.
    - criteria - the decision threshold criteria ('eer' for EER, 'wer' for
      Minimum WER or 'min-hter' for Minimum HTER criteria).

    Returns the upper end of the final search range.

    Raises ValueError if the width of the search range is not finite (for
    instance because one of its ends is nan or infinite).
    """
    quit_thr = 1e-10  # relative width at which the search stops
    steps = 100  # number of grid intervals per refinement

    # A loop is used instead of recursion: when the optimum lies at (or very
    # close to) zero, the relative stopping rule needs far more refinements
    # than usual and the recursive form ran into the recursion limit.
    while True:
        width = span_max - span_min
        if not numpy.isfinite(width):
            raise ValueError(
                "the threshold search range must be finite, got "
                "[%r, %r]" % (span_min, span_max)
            )

        # A zero-width range cannot be refined any further. The relative
        # test is skipped when span_max is zero so that it can neither
        # divide by zero nor produce nan; the range then keeps shrinking
        # until one of the two stopping rules applies.
        if width == 0 or (
            span_max != 0 and abs(width / span_max) < quit_thr
        ):
            return span_max  # or span_min, it doesn't matter

        step_size = width / steps
        thresholds = numpy.arange(steps + 1) * step_size + span_min
        weighted_error_rates = numpy.array(
            [
                weighted_neg_error_rate_criteria(
                    data, weight, thr, beta, criteria
                )
                for thr in thresholds
            ]
        )

        # all the thresholds which have minimum weighted error rate
        selected_thres = thresholds[
            weighted_error_rates == weighted_error_rates.min()
        ]
        # choose the centrally positioned threshold
        centre = selected_thres[selected_thres.size // 2]

        span_min, span_max = centre - step_size, centre + step_size

141 

142 

def weighted_negatives_threshold(
    licit_neg, licit_pos, spoof_neg, spoof_pos, weight, beta=0.5, criteria="eer"
):
    """Finds the threshold that satisfies the given criteria between FAR_w
    and FRR, for one value of the weight balancing impostors against
    spoofing attacks and one value of beta balancing real accesses against
    the negatives (impostors and spoofing attacks).

    The search is delegated to recursive_thr_search() and runs from the
    smallest negative score to the largest positive score, taken over both
    scenarios.

    Keyword parameters:
    - licit_neg - numpy.array of scores for the negatives (licit scenario)
    - licit_pos - numpy.array of scores for the positives (licit scenario)
    - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
    - spoof_pos - numpy.array of scores for the positives (spoof scenario)
    - weight - the weight parameter balancing between impostors and spoofing
      attacks
    - beta - the weight parameter balancing between real accesses and all the
      negative samples (impostors and spoofing attacks). Passed through to
      recursive_thr_search().
    - criteria - the decision threshold criteria ('eer' for EER, 'wer' for
      Minimum WER or 'min-hter' for Minimum HTER criteria).
    """
    # lower end of the search range: smallest score among all negatives
    every_negative = numpy.append(licit_neg, spoof_neg)
    lowest_score = min(every_negative)

    # upper end of the search range: largest score among all positives
    every_positive = numpy.append(licit_pos, spoof_pos)
    highest_score = max(every_positive)

    # the four score sets travel together as one tuple
    score_sets = (licit_neg, licit_pos, spoof_neg, spoof_pos)
    return recursive_thr_search(
        score_sets, lowest_score, highest_score, weight, beta, criteria
    )

181 

182 

def epsc_thresholds(
    licit_neg,
    licit_pos,
    spoof_neg,
    spoof_pos,
    points=100,
    criteria="eer",
    omega=None,
    beta=None,
):
    """Computes the EPSC thresholds for every combination of the omega
    weights (impostors versus spoofing attacks) and the beta weights (real
    accesses versus all the negatives).

    Keyword arguments:

    - licit_neg - numpy.array of scores for the negatives (licit scenario)
    - licit_pos - numpy.array of scores for the positives (licit scenario)
    - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
    - spoof_pos - numpy.array of scores for the positives (spoof scenario)
    - points - number of points to calculate EPSC
    - criteria - the decision threshold criteria ('eer', 'wer' or 'min-hter')
    - omega - the value of the parameter omega, balancing between impostors
      and spoofing attacks. If None, it spans [0, 1] in ``points + 1`` evenly
      spaced values. Otherwise a single value, or a list, tuple or
      numpy.ndarray of values.
    - beta - the value of the parameter beta, balancing between real accesses
      and all the negatives (zero-effort impostors and spoofing attacks).
      Interpreted in the same way as omega.

    Returns the tuple (omega, beta, thresholds): the two weight arrays that
    were actually used and a float64 array with one row per beta value and
    one column per omega value.
    """
    spacing = 1 / float(points)

    def as_weight_array(value):
        # None -> full grid; list/tuple/ndarray -> copied into an array;
        # any other value -> wrapped as a one-element array
        if value is None:
            return numpy.array([k * spacing for k in range(points + 1)])
        if isinstance(value, (list, tuple, numpy.ndarray)):
            return numpy.array(value)
        return numpy.array([value])

    omega = as_weight_array(omega)
    beta = as_weight_array(beta)

    thresholds = numpy.empty((beta.size, omega.size), dtype="float64")
    for row, b in enumerate(beta):
        row_values = [
            weighted_negatives_threshold(
                licit_neg,
                licit_pos,
                spoof_neg,
                spoof_pos,
                w,
                b,
                criteria=criteria,
            )
            for w in omega
        ]
        thresholds[row, :] = numpy.array(row_values, "float64")

    return omega, beta, thresholds

258 

259 

def weighted_err(error_1, error_2, weight):
    """Blends two error rates: ``(1 - weight) * error_1 + weight * error_2``

    Keyword arguments:
    - error_1 - the first input error rate (FAR for zero effort impostors
      usually); receives the share ``1 - weight``
    - error_2 - the second input error rate (SFAR); receives the share
      ``weight``
    - weight - the given weight
    """
    first_share = 1 - weight
    first_term = first_share * error_1
    second_term = weight * error_2
    return first_term + second_term

270 

271 

def error_rates_at_weight(
    licit_neg, licit_pos, spoof_neg, spoof_pos, omega, threshold, beta=0.5
):
    """Computes the error rates FRR, FAR (zero-effort impostors), SFAR,
    FAR_w, WER_wb and HTER_w at a given threshold, for one value of omega
    and one value of beta.

    Keyword arguments:

    - licit_neg - numpy.array of scores for the negatives (licit scenario)
    - licit_pos - numpy.array of scores for the positives (licit scenario)
    - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
    - spoof_pos - numpy.array of scores for the positives (spoof scenario)
    - omega - the omega parameter balancing between impostors and spoofing
      attacks
    - threshold - the given threshold
    - beta - the weight parameter balancing between real accesses and all the
      negative samples (impostors and spoofing attacks).

    Returns the tuple (frr, far, sfar, far_w, wer_wb, hter_w, threshold);
    the threshold is handed back unchanged as the last element.
    """
    # error rates at the threshold, licit scenario then spoof scenario
    licit_rates = farfrr(licit_neg, licit_pos, threshold)
    spoof_rates = farfrr(spoof_neg, spoof_pos, threshold)

    # index 0 is read as the acceptance rate, index 1 as the rejection rate;
    # the rejection rate is taken from the licit scenario
    far, frr = licit_rates[0], licit_rates[1]
    sfar = spoof_rates[0]

    far_w = weighted_err(far, sfar, omega)
    half_total = (far_w + frr) / 2
    weighted_total = weighted_err(frr, far_w, beta)

    return (frr, far, sfar, far_w, weighted_total, half_total, threshold)

310 

311 

def epsc_error_rates(
    licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
):
    """Calculates several error rates: FAR_w and WER_wb for the given weights
    (omega and beta) and thresholds (the thresholds need to be computed first
    using the method: epsc_thresholds() before passing to this method)

    Parameters
    ----------
    licit_neg : array_like
        array of scores for the negatives (licit scenario)
    licit_pos : array_like
        array of scores for the positives (licit scenario)
    spoof_neg : array_like
        array of scores for the negatives (spoof scenario)
    spoof_pos : array_like
        array of scores for the positives (spoof scenario)
    thresholds : array_like
        threshold values, indexed as ``thresholds[beta_index, omega_index]``
    omega : array_like
        array of the omega parameter balancing between impostors
        and spoofing attacks (a single value is also accepted)
    beta : array_like
        array of the beta parameter balancing between real accesses
        and all negatives (impostors and spoofing attacks) (a single value
        is also accepted)

    Returns
    -------
    far_w_errors: numpy.ndarray
        FAR_w, float64, one row per beta value and one column per omega
        value
    wer_wb_errors: numpy.ndarray
        WER_wb, same layout as ``far_w_errors``
    """
    # accept plain sequences (and single values for the weights), not only
    # numpy arrays
    thresholds = numpy.asarray(thresholds)
    omega = numpy.atleast_1d(omega)
    beta = numpy.atleast_1d(beta)

    table_shape = (beta.size, omega.size)
    far_w_errors = numpy.empty(table_shape, "float64")
    wer_wb_errors = numpy.empty(table_shape, "float64")

    for bindex, b in enumerate(beta):
        for windex, w in enumerate(omega):
            rates = error_rates_at_weight(
                licit_neg,
                licit_pos,
                spoof_neg,
                spoof_pos,
                w,
                thresholds[bindex, windex],
                b,
            )
            # positions 3 and 4 of the returned tuple are FAR_w and WER_wb
            far_w_errors[bindex, windex] = rates[3]
            wer_wb_errors[bindex, windex] = rates[4]

    return far_w_errors, wer_wb_errors

366 

367 

def all_error_rates(
    licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
):
    """Calculates several error rates: FRR, FAR, SFAR, FAR_w, WER_wb and
    HTER_w for the given weights (omega and beta) and thresholds (the
    thresholds need to be computed first using the method: epsc_thresholds()
    before passing to this method)

    Parameters
    ----------
    licit_neg : array_like
        array of scores for the negatives (licit scenario)
    licit_pos : array_like
        array of scores for the positives (licit scenario)
    spoof_neg : array_like
        array of scores for the negatives (spoof scenario)
    spoof_pos : array_like
        array of scores for the positives (spoof scenario)
    thresholds : array_like
        threshold values, indexed as ``thresholds[beta_index, omega_index]``
    omega : array_like
        array of the omega parameter balancing between impostors
        and spoofing attacks (a single value is also accepted)
    beta : array_like
        array of the beta parameter balancing between real accesses
        and all negatives (impostors and spoofing attacks) (a single value
        is also accepted)

    Returns
    -------
    tuple of six numpy.ndarray
        ``(frr_errors, far_errors, sfar_errors, far_w_errors, wer_wb_errors,
        hter_wb_errors)``. Each array is float64 with one row per beta value
        and one column per omega value.
    """
    # accept plain sequences (and single values for the weights), not only
    # numpy arrays
    thresholds = numpy.asarray(thresholds)
    omega = numpy.atleast_1d(omega)
    beta = numpy.atleast_1d(beta)

    table_shape = (beta.size, omega.size)
    # one table per error rate, in the order error_rates_at_weight() returns
    # them: FRR, FAR, SFAR, FAR_w, WER_wb, HTER_w
    tables = tuple(numpy.empty(table_shape, "float64") for _ in range(6))

    for bindex, b in enumerate(beta):
        for windex, w in enumerate(omega):
            rates = error_rates_at_weight(
                licit_neg,
                licit_pos,
                spoof_neg,
                spoof_pos,
                w,
                thresholds[bindex, windex],
                b,
            )
            # rates has a seventh entry (the threshold itself); zip stops
            # after the six tables, so it is not stored
            for table, value in zip(tables, rates):
                table[bindex, windex] = value

    return tables

437 

438 

def calc_aue(
    licit_neg,
    licit_pos,
    spoof_neg,
    spoof_pos,
    thresholds,
    omega,
    beta,
    l_bound=0,
    h_bound=1,
    var_param="omega",
):
    """Calculates AUE of EPSC for the given thresholds and weights

    The WER_wb values obtained from all_error_rates() are integrated over
    the varied parameter with the cumulative trapezoidal rule, between the
    first weight that is >= l_bound and the last weight that is <= h_bound.
    The WER_wb table is flattened before integration, so the parameter that
    is not varied is expected to hold a single value.

    Keyword arguments:

    - licit_neg - numpy.array of scores for the negatives (licit scenario)
    - licit_pos - numpy.array of scores for the positives (licit scenario)
    - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
    - spoof_pos - numpy.array of scores for the positives (spoof scenario)
    - thresholds - the thresholds, indexed by (beta, omega), as computed by
      epsc_thresholds()
    - omega - numpy.array of the omega parameter balancing between impostors
      and spoofing attacks
    - beta - numpy.array of the beta parameter balancing between real
      accesses and all the negatives
    - l_bound - lower bound of integration
    - h_bound - higher bound of integration
    - var_param - name of the parameter which is varied on the abscissa
      ('omega'; any other value selects beta)

    Returns the value of the integral between the two bounds.
    """

    from scipy import integrate

    # cumtrapz was renamed to cumulative_trapezoid and the old name is no
    # longer available in recent SciPy releases; use the new name when it
    # exists and fall back to the old one otherwise
    cumulative_trapezoid = getattr(integrate, "cumulative_trapezoid", None)
    if cumulative_trapezoid is None:
        cumulative_trapezoid = integrate.cumtrapz

    errors = all_error_rates(
        licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
    )
    # setting the weights to the varying parameter; the comparisons below
    # need an array
    weights = numpy.asarray(omega if var_param == "omega" else beta)

    wer_errors = errors[4].ravel()  # errors[4] holds WER_wb

    l_ind = numpy.where(weights >= l_bound)[0][0]
    h_ind = numpy.where(weights <= h_bound)[0][-1]

    # initial=0 puts a leading zero in front of the running integral, so
    # that cumulative[k] is the integral from weights[0] up to weights[k]
    cumulative = cumulative_trapezoid(wer_errors, weights, initial=0)

    return cumulative[h_ind] - cumulative[l_ind]