Coverage for src/bob/bio/base/script/error

1#!/usr/bin/env python

2# Ivana Chingovska <ivana.chingovska@idiap.ch>

3# Fri Dec 7 12:33:37 CET 2012

4"""Utility functions for computation of EPSC curve and related measurement"""

6import numpy

8from bob.measure import farfrr

def calc_pass_rate(threshold, attacks):
    """Calculates the rate of successful spoofing attacks

    An attack is counted as successful when its score is greater than or
    equal to ``threshold``.

    Parameters
    ----------
    threshold :
        the threshold used for classification
    attacks : array_like
        the scores of the spoofing attacks (a numpy array or any sequence
        numpy can convert to one)

    Returns
    -------
    float
        rate of successful spoofing attacks, i.e. the mean of the boolean
        mask ``attacks >= threshold``
    """
    # asanyarray lets plain lists/tuples through (they do not support an
    # element-wise ``>=`` against a scalar) while leaving ndarrays and their
    # subclasses untouched.
    scores = numpy.asanyarray(attacks)
    return (scores >= threshold).mean()

def weighted_neg_error_rate_criteria(
    data, weight, thres, beta=0.5, criteria="eer"
):
    """Evaluates the threshold-selection objective at one threshold.

    The weighted false acceptance rate is
    ``far_w = (1 - weight) * far_i + weight * far_s`` where ``far_i`` comes
    from the licit scores and ``far_s`` from the spoof scores. The returned
    value depends on ``criteria``:

    - ``'eer'``: ``abs(far_w - frr)`` when ``beta == 0.5``, otherwise
      ``abs((1 - beta) * frr - beta * far_w)``
    - ``'min-hter'``: ``(far_w + frr) / 2`` (``beta`` is ignored)
    - anything else (documented as ``'wer'``):
      ``(1 - beta) * frr + beta * far_w``

    Keyword parameters:

    - data - the development data used to determine the threshold. Sequence
      of 4 numpy.arrays containing: negatives (licit), positives (licit),
      negatives (spoof), positives (spoof)
    - weight - the weight parameter balancing between impostors and spoofing
      attacks
    - thres - the given threshold
    - beta - the weight parameter balancing between real accesses and all the
      negative samples (impostors and spoofing attacks). Not considered when
      the selected criteria is 'min-hter'.
    - criteria - 'eer', 'wer' or 'min-hter' criteria for decision threshold
    """
    licit_neg, licit_pos = data[0], data[1]
    spoof_neg, spoof_pos = data[2], data[3]

    licit_rates = farfrr(licit_neg, licit_pos, thres)
    spoof_rates = farfrr(spoof_neg, spoof_pos, thres)

    # Entry 0 of each farfrr result is used as the FAR, entry 1 as the FRR.
    # Only the licit FRR is read; the original author notes the spoof FRR
    # should have the same value.
    far_i = licit_rates[0]
    frr = licit_rates[1]
    far_s = spoof_rates[0]

    far_w = (1 - weight) * far_i + weight * far_s

    if criteria == "min-hter":
        return (far_w + frr) / 2
    if criteria != "eer":
        return (1 - beta) * frr + beta * far_w
    if beta == 0.5:
        return abs(far_w - frr)
    return abs((1 - beta) * frr - beta * far_w)

def epsc_weights(licit_neg, licit_pos, spoof_neg, spoof_pos, points=100):
    """Returns the weights for EPSC

    The result holds ``points + 1`` values ``i / points`` for
    ``i = 0 .. points``. The four score arguments are accepted but not used
    by this function.

    Keyword arguments:

    - points - number of points to calculate EPSC
    """
    increment = 1 / float(points)
    # Same products as ``i * increment`` for each integer i, computed in one
    # vectorised multiplication.
    indices = numpy.array(range(points + 1), dtype="float64")
    return indices * increment

def recursive_thr_search(
    data, span_min, span_max, weight, beta=0.5, criteria="eer"
):
    """Iteratively refined search for the optimal threshold given a criteria.

    It evaluates the current range of thresholds at 101 evenly spaced points
    (100 steps) and picks the one with the smallest value of
    ``weighted_neg_error_rate_criteria``; when several points tie, the
    centrally positioned one is taken. The next round examines the region one
    step on either side of that point. The procedure stops when the width of
    the search range, relative to ``span_max``, is smaller than 1e-10, or
    when the range has collapsed to zero width.

    The search is written as a loop rather than as a recursive call, so its
    depth is not limited by the interpreter's recursion limit.

    Keyword arguments:

    - data - the development data used to determine the threshold. Sequence
      of 4 numpy.arrays containing: negatives (licit), positives (licit),
      negatives (spoof), positives (spoof)
    - span_min - the minimum of the search range
    - span_max - the maximum of the search range
    - weight - the weight parameter balancing between impostors and spoofing
      attacks
    - beta - the weight parameter balancing between real accesses and all the
      negative samples (impostors and spoofing attacks). Not considered when
      the selected criteria is 'min-hter'.
    - criteria - the decision threshold criteria ('eer' for EER, 'wer' for
      Minimum WER or 'min-hter' for Minimum HTER criteria).

    Returns the upper end of the final search range.

    Raises ValueError if the search range is not finite (nan or infinite
    bounds), since the search could never converge.
    """
    quit_thr = 1e-10
    steps = 100

    while True:
        span = span_max - span_min
        if not numpy.isfinite(span):
            raise ValueError(
                "threshold search range must be finite, got "
                "[%r, %r]" % (span_min, span_max)
            )
        # A zero-width range is fully converged. Checking it first also keeps
        # the relative test below from dividing zero by zero.
        if span == 0:
            return span_max
        # The relative test is skipped when span_max is exactly 0 (it would
        # divide by zero); the search just keeps narrowing the range instead.
        if span_max != 0 and abs(span / span_max) < quit_thr:
            return span_max  # or span_min, it doesn't matter

        step_size = span / steps
        thresholds = numpy.arange(steps + 1) * step_size + span_min
        weighted_error_rates = numpy.array(
            [
                weighted_neg_error_rate_criteria(
                    data, weight, thr, beta, criteria
                )
                for thr in thresholds
            ]
        )
        # all the thresholds which have minimum weighted error rate
        selected_thres = thresholds[
            weighted_error_rates == min(weighted_error_rates)
        ]
        # choose the centrally positioned threshold
        thr = selected_thres[selected_thres.size // 2]
        span_min, span_max = thr - step_size, thr + step_size

141

142

def weighted_negatives_threshold(
    licit_neg, licit_pos, spoof_neg, spoof_pos, weight, beta=0.5, criteria="eer"
):
    """Finds the threshold that satisfies ``criteria`` between FAR_w and FRR.

    The search range starts at the smallest negative score (licit and spoof
    negatives pooled) and ends at the largest positive score (licit and spoof
    positives pooled); the actual search is delegated to
    ``recursive_thr_search``.

    Keyword parameters:

    - licit_neg - numpy.array of scores for the negatives (licit scenario)
    - licit_pos - numpy.array of scores for the positives (licit scenario)
    - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
    - spoof_pos - numpy.array of scores for the positives (spoof scenario)
    - weight - the weight parameter balancing between impostors and spoofing
      attacks
    - beta - the weight parameter balancing between real accesses and all the
      negative samples (impostors and spoofing attacks). Not considered when
      the selected criteria is 'min-hter'.
    - criteria - the decision threshold criteria ('eer' for EER, 'wer' for
      Minimum WER or 'min-hter' for Minimum HTER criteria).
    """
    # lower end of the search range: smallest score among all negatives
    pooled_negatives = numpy.append(licit_neg, spoof_neg)
    search_floor = min(pooled_negatives)

    # upper end of the search range: largest score among all positives
    pooled_positives = numpy.append(licit_pos, spoof_pos)
    search_ceiling = max(pooled_positives)

    # the four score sets travel together as one tuple
    score_sets = (licit_neg, licit_pos, spoof_neg, spoof_pos)
    return recursive_thr_search(
        score_sets, search_floor, search_ceiling, weight, beta, criteria
    )

181

182

def epsc_thresholds(
    licit_neg,
    licit_pos,
    spoof_neg,
    spoof_pos,
    points=100,
    criteria="eer",
    omega=None,
    beta=None,
):
    """Calculates the optimal thresholds for EPSC over a grid of weights.

    One threshold is computed with ``weighted_negatives_threshold`` for every
    (beta, omega) pair.

    Keyword arguments:

    - licit_neg - numpy.array of scores for the negatives (licit scenario)
    - licit_pos - numpy.array of scores for the positives (licit scenario)
    - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
    - spoof_pos - numpy.array of scores for the positives (spoof scenario)
    - points - number of points to calculate EPSC
    - criteria - the decision threshold criteria ('eer', 'wer' or 'min-hter')
    - omega - the value of the parameter omega, balancing between impostors
      and spoofing attacks. If None, it spans ``points + 1`` evenly spaced
      values over [0,1]. Otherwise, can be set to a fixed value or a list,
      tuple or numpy.ndarray of values.
    - beta - the value of the parameter beta, balancing between real accesses
      and all the negatives (zero-effort impostors and spoofing attacks).
      Handled exactly like omega.

    Returns the tuple ``(omega, beta, thresholds)`` where omega and beta are
    the numpy arrays actually used and thresholds is a float64 array of shape
    ``(beta.size, omega.size)``.
    """
    step_size = 1 / float(points)

    def _as_weights(value):
        # None -> full grid; sequence -> array copy; scalar -> 1-element array
        if value is None:
            grid = numpy.array(range(points + 1), dtype="float64")
            return grid * step_size
        if isinstance(value, (list, tuple, numpy.ndarray)):
            return numpy.array(value)
        return numpy.array([value])

    omega = _as_weights(omega)
    beta = _as_weights(beta)

    thresholds = numpy.empty((beta.size, omega.size), dtype="float64")
    for row, b in enumerate(beta):
        row_values = [
            weighted_negatives_threshold(
                licit_neg,
                licit_pos,
                spoof_neg,
                spoof_pos,
                w,
                b,
                criteria=criteria,
            )
            for w in omega
        ]
        thresholds[row, :] = numpy.array(row_values, "float64")

    return omega, beta, thresholds

258

259

def weighted_err(error_1, error_2, weight):
    """Blends two error rates: ``(1 - weight) * error_1 + weight * error_2``

    Keyword arguments:

    - error_1 - the first input error rate (FAR for zero effort impostors
      usually); it receives the share ``1 - weight``
    - error_2 - the second input error rate (SFAR); it receives the share
      ``weight``
    - weight - the given weight
    """
    first_share = (1 - weight) * error_1
    second_share = weight * error_2
    return first_share + second_share

270

271

def error_rates_at_weight(
    licit_neg, licit_pos, spoof_neg, spoof_pos, omega, threshold, beta=0.5
):
    """Calculates FRR, FAR (zero-effort impostors), SFAR, FAR_w, WER_wb and
    HTER_w at a given threshold for a single pair of weights.

    Parameters
    ----------
    licit_neg :
        numpy.array of scores for the negatives (licit scenario)
    licit_pos :
        numpy.array of scores for the positives (licit scenario)
    spoof_neg :
        numpy.array of scores for the negatives (spoof scenario)
    spoof_pos :
        numpy.array of scores for the positives (spoof scenario)
    omega :
        the omega parameter balancing between impostors and spoofing attacks
    threshold :
        the given threshold
    beta :
        the weight parameter balancing between real accesses and all the
        negative samples (impostors and spoofing attacks)

    Returns
    -------
    tuple
        ``(frr, far, sfar, far_w, wer_wb, hter_w, threshold)``; the threshold
        that was passed in is echoed back as the last element
    """
    # error rates at the threshold, licit scenario then spoof scenario
    licit_rates = farfrr(licit_neg, licit_pos, threshold)
    spoof_rates = farfrr(spoof_neg, spoof_pos, threshold)

    # The FRR is read from the licit result; per the original author it could
    # equally be taken from the spoof result.
    far, frr = licit_rates[0], licit_rates[1]
    sfar = spoof_rates[0]

    far_w = weighted_err(far, sfar, omega)
    wer_wb = weighted_err(frr, far_w, beta)
    hter_w = (far_w + frr) / 2

    return (frr, far, sfar, far_w, wer_wb, hter_w, threshold)

310

311

def epsc_error_rates(
    licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
):
    """Calculates FAR_w and WER_wb for every (beta, omega) pair.

    The thresholds need to be computed first (using ``epsc_thresholds()``)
    before being passed to this method; ``thresholds[i, j]`` is used for
    ``beta[i]`` and ``omega[j]``.

    Parameters
    ----------
    licit_neg : array_like
        array of scores for the negatives (licit scenario)
    licit_pos : array_like
        array of scores for the positives (licit scenario)
    spoof_neg : array_like
        array of scores for the negatives (spoof scenario)
    spoof_pos : array_like
        array of scores for the positives (spoof scenario)
    thresholds : array_like
        ndarray with threshold values, indexed as ``[beta index, omega index]``
    omega : numpy.ndarray
        array of the omega parameter balancing between impostors
        and spoofing attacks
    beta : numpy.ndarray
        array of the beta parameter balancing between real accesses
        and all negatives (impostors and spoofing attacks)

    Returns
    -------
    far_w_errors : numpy.ndarray
        FAR_w, float64, shape ``(beta.size, omega.size)``
    wer_wb_errors : numpy.ndarray
        WER_wb, float64, shape ``(beta.size, omega.size)``
    """
    grid_shape = (beta.size, omega.size)
    far_w_errors = numpy.empty(grid_shape, dtype="float64")
    wer_wb_errors = numpy.empty(grid_shape, dtype="float64")

    for row, b in enumerate(beta):
        rates = [
            error_rates_at_weight(
                licit_neg,
                licit_pos,
                spoof_neg,
                spoof_pos,
                w,
                thresholds[row, col],
                b,
            )
            for col, w in enumerate(omega)
        ]
        # positions 3 and 4 of each result tuple are FAR_w and WER_wb
        far_w_errors[row, :] = [entry[3] for entry in rates]
        wer_wb_errors[row, :] = [entry[4] for entry in rates]

    return far_w_errors, wer_wb_errors

366

367

def all_error_rates(
    licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
):
    """Calculates FRR, FAR, SFAR, FAR_w, WER_wb and HTER_w for every
    (beta, omega) pair.

    The thresholds need to be computed first (using ``epsc_thresholds()``)
    before being passed to this method; ``thresholds[i, j]`` is used for
    ``beta[i]`` and ``omega[j]``.

    Parameters
    ----------
    licit_neg : array_like
        array of scores for the negatives (licit scenario)
    licit_pos : array_like
        array of scores for the positives (licit scenario)
    spoof_neg : array_like
        array of scores for the negatives (spoof scenario)
    spoof_pos : array_like
        array of scores for the positives (spoof scenario)
    thresholds : array_like
        ndarray with threshold values, indexed as ``[beta index, omega index]``
    omega : numpy.ndarray
        array of the omega parameter balancing between impostors
        and spoofing attacks
    beta : numpy.ndarray
        array of the beta parameter balancing between real accesses
        and all negatives (impostors and spoofing attacks)

    Returns
    -------
    tuple of six numpy.ndarray
        ``(frr_errors, far_errors, sfar_errors, far_w_errors, wer_wb_errors,
        hter_wb_errors)``, each float64 with shape ``(beta.size, omega.size)``
    """
    grid_shape = (beta.size, omega.size)
    # One table per leading entry of the error_rates_at_weight result, in the
    # same order: frr, far, sfar, far_w, wer_wb, hter_w.
    tables = tuple(numpy.empty(grid_shape, dtype="float64") for _ in range(6))

    for row, b in enumerate(beta):
        rates = [
            error_rates_at_weight(
                licit_neg,
                licit_pos,
                spoof_neg,
                spoof_pos,
                w,
                thresholds[row, col],
                b,
            )
            for col, w in enumerate(omega)
        ]
        for slot, table in enumerate(tables):
            table[row, :] = [entry[slot] for entry in rates]

    return tables

437

438

def calc_aue(
    licit_neg,
    licit_pos,
    spoof_neg,
    spoof_pos,
    thresholds,
    omega,
    beta,
    l_bound=0,
    h_bound=1,
    var_param="omega",
):
    """Calculates AUE of EPSC for the given thresholds and weights

    The WER_wb values returned by ``all_error_rates`` are flattened and
    integrated cumulatively (trapezoidal rule) over the varying weight; the
    result is the integral between the first weight ``>= l_bound`` and the
    last weight ``<= h_bound``.

    Keyword arguments:

    - licit_neg - numpy.array of scores for the negatives (licit scenario)
    - licit_pos - numpy.array of scores for the positives (licit scenario)
    - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
    - spoof_pos - numpy.array of scores for the positives (spoof scenario)
    - thresholds - ndarray with threshold values, as computed by
      epsc_thresholds()
    - omega - numpy.array of the omega parameter balancing between impostors
      and spoofing attacks
    - beta - numpy.array of the beta parameter balancing between real
      accesses and all negatives
    - l_bound - lower bound of integration
    - h_bound - higher bound of integration
    - var_param - name of the parameter which is varied on the abscissa
      ('omega'; any other value selects beta)

    NOTE(review): the flattened WER_wb array has ``beta.size * omega.size``
    entries and is integrated against the varying weights only, so the
    parameter that is not varied presumably has to hold a single value -
    confirm against callers.
    """

    from scipy import integrate

    # Newer SciPy releases no longer provide ``cumtrapz``; prefer its
    # replacement and fall back for older releases that only ship the old
    # name.
    cumulative_trapezoid = getattr(integrate, "cumulative_trapezoid", None)
    if cumulative_trapezoid is None:
        cumulative_trapezoid = integrate.cumtrapz

    # The error rates do not depend on which parameter is varied; only the
    # abscissa does.
    errors = all_error_rates(
        licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
    )
    weights = omega if var_param == "omega" else beta

    wer_errors = errors[4].reshape(1, errors[4].size)

    l_ind = numpy.where(weights >= l_bound)[0][0]
    h_ind = numpy.where(weights <= h_bound)[0][-1]
    aue = cumulative_trapezoid(wer_errors, weights)
    # Prepend 0 so that aue[k] is the integral up to weights[k]; the append
    # also flattens the (1, n - 1) result to one dimension.
    aue = numpy.append([0], aue)

    return aue[h_ind] - aue[l_ind]

Coverage for src/bob/bio/base/script/error_utils.py: 72%

108 statements