Coverage for src/bob/bio/base/script/error_utils.py: 72%
108 statements
« prev ^ index » next coverage.py v7.6.5, created at 2024-11-14 21:41 +0100
« prev ^ index » next coverage.py v7.6.5, created at 2024-11-14 21:41 +0100
1#!/usr/bin/env python
2# Ivana Chingovska <ivana.chingovska@idiap.ch>
3# Fri Dec 7 12:33:37 CET 2012
4"""Utility functions for computation of EPSC curve and related measurement"""
6import numpy
8from bob.measure import farfrr
def calc_pass_rate(threshold, attacks):
    """Calculates the rate of successful spoofing attacks

    An attack is counted as successful when its score is greater than or
    equal to the threshold.

    Parameters
    ----------
    threshold :
        the threshold used for classification
    attacks :
        array-like (numpy array, list or tuple) with the scores of the
        spoofing attacks

    Returns
    -------
    float
        rate of successful spoofing attacks
    """
    # asarray makes the element-wise comparison work for plain Python
    # sequences as well as for numpy arrays
    return (numpy.asarray(attacks) >= threshold).mean()
def weighted_neg_error_rate_criteria(
    data, weight, thres, beta=0.5, criteria="eer"
):
    """Evaluates the decision criteria at one threshold and one weight.

    The false acceptance rates of the licit and the spoof scenario are blended
    into FAR_w = (1 - weight) * FAR_licit + weight * FAR_spoof, which is then
    combined with the FRR of the licit scenario according to the criteria.

    Keyword parameters:

      - data - the development data used to determine the threshold. Sequence
        of 4 numpy.arrays containing: negatives (licit), positives (licit),
        negatives (spoof), positives (spoof)
      - weight - the weight parameter balancing between impostors and spoofing
        attacks
      - thres - the given threshold
      - beta - the weight parameter balancing between real accesses and all the
        negative samples (impostors and spoofing attacks). It is not used when
        the selected criteria is 'min-hter'.
      - criteria - decision threshold criteria:
          'eer'      -> abs(FAR_w - FRR) for beta == 0.5, otherwise
                        abs((1 - beta) * FRR - beta * FAR_w)
          'min-hter' -> (FAR_w + FRR) / 2
          any other value (e.g. 'wer') -> (1 - beta) * FRR + beta * FAR_w
    """
    # data is laid out as (licit_neg, licit_pos, spoof_neg, spoof_pos)
    licit_rates = farfrr(data[0], data[1], thres)
    spoof_rates = farfrr(data[2], data[3], thres)

    far_i = licit_rates[0]
    frr = licit_rates[1]  # spoof_rates[1] should have the same value
    far_s = spoof_rates[0]

    far_w = (1 - weight) * far_i + weight * far_s

    if criteria == "min-hter":
        return (far_w + frr) / 2

    if criteria != "eer":
        return (1 - beta) * frr + beta * far_w

    if beta == 0.5:
        return abs(far_w - frr)
    return abs((1 - beta) * frr - beta * far_w)
def epsc_weights(licit_neg, licit_pos, spoof_neg, spoof_pos, points=100):
    """Returns the weights for EPSC

    The result holds points + 1 values, i / points for i = 0 .. points. The
    four score arguments are accepted but not used.

    Keyword arguments:

      - points - number of points to calculate EPSC
    """
    increment = 1 / float(points)
    grid = (index * increment for index in range(points + 1))
    return numpy.fromiter(grid, dtype=float)
def recursive_thr_search(
    data, span_min, span_max, weight, beta=0.5, criteria="eer"
):
    """Recursive search for the optimal threshold given a criteria. It
    samples the search range at 101 evenly spaced thresholds and picks the one
    which minimizes the criteria (the central one when several thresholds
    tie). In the next search iteration, it examines the region one step on
    either side of that threshold. The procedure stops when the width of the
    search range, relative to span_max, is smaller than 1e-10, or when the
    search range has collapsed to a single point.

    Keyword arguments:

      - data - the development data used to determine the threshold. Sequence
        of 4 numpy.arrays containing: negatives (licit), positives (licit),
        negatives (spoof), positives (spoof)
      - span_min - the minimum of the search range
      - span_max - the maximum of the search range
      - weight - the weight parameter balancing between impostors and spoofing
        attacks
      - beta - the weight parameter balancing between real accesses and all the
        negative samples (impostors and spoofing attacks). It is passed on to
        weighted_neg_error_rate_criteria().
      - criteria - the decision threshold criteria ('eer' for EER, 'wer' for
        Minimum WER or 'min-hter' for Minimum HTER criteria).

    Returns the upper end of the final search range.
    """

    quit_thr = 1e-10
    steps = 100

    span_width = span_max - span_min

    # A collapsed range cannot be refined any further. Testing this first
    # also keeps the relative test below from evaluating 0 / 0, which used to
    # end in a ZeroDivisionError or in unbounded recursion on a NaN.
    if span_width == 0:
        return span_max

    # The relative width is undefined when span_max is exactly 0; keep
    # refining in that case until the range shrinks or collapses.
    if span_max != 0 and abs(span_width / span_max) < quit_thr:
        return span_max  # or span_min, it doesn't matter

    step_size = span_width / steps
    thresholds = numpy.array(
        [(i * step_size) + span_min for i in range(steps + 1)]
    )
    weighted_error_rates = numpy.array(
        [
            weighted_neg_error_rate_criteria(data, weight, thr, beta, criteria)
            for thr in thresholds
        ]
    )

    # all the thresholds which have minimum weighted error rate
    selected_thres = thresholds[
        numpy.where(weighted_error_rates == min(weighted_error_rates))
    ]
    # choose the centrally positioned threshold
    thr = selected_thres[selected_thres.size // 2]

    return recursive_thr_search(
        data, thr - step_size, thr + step_size, weight, beta, criteria
    )
def weighted_negatives_threshold(
    licit_neg, licit_pos, spoof_neg, spoof_pos, weight, beta=0.5, criteria="eer"
):
    """Calculates the threshold for achieving the given criteria between the
    FAR_w and the FRR, given the single value for the weight parameter
    balancing between impostors and spoofing attacks and a single value for the
    parameter beta balancing between the real accesses and the negatives
    (impostors and spoofing attacks)

    The search starts on the range from the smallest negative score to the
    largest positive score (both scenarios pooled) and is delegated to
    recursive_thr_search().

    Keyword parameters:

      - licit_neg - numpy.array of scores for the negatives (licit scenario)
      - licit_pos - numpy.array of scores for the positives (licit scenario)
      - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
      - spoof_pos - numpy.array of scores for the positives (spoof scenario)
      - weight - the weight parameter balancing between impostors and spoofing
        attacks
      - beta - the weight parameter balancing between real accesses and all the
        negative samples (impostors and spoofing attacks). It is passed on to
        recursive_thr_search().
      - criteria - the decision threshold criteria ('eer' for EER, 'wer' for
        Minimum WER or 'min-hter' for Minimum HTER criteria).
    """
    # numpy reductions instead of the builtin min()/max(): the builtins walk
    # the array element by element in Python and give order-dependent results
    # when a NaN is present.

    # the min of the span where we will search for the threshold
    span_min = numpy.min(numpy.append(licit_neg, spoof_neg))
    # the max of the span where we will search for the threshold
    span_max = numpy.max(numpy.append(licit_pos, spoof_pos))

    # pack the data into a single tuple
    data = (licit_neg, licit_pos, spoof_neg, spoof_pos)

    return recursive_thr_search(
        data, span_min, span_max, weight, beta, criteria
    )
def _epsc_weight_values(values, points):
    """Returns `values` as a numpy array with at least one dimension, or the
    default grid i / points (i = 0 .. points) when `values` is None."""
    if values is None:
        step_size = 1 / float(points)
        return numpy.array([(i * step_size) for i in range(points + 1)])
    # ndmin=1 wraps scalars (including 0-d arrays) so that the result can
    # always be iterated; sequences and arrays are copied as they are
    return numpy.array(values, ndmin=1)


def epsc_thresholds(
    licit_neg,
    licit_pos,
    spoof_neg,
    spoof_pos,
    points=100,
    criteria="eer",
    omega=None,
    beta=None,
):
    """Calculates the optimal thresholds for EPSC, for a range of the weight
    parameter balancing between impostors and spoofing attacks, and for a range
    of the beta parameter balancing between real accesses and all the negatives
    (impostors and spoofing attacks)

    Keyword arguments:

      - licit_neg - numpy.array of scores for the negatives (licit scenario)
      - licit_pos - numpy.array of scores for the positives (licit scenario)
      - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
      - spoof_pos - numpy.array of scores for the positives (spoof scenario)
      - points - number of points to calculate EPSC. Only used for a parameter
        (omega or beta) that is None.
      - criteria - the decision threshold criteria ('eer', 'wer' or 'min-hter')
      - omega - the value of the parameter omega, balancing between impostors and
        spoofing attacks. If None, it is going to span the full range [0,1].
        Otherwise, can be set to a fixed value or a list of values.
      - beta - the value of the parameter beta, balancing between real accesses
        and all the negatives (zero-effort impostors and spoofing attacks). If
        None, it is going to span the full range [0,1]. Otherwise, can be set to a
        fixed value or a list of values.

    Returns the tuple (omega, beta, thresholds): the omega and beta values as
    numpy arrays and a float64 array of shape (beta.size, omega.size) where
    thresholds[i, j] is the threshold found for beta[i] and omega[j].
    """
    omega = _epsc_weight_values(omega, points)
    beta = _epsc_weight_values(beta, points)

    # every cell is filled in the loop below, so no initialisation is needed
    thresholds = numpy.empty((beta.size, omega.size), dtype="float64")
    for bindex, b in enumerate(beta):
        thresholds[bindex, :] = numpy.array(
            [
                weighted_negatives_threshold(
                    licit_neg,
                    licit_pos,
                    spoof_neg,
                    spoof_pos,
                    w,
                    b,
                    criteria=criteria,
                )
                for w in omega
            ],
            "float64",
        )

    return omega, beta, thresholds
def weighted_err(error_1, error_2, weight):
    """Blends two error rates: (1 - weight) * error_1 + weight * error_2

    Keyword arguments:
      - error_1 - the first input error rate (FAR for zero effort impostors
        usually); it dominates when weight is close to 0
      - error_2 - the second input error rate (SFAR); it dominates when weight
        is close to 1
      - weight - the given weight
    """
    complement = 1 - weight
    return complement * error_1 + weight * error_2
def error_rates_at_weight(
    licit_neg, licit_pos, spoof_neg, spoof_pos, omega, threshold, beta=0.5
):
    """Calculates several error rates at a given threshold: FRR, FAR
    (zero-effort impostors), SFAR, FAR_w, WER_wb and HTER_w for a given value
    of omega and beta.

    Keyword arguments:

      - licit_neg - numpy.array of scores for the negatives (licit scenario)
      - licit_pos - numpy.array of scores for the positives (licit scenario)
      - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
      - spoof_pos - numpy.array of scores for the positives (spoof scenario)
      - omega - the omega parameter balancing between impostors and spoofing
        attacks
      - threshold - the given threshold
      - beta - the weight parameter balancing between real accesses and all the
        negative samples (impostors and spoofing attacks).

    Returns the tuple (frr, far, sfar, far_w, wer_wb, hter_w, threshold); the
    last element is the threshold that was passed in, unchanged.
    """
    # error rates at the threshold, licit scenario first, then spoof scenario
    licit_rates = farfrr(licit_neg, licit_pos, threshold)
    spoof_rates = farfrr(spoof_neg, spoof_pos, threshold)

    far = licit_rates[0]
    # the FRR could be taken from spoof_rates as well, it doesn't matter
    frr = licit_rates[1]
    sfar = spoof_rates[0]

    far_w = weighted_err(far, sfar, omega)
    wer_wb = weighted_err(frr, far_w, beta)
    hter_w = (far_w + frr) / 2

    return (frr, far, sfar, far_w, wer_wb, hter_w, threshold)
def epsc_error_rates(
    licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
):
    """Calculates FAR_w and WER_wb for every combination of the given weights
    (omega and beta), each evaluated at its own threshold (the thresholds need
    to be computed first using the method: epsc_thresholds() before passing to
    this method)

    Parameters
    ----------
    licit_neg : array_like
        array of scores for the negatives (licit scenario)
    licit_pos : array_like
        array of scores for the positives (licit scenario)
    spoof_neg : array_like
        array of scores for the negatives (spoof scenario)
    spoof_pos : array_like
        array of scores for the positives (spoof scenario)
    thresholds : array_like
        ndarray with threshold values, indexed as [beta index, omega index]
    omega : array_like
        array of the omega parameter balancing between impostors
        and spoofing attacks
    beta : array_like
        array of the beta parameter balancing between real accesses
        and all negatives (impostors and spoofing attacks)

    Returns
    -------
    far_w_errors: array_like
        FAR_w, shape (beta.size, omega.size)
    wer_wb_errors: array_like
        WER_wb, shape (beta.size, omega.size)
    """
    shape = (beta.size, omega.size)
    far_w_errors = numpy.empty(shape, dtype="float64")
    wer_wb_errors = numpy.empty(shape, dtype="float64")

    for row, b in enumerate(beta):
        far_w_row = []
        wer_wb_row = []
        for col, w in enumerate(omega):
            rates = error_rates_at_weight(
                licit_neg,
                licit_pos,
                spoof_neg,
                spoof_pos,
                w,
                thresholds[row, col],
                b,
            )
            # positions 3 and 4 of the result hold FAR_w and WER_wb
            far_w_row.append(rates[3])
            wer_wb_row.append(rates[4])
        far_w_errors[row, :] = far_w_row
        wer_wb_errors[row, :] = wer_wb_row

    return far_w_errors, wer_wb_errors
def all_error_rates(
    licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
):
    """Calculates FRR, FAR, SFAR, FAR_w, WER_wb and HTER_w for every
    combination of the given weights (omega and beta), each evaluated at its
    own threshold (the thresholds need to be computed first using the method:
    epsc_thresholds() before passing to this method)

    Parameters
    ----------
    licit_neg : array_like
        array of scores for the negatives (licit scenario)
    licit_pos : array_like
        array of scores for the positives (licit scenario)
    spoof_neg : array_like
        array of scores for the negatives (spoof scenario)
    spoof_pos : array_like
        array of scores for the positives (spoof scenario)
    thresholds : array_like
        ndarray with threshold values, indexed as [beta index, omega index]
    omega : array_like
        array of the omega parameter balancing between impostors
        and spoofing attacks
    beta : array_like
        array of the beta parameter balancing between real accesses
        and all negatives (impostors and spoofing attacks)

    Returns
    -------
    frr_errors: array_like
        FRR
    far_errors: array_like
        FAR (zero-effort impostors)
    sfar_errors: array_like
        SFAR
    far_w_errors: array_like
        FAR_w
    wer_wb_errors: array_like
        WER_wb
    hter_wb_errors: array_like
        HTER_w

    Every returned array has shape (beta.size, omega.size).
    """
    shape = (beta.size, omega.size)
    # one table per error rate, in the order the rates are reported by
    # error_rates_at_weight(): frr, far, sfar, far_w, wer_wb, hter_w
    tables = tuple(numpy.empty(shape, dtype="float64") for _ in range(6))

    for row, b in enumerate(beta):
        rates = [
            error_rates_at_weight(
                licit_neg,
                licit_pos,
                spoof_neg,
                spoof_pos,
                w,
                thresholds[row, col],
                b,
            )
            for col, w in enumerate(omega)
        ]
        for position, table in enumerate(tables):
            table[row, :] = [entry[position] for entry in rates]

    return tables
def calc_aue(
    licit_neg,
    licit_pos,
    spoof_neg,
    spoof_pos,
    thresholds,
    omega,
    beta,
    l_bound=0,
    h_bound=1,
    var_param="omega",
):
    """Calculates AUE of EPSC for the given thresholds and weights

    The WER_wb values are integrated over the varied parameter with the
    trapezoidal rule, between the first grid value >= l_bound and the last
    grid value <= h_bound.

    Keyword arguments:

      - licit_neg - numpy.array of scores for the negatives (licit scenario)
      - licit_pos - numpy.array of scores for the positives (licit scenario)
      - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
      - spoof_pos - numpy.array of scores for the positives (spoof scenario)
      - thresholds - ndarray with the threshold values, indexed as
        [beta index, omega index]
      - omega - numpy.array of the omega values
      - beta - numpy.array of the beta values
      - l_bound - lower bound of integration
      - h_bound - higher bound of integration
      - var_param - name of the parameter which is varied on the abscissa
        ('omega'; any other value selects beta)

    NOTE(review): the WER_wb table is flattened before integration, so it must
    hold exactly one value per abscissa point; this looks like it expects the
    parameter that is not varied to hold a single value - confirm with callers.
    """

    # cumtrapz is no longer available in recent SciPy releases; fall back to
    # it only when the newer name is missing (older SciPy)
    try:
        from scipy.integrate import cumulative_trapezoid
    except ImportError:
        from scipy.integrate import cumtrapz as cumulative_trapezoid

    errors = all_error_rates(
        licit_neg, licit_pos, spoof_neg, spoof_pos, thresholds, omega, beta
    )
    # setting the weights to the varying parameter
    weights = omega if var_param == "omega" else beta

    wer_errors = errors[4].reshape(1, errors[4].size)

    l_ind = numpy.where(weights >= l_bound)[0][0]
    h_ind = numpy.where(weights <= h_bound)[0][-1]
    aue = cumulative_trapezoid(wer_errors, weights)
    # for indexing purposes, aue is cumulative integration: the leading 0
    # makes aue[i] the integral from weights[0] up to weights[i]
    aue = numpy.append([0], aue)
    aue = aue[h_ind] - aue[l_ind]

    return aue