Source code for bob.pad.base.error_utils

#!/usr/bin/env python
# Ivana Chingovska <ivana.chingovska@idiap.ch>
# Fri Dec  7 12:33:37 CET 2012
"""Utility functions for computation of EPSC curve and related measurement"""

import logging
import re

from collections import defaultdict

import numpy

from bob.bio.base.score.load import _iterate_csv_score_file
from bob.measure import (
    eer_threshold,
    far_threshold,
    farfrr,
    frr_threshold,
    min_hter_threshold,
)

logger = logging.getLogger(__name__)


def _rate_from_criterion(method, prefix):
    """Returns ``1 / N`` for a criterion such as ``bpcer20`` (``N == 20``).

    Raises ``ValueError`` when the part of ``method`` left after removing
    ``prefix`` is not a non-zero number.
    """
    suffix = method.replace(prefix, "")
    try:
        return 1 / float(suffix)
    except (ValueError, ZeroDivisionError) as err:
        raise ValueError(
            "Invalid threshold criteria: {!r}; expected '{}' followed by a "
            "non-zero number, e.g. '{}20'".format(method, prefix, prefix)
        ) from err


def calc_threshold(
    method, pos, negs, all_negs, far_value=None, is_sorted=False
):
    """Calculates the threshold based on the given method.

    Parameters
    ----------
    method : str
        The threshold criterion (case insensitive). One of:

        * ``bpcerN`` (e.g. ``bpcer20``): threshold from
          :any:`apcer_threshold` with a desired APCER of ``1 / N``.
        * ``apcerN`` (e.g. ``apcer20``): threshold from ``frr_threshold``
          on ``all_negs`` with a desired value of ``1 / N``.
        * ``far``: threshold from ``far_threshold`` on ``all_negs`` at
          ``far_value``.
        * ``eer``: threshold from ``eer_threshold`` on ``all_negs``.
        * ``min-hter``: threshold from ``min_hter_threshold`` on
          ``all_negs``.
    pos : ``array_like``
        The positive scores.
    negs : list
        A list of array_like negative scores. Each item in the list
        corresponds to scores of one PAI. Only used by ``bpcerN``.
    all_negs : ``array_like``
        An array of all negative scores. This can be calculated from negs
        as well but we ask for it since you might have it already
        calculated.
    far_value : :obj:`float`, optional
        Required when method is ``far``; ignored otherwise.
    is_sorted : :obj:`bool`, optional
        If True, it means all scores are sorted and no sorting will happen.

    Returns
    -------
    float
        The calculated threshold.

    Raises
    ------
    ValueError
        If method is unknown, if the number in ``bpcerN``/``apcerN`` is
        missing, malformed or zero, or if method is ``far`` and
        ``far_value`` is not given.
    """
    method = method.lower()

    if "bpcer" in method:
        # "bpcerN" fixes the APCER at 1/N and lets the BPCER follow.
        desired_apcer = _rate_from_criterion(method, "bpcer")
        threshold = apcer_threshold(
            desired_apcer, pos, *negs, is_sorted=is_sorted
        )
    elif "apcer" in method:
        # "apcerN" fixes the rate given to frr_threshold at 1/N.
        desired_bpcer = _rate_from_criterion(method, "apcer")
        threshold = frr_threshold(
            all_negs, pos, desired_bpcer, is_sorted=is_sorted
        )
    elif method == "far":
        # Fail early with a clear message instead of handing None to
        # far_threshold.
        if far_value is None:
            raise ValueError("far_value must be given when method is 'far'")
        threshold = far_threshold(all_negs, pos, far_value, is_sorted=is_sorted)
    elif method == "eer":
        threshold = eer_threshold(all_negs, pos, is_sorted=is_sorted)
    elif method == "min-hter":
        threshold = min_hter_threshold(all_negs, pos, is_sorted=is_sorted)
    else:
        raise ValueError("Unknown threshold criteria: {}".format(method))

    return threshold
def apcer_threshold(desired_apcer, pos, *negs, is_sorted=False):
    """Finds the threshold that meets a desired APCER over all PAIs.

    One threshold is computed per PAI with ``far_threshold`` and the largest
    of them is returned. As APCER is the max of all APCER_PAI values, the
    threshold is chosen so that the real APCER is **at most** the desired
    value.

    Parameters
    ----------
    desired_apcer : float
        The desired APCER value.
    pos : list
        An array or list of positive scores in float.
    *negs
        A list of negative scores. Each item corresponds to the negative
        scores of one PAI.
    is_sorted : :obj:`bool`, optional
        Set to ``True`` if ALL arrays (pos and negs) are sorted.

    Returns
    -------
    float
        The computed threshold that satisfies the desired APCER.
    """
    per_pai_thresholds = [
        far_threshold(pai_scores, pos, desired_apcer, is_sorted=is_sorted)
        for pai_scores in negs
    ]
    return max(per_pai_thresholds)
def apcer_bpcer(threshold, pos, *negs):
    """Computes APCER_PAI, APCER, and BPCER given the positive scores and a
    list of negative scores and a threshold.

    Parameters
    ----------
    threshold : float
        The threshold to be used to compute the error rates.
    pos : list
        An array or list of positive scores in float.
    *negs
        A list of negative scores. Each item corresponds to the negative
        scores of one PAI. At least one item is required.

    Returns
    -------
    tuple
        A tuple such as (list of APCER_PAI, APCER, BPCER), where APCER is
        the maximum of the APCER_PAI values.

    Raises
    ------
    ValueError
        If no negative scores are given.
    """
    # Explicit check rather than ``assert`` so the validation survives
    # ``python -O``.
    if not negs:
        raise ValueError(
            "At least one set of negative scores (one PAI) is required, "
            "got: {!r}".format(negs)
        )

    apcers = []
    bpcer = None
    for neg in negs:
        # bpcer is overwritten on every pass; it will be the same in all
        # cases.
        far, bpcer = farfrr(neg, pos, threshold)
        apcers.append(far)

    return apcers, max(apcers), bpcer
def split_csv_pad_per_pai(filename, regexps=None, regexp_column="attack_type"):
    """Returns scores for Bona-Fide samples and scores for each PAI.

    By default, the ``attack_type`` column is used as indication for each
    Presentation Attack Instrument (PAI).

    For example, with default regexps and regexp_column, if you have scores
    like::

        claimed_id, test_label,              is_bonafide, attack_type, score
        001,        bona_fide_sample_1_path, True,        ,            0.9
        001,        print_sample_1_path,     False,       print,       0.6
        001,        print_sample_2_path,     False,       print,       0.6
        001,        replay_sample_1_path,    False,       replay,      0.2
        001,        replay_sample_2_path,    False,       replay,      0.2
        001,        mask_sample_1_path,      False,       mask,        0.5
        001,        mask_sample_2_path,      False,       mask,        0.5

    this function will return 1 set of positive scores, and 3 sets of
    negative scores (for each print, replay, and mask PAIs).

    Otherwise, you can provide a list of regular expressions that match each
    PAI. For example, with regexps as ['print', 'replay', 'mask'], if you
    have scores like::

        claimed_id, test_label,              is_bonafide, attack_type, score
        001,        bona_fide_sample_1_path, True,        ,            0.9
        001,        print_sample_1_path,     False,       print/1,     0.6
        001,        print_sample_2_path,     False,       print/2,     0.6
        001,        replay_sample_1_path,    False,       replay/1,    0.2
        001,        replay_sample_2_path,    False,       replay/2,    0.2
        001,        mask_sample_1_path,      False,       mask/1,      0.5
        001,        mask_sample_2_path,      False,       mask/2,      0.5

    the function will return 3 sets of negative scores (for print, replay,
    and mask PAIs, given in regexp).

    Parameters
    ----------
    filename : str
        Path to the score file.
    regexps : :obj:`list`, optional
        A list of regular expressions that match each PAI. If not given (or
        empty), the values in the column pointed by regexp_column are used
        to find scores for different PAIs.
    regexp_column : :obj:`str`, optional
        If a list of regular expressions is given, those patterns will be
        matched against the values in this column. Otherwise the values of
        this column are used directly as PAI names.
        default: ``attack_type``

    Returns
    -------
    tuple
        A tuple, ([positives], {'pai_name': [negatives]}), containing
        positive scores and a dict of negative scores mapping PAIs names to
        their respective scores. With regexps, the PAI name is the pattern
        that matched.

    Raises
    ------
    ValueError
        If none of the given regular expressions match the values in
        regexp_column.
    KeyError
        If regexp_column is not a column of the CSV file.
    """
    pos = []
    negs = defaultdict(list)
    logger.debug(f"Loading CSV score file: '{filename}'")

    # ``None`` (the default) and an empty list both mean "group by the raw
    # column value".
    regexps = [re.compile(pattern) for pattern in regexps] if regexps else []

    for row in _iterate_csv_score_file(filename):
        # if it is a Bona-Fide score
        if row["is_bonafide"].lower() == "true":
            pos.append(row["score"])
            continue

        if not regexps:
            negs[row[regexp_column]].append(row["score"])
            continue

        # Attack score with patterns given: the first matching pattern wins
        # and its pattern string becomes the PAI name.
        for pattern in regexps:
            if pattern.search(row[regexp_column]):
                negs[pattern.pattern].append(row["score"])
                break
        else:  # this else is for the for loop: ``for pattern in regexps:``
            raise ValueError(
                f"No regexps: {regexps} match `{row[regexp_column]}' "
                f"from `{regexp_column}' column."
            )

    logger.debug(f"Found {len(negs)} different PAIs names: {list(negs.keys())}")
    return pos, negs
def split_csv_pad(filename):
    """Loads PAD scores from a CSV score file, splits them by attack vs
    bonafide.

    The CSV must contain a ``is_bonafide`` column with each field either
    ``True`` or ``False`` (case insensitive). Any value other than ``true``
    is counted as an attack.

    Parameters
    ----------
    filename: str
        The path to a CSV file containing all the scores.

    Returns
    -------
    tuple
        Tuple of 1D-arrays: (attack, bonafide). The negative (attacks) and
        positives (bonafide) scores, as ``numpy.float64`` arrays. An array
        is empty when the file has no score of that kind.
    """
    logger.debug(f"Loading CSV score file: '{filename}'")

    attack_scores = []
    bonafide_scores = []
    for row in _iterate_csv_score_file(filename):
        if row["is_bonafide"].lower() == "true":
            bonafide_scores.append(row["score"])
        else:
            attack_scores.append(row["score"])

    # The trailing space after "and" keeps the two fragments from running
    # together in the log message.
    logger.debug(
        f"Found {len(attack_scores)} negative (attack), and "
        f"{len(bonafide_scores)} positive (bonafide) scores."
    )

    # Cast the scores to numpy float
    return (
        numpy.array(attack_scores, dtype=numpy.float64),
        numpy.array(bonafide_scores, dtype=numpy.float64),
    )