Source code for bob.db.casia_fasd

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
"""
The CASIA-FASD database is a spoofing attack database which consists of three
types of attacks: warped printed photographs, printed photographs with cut eyes
and video attacks. The samples are taken with three types of cameras: low
quality, normal quality and high quality.

The actual raw data for the database should be downloaded from the original
URL. This package only contains the Bob accessor methods to use the DB directly
from python, with our certified protocols.

References:

  1. Z. Zhang, J. Yan, S. Lei, D. Yi, S. Z. Li: "A Face Antispoofing Database
  with Diverse Attacks", In proceedings of the 5th IAPR International
  Conference on Biometrics (ICB'12), New Delhi, India, 2012."""

import os
import six
import numpy
from bob.db.base import utils
from .models import *


[docs]class Database(object):
    def __init__(self, foldsdir=None):

        from .driver import Interface
        self.info = Interface()
        self.groups = ('train', 'test')
        self.classes = ('attack', 'real')
        self.qualities = ('low', 'normal', 'high')
        self.types = ('warped', 'cut', 'video')
        self.ids = list(range(1, 51))
        package_directory = os.path.dirname(os.path.abspath(__file__))
        if foldsdir == None:
            self.foldsdir = os.path.join(package_directory, 'newfolds')
        else:
            self.foldsdir = foldsdir

[docs]    def set_foldsdir(self, foldsdir):
        """Sets the directory holding the cross validation protocol of the database"""
        self.foldsdir = foldsdir

[docs]    def check_validity(self, l, obj, valid, default):
        """Checks validity of user input data against a set of valid values"""
        if not l: return default
        elif isinstance(l, six.string_types) or isinstance(
                l, six.integer_types):
            return self.check_validity((l, ), obj, valid, default)
        for k in l:
            if k not in valid:
                raise RuntimeError(
                    'Invalid %s "%s". Valid values are %s, or lists/tuples of those'
                    % (obj, k, valid))
        return l

[docs]    def get_file(self, pc):
        '''Returns the full file path given the path components pc'''
        from pkg_resources import resource_filename
        if os.path.isabs(pc):
            return pc
        return resource_filename(__name__, os.path.join(pc))

[docs]    def files(self,
              directory=None,
              extension=None,
              ids=[],
              groups=None,
              cls=None,
              qualities=None,
              types=None):
        """Returns a set of filenames for the specific query by the user.

    .. deprecated:: 1.1.0

      This function is *deprecated*, use :py:meth:`.Database.objects` instead.

    Keyword Parameters:

    directory
      A directory name that will be prepended to the final filepath returned

    extension
      A filename extension that will be appended to the final filepath returned

    ids
      The id of the client whose videos need to be retrieved. Should be an integer number in the range 1-50 (the total number of client is 50

    groups
      One of the protocolar subgroups of data as specified in the tuple groups, or a
      tuple with several of them.  If you set this parameter to an empty string
      or the value None, we use reset it to the default which is to get all.

    cls
      Either "attack", "real" or a combination of those (in a
      tuple). Defines the class of data to be retrieved.  If you set this
      parameter to an empty string or the value None, it will be set to the tuple ("real", "attack").

    qualities
      Either "low", "normal" or "high" or any combination of those (in a
      tuple). Defines the qualities of the videos in the database that are going to be used. If you set this
      parameter to the value None, the videos of all qualities are returned ("low", "normal", "high").

    types
      Either "warped", "cut" or "video" or any combination of those (in a
      tuple). Defines the types of attack videos in the database that are going to be used. If you set this
      parameter to the value None, the videos of all the attack types are returned ("warped", "cut", "video").

    Returns: A dictionary containing the resolved filenames considering all
    the filtering criteria. The keys of the dictionary are just pro-forma (for uniformity with the other databases).
    """

        import warnings
        warnings.warn(
            "The method Database.files() is deprecated, use Database.objects() for more powerful object retrieval",
            DeprecationWarning)

        def make_path(stem, directory, extension):
            if not extension: extension = ''
            if directory: return os.path.join(directory, stem + extension)
            return stem + extension

        # check if groups set are valid
        VALID_GROUPS = self.groups
        groups = self.check_validity(groups, "group", VALID_GROUPS,
                                     VALID_GROUPS)

        # by default, do NOT grab enrollment data from the database
        VALID_CLASSES = self.classes
        VALID_TYPES = self.types
        if cls == None and types != None:  # types are strictly specified which means we don't need the calss of real accesses
            cls = ('attack', )
        else:
            cls = self.check_validity(cls, "class", VALID_CLASSES,
                                      ('real', 'attack'))

        # check if video quality types are valid
        VALID_QUALITIES = self.qualities
        qualities = self.check_validity(qualities, "quality", VALID_QUALITIES,
                                        VALID_QUALITIES)

        # check if attack types are valid

        if cls != (
                'real',
        ):  # if the class is 'real' only, then there is no need for types to be reset to the default (real accesses have no types)
            types = self.check_validity(types, "type", VALID_TYPES,
                                        VALID_TYPES)

        VALID_IDS = self.ids
        ids = self.check_validity(ids, "id", VALID_IDS, VALID_IDS)

        retval = {}
        key = 0

        db_mappings = {
            'real_normal': '1',
            'real_low': '2',
            'real_high': 'HR_1',
            'warped_normal': '3',
            'warped_low': '4',
            'warped_high': 'HR_2',
            'cut_normal': '5',
            'cut_low': '6',
            'cut_high': 'HR_3',
            'video_normal': '7',
            'video_low': '8',
            'video_high': 'HR_4'
        }

        # identitites in the training set are assigned ids 1-20, identities in the test set are assigned ids 21-50
        for i in ids:
            for g in groups:
                if (g == 'train' and i > 20) or (g == 'test' and i <= 20):
                    continue
                cur_id = i
                if g == 'test':
                    cur_id = i - 20
                    # the id within the group subset
                folder_name = g + '_release'
                for q in qualities:
                    if cls == ('real', ) and types != None:
                        continue
                        # category real + any type does not exist
                    for c in cls:
                        if c == 'real':  # the class real doesn't have any different types, only the attacks can be of different type
                            name = os.path.join(folder_name, "%d" % cur_id,
                                                db_mappings[c + '_' + q])
                            retval[key] = make_path(name, directory, extension)
                            key = key + 1
                        else:
                            for t in types:
                                name = os.path.join(folder_name, "%d" % cur_id,
                                                    db_mappings[t + '_' + q])
                                retval[key] = make_path(
                                    name, directory, extension)
                                key = key + 1

        return retval

[docs]    def objects(self,
                ids=[],
                groups=None,
                cls=None,
                qualities=None,
                types=None):
        """Returns a list of unique :py:class:`.File` objects for the specific query by the user.

    Keyword Parameters:

    ids
      The id of the client whose videos need to be retrieved. Should be an integer number in the range 1-50 (the total number of client is 50

    groups
      One of the protocolar subgroups of data as specified in the tuple groups, or a
      tuple with several of them.  If you set this parameter to an empty string
      or the value None, we use reset it to the default which is to get all.

    cls
      Either "attack", "real" or a combination of those (in a
      tuple). Defines the class of data to be retrieved.  If you set this
      parameter to an empty string or the value None, it will be set to the tuple ("real", "attack").

    qualities
      Either "low", "normal" or "high" or any combination of those (in a
      tuple). Defines the qualities of the videos in the database that are going to be used. If you set this
      parameter to the value None, the videos of all qualities are returned ("low", "normal", "high").

    types
      Either "warped", "cut" or "video" or any combination of those (in a
      tuple). Defines the types of attack videos in the database that are going to be used. If you set this
      parameter to the value None, the videos of all the attack types are returned ("warped", "cut", "video").

    Returns: A list of :py:class:`.File` objects.
    """

        # check if groups set are valid
        VALID_GROUPS = self.groups
        groups = self.check_validity(groups, "group", VALID_GROUPS,
                                     VALID_GROUPS)

        # by default, do NOT grab enrollment data from the database
        VALID_CLASSES = self.classes
        VALID_TYPES = self.types
        if cls == None and types != None:  # types are strictly specified which means we don't need the calss of real accesses
            cls = ('attack', )
        else:
            cls = self.check_validity(cls, "class", VALID_CLASSES,
                                      ('real', 'attack'))

        # check if video quality types are valid
        VALID_QUALITIES = self.qualities
        qualities = self.check_validity(qualities, "quality", VALID_QUALITIES,
                                        VALID_QUALITIES)

        # check if attack types are valid

        if cls != (
                'real',
        ):  # if the class is 'real' only, then there is no need for types to be reset to the default (real accesses have no types)
            types = self.check_validity(types, "type", VALID_TYPES,
                                        VALID_TYPES)

        VALID_IDS = self.ids
        ids = self.check_validity(ids, "id", VALID_IDS, VALID_IDS)

        retval = []

        db_mappings = {
            'real_normal': '1',
            'real_low': '2',
            'real_high': 'HR_1',
            'warped_normal': '3',
            'warped_low': '4',
            'warped_high': 'HR_2',
            'cut_normal': '5',
            'cut_low': '6',
            'cut_high': 'HR_3',
            'video_normal': '7',
            'video_low': '8',
            'video_high': 'HR_4'
        }

        # identitites in the training set are assigned ids 1-20, identities in the test set are assigned ids 21-50
        for i in ids:
            for g in groups:
                if (g == 'train' and i > 20) or (g == 'test' and i <= 20):
                    continue
                cur_id = i
                if g == 'test':
                    cur_id = i - 20
                    # the id within the group subset
                folder_name = g + '_release'
                for q in qualities:
                    if cls == ('real', ) and types != None:
                        continue
                        # category real + any type does not exist
                    for c in cls:
                        if c == 'real':  # the class real doesn't have any different types, only the attacks can be of different type
                            filename = os.path.join(folder_name, "%d" % cur_id,
                                                    db_mappings[c + '_' + q])
                            retval.append(File(filename, c, g))
                        else:
                            for t in types:
                                filename = os.path.join(
                                    folder_name, "%d" % cur_id,
                                    db_mappings[t + '_' + q])
                                retval.append(File(filename, c, g))
        return retval

[docs]    def cross_valid_gen(self, numpos, numneg, numfolds=10, outfilename=None):
        """ Performs N-fold cross-validation on a given number of samples. Generates the indices of the validation subset for N folds, and writes them into a text file (the indices of the training samples are easy to compute once the indices of the validation subset are known). This method is intended for 2-class classification problems, therefore the number of both positive and negative samples should be given at the beginning. The method generates validation indices for both positive and negative samples separately. Each row of the output file are the validation indices of one fold; validation indices for the positive class are in the odd lines, and validation indices for the negative class are in the even lines.

    Keyword parameters:

    numpos
      Number of positive samples

    numneg
      Number of negative samples

    numfold
      Number of folds

    outfilename
      The filename of the output file
    """
        if outfilename == None:
            outfilename = self.get_file(
                os.path.join(self.foldsdir, 'cross_valid.txt'))
        f = open(outfilename, 'w')

        def cross_valid(numsamples, numfolds):
            ''' The actual cross-validation function, returns the validation indices in a tab-delimited null-terminated string'''
            from random import shuffle
            X = list(range(0, numsamples))
            shuffle(X)
            retval = []
            for k in range(numfolds):
                tr = [X[i] for i in range(0, numsamples) if i % numfolds != k]
                vl = [X[i] for i in range(0, numsamples) if i % numfolds == k]
                valid = ""
                for ind in vl:
                    valid += "%d\t" % ind
                retval.append(valid)
            return retval

        valid_pos = cross_valid(
            numpos, numfolds)  # the validation indices of the positive set
        valid_neg = cross_valid(
            numneg, numfolds)  # the validation indices of the negative set
        # it is enough to save just the validation indices, training indices are all the rest
        for i in range(0, numfolds):
            f.write(
                valid_pos[i] + '\n'
            )  # write the validation indices for the real samples (every odd line in the file)
            f.write(
                valid_neg[i] + '\n'
            )  # write the validation indices for the attack samples (every even line in the file)
        f.close()
        return 0

[docs]    def cross_valid_read(self):
        """ Reads the cross-validation indices from a file and returns two lists of validation indices: for the positive and for the negative class. Each list actually consists of sublists; one sublist with validation indices for each fold.

    Keyword parameters:

  """
        infilename = self.get_file(
            os.path.join(self.foldsdir, 'cross_valid.txt'))
        lines = open(infilename, 'r').readlines()
        subsets_pos = []
        subsets_neg = []
        linenum = 1
        for line in lines:
            ind_list = [int(i) for i in line.rstrip('\n\t').split('\t')]
            if linenum % 2 == 1:
                subsets_pos.append(
                    ind_list
                )  # odd lines: validation indices for the positive class
            else:
                subsets_neg.append(
                    ind_list
                )  # even lines: validation indices for the negative class
            linenum += 1
        return subsets_pos, subsets_neg

[docs]    def cross_valid_foldfiles(self,
                              cls,
                              types=None,
                              fold_no=0,
                              directory=None,
                              extension=None):
        """ Returns two dictionaries: one with the names of the files of the validation subset in one fold, and one with the names of the files in the training subset of that fold. The number of the cross_validation fold is given as a parameter.

    .. deprecated:: 1.1.0

      This function is *deprecated*, use :py:meth:`.Database.objects` instead.

    Keyword parameters:

    cls
      The class of the samples: 'real' or 'attack'

    types
      Type of the database that is going to be used: 'warped', 'cut' or 'video' or a tuple of these

    fold_no
      Number of the fold

    directory
      This parameter will be prepended to all the filenames which are going to be returned by this procedure

    extension
      This parameter will be appended to all the filenames which are going to be returned by this procedure
  """

        import warnings
        warnings.warn(
            "The method Database.cross_valid_foldfiles() is deprecated, use Database.cross_valid_foldobjects() for more powerful object retrieval",
            DeprecationWarning)

        VALID_TYPES = self.types

        if cls == 'real':
            infilename = self.get_file(os.path.join(self.foldsdir, 'real.txt'))
        else:
            types = self.check_validity(types, "type", VALID_TYPES,
                                        VALID_TYPES)
            if 'warped' in types and 'cut' in types and 'video' in types:
                infilename = self.get_file(
                    os.path.join(self.foldsdir, 'cut_warped_video_attack.txt'))
            elif 'warped' in types and 'cut' in types:
                infilename = self.get_file(
                    os.path.join(self.foldsdir, 'cut_warped_attack.txt'))
            else:
                infilename = self.get_file(
                    os.path.join(self.foldsdir, types[0] + '_attack.txt'))

        lines = open(infilename, 'r').readlines()
        files_val = {
        }  # the keys in the both dictionaries are just pro-forma, for compatibility with other databases
        files_train = {}
        k_val = 0
        k_train = 0  # simple counters

        def make_path(stem, directory, extension):
            if not extension: extension = ''
            if directory: return os.path.join(directory, stem + extension)
            return stem + extension

        for line in lines:
            words = line.rstrip('\n\t').split('\t')
            if int(words[1]) == fold_no:
                files_val[k_val] = make_path(words[0], directory, extension)
                k_val += 1
            else:
                files_train[k_train] = make_path(words[0], directory,
                                                 extension)
                k_train += 1

        return files_val, files_train

[docs]    def cross_valid_foldobjects(self,
                                cls,
                                types=None,
                                qualities=None,
                                fold_no=0):
        """ Returns two dictionaries: one with the names of the files of the validation subset in one fold, and one with the names of the files in the training subset of that fold. The number of the cross_validation fold is given as a parameter.

    Keyword parameters:

    cls
      The class of the samples: 'real' or 'attack'

    types
      Type of the database that is going to be used: 'warped', 'cut' or 'video' or a tuple of these

    qualities
      Either "low", "normal" or "high" or any combination of those (in a
      tuple). Defines the qualities of the videos in the database that are going to be used. If you set this
      parameter to the value None, the videos of all qualities are returned ("low", "normal", "high").

    fold_no
      Number of the fold
  """

        VALID_TYPES = self.types

        VALID_QUALITIES = self.qualities
        qualities = self.check_validity(qualities, "quality", VALID_QUALITIES,
                                        VALID_QUALITIES)

        if cls == 'real':
            infilename = self.get_file(os.path.join(self.foldsdir, 'real.txt'))
        else:
            types = self.check_validity(types, "type", VALID_TYPES,
                                        VALID_TYPES)
            if 'warped' in types and 'cut' in types and 'video' in types:
                infilename = self.get_file(
                    os.path.join(self.foldsdir, 'cut_warped_video_attack.txt'))
            elif 'warped' in types and 'cut' in types:
                infilename = self.get_file(
                    os.path.join(self.foldsdir, 'cut_warped_attack.txt'))
            else:
                infilename = self.get_file(
                    os.path.join(self.foldsdir, types[0] + '_attack.txt'))

        lines = open(infilename, 'r').readlines()
        obj_val = []
        obj_train = []

        for line in lines:
            words = line.rstrip('\n\t').split('\t')

            if int(words[1]) == fold_no:
                f = File(words[0], cls, 'dev')
                if f.get_quality() in qualities:
                    obj_val.append(
                        f
                    )  # the file still belongs to the training set, but is in dev set in cross-validation
            else:
                f = File(words[0], cls, 'train')
                if f.get_quality() in qualities:
                    obj_train.append(f)

        return obj_val, obj_train

[docs]    def save_by_filename(self, filename, obj, directory, extension):
        """Saves a single object supporting the bob save() protocol.

    .. deprecated:: 1.1.0

      This function is *deprecated*, use :py:meth:`.File.save()` instead.

    This method will call save() on the the given object using the correct
    database filename stem for the given filename

    Keyword Parameters:

    filename
      The unique filename under which the object will be saved. Before calling this method, the method files() should be called (with no directory and extension arguments passed) in order to obtain the unique filenames for each of the files to be saved.

    obj
      The object that needs to be saved, respecting the bob save() protocol.

    directory
      This is the base directory to which you want to save the data. The
      directory is tested for existence and created if it is not there with
      os.makedirs()

    extension
      The extension determines the way each of the arrays will be saved.
    """

        import warnings
        warnings.warn(
            "The method Database.save() is deprecated, use the File object directly as returned by Database.objects() for more powerful object manipulation.",
            DeprecationWarning)

        from bob.io.base import save

        fullpath = os.path.join(directory, filename + extension)
        fulldir = os.path.dirname(fullpath)
        utils.makedirs_safe(fulldir)
        save(obj, fullpath)


[docs]def get_config():
    """Returns a string containing the configuration information.
  """
    import bob.extension
    return bob.extension.get_config(__name__)


# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]