Source code for bob.db.putvein.query

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""
This module provides the Dataset interface allowing the user to query the
PUT Vein database in the most obvious ways.
"""

import os
import six
from .models import File


[docs]class Database(object):

    def __init__(self):
        self.protocols = ('L_4', 'R_4', 'LR_4', 'RL_4', "R_BEAT_4",
                          'L_1', 'R_1', 'LR_1', 'RL_1', "R_BEAT_1")
        self.purposes = ('enroll', 'probe')
        self.kinds = ('palm', 'wrist')
        self.groups = ('train', 'dev', 'eval')


[docs]    def check_validity(self, l, obj, valid, default):
        """Checks validity of user input data against a set of valid values"""
        if not l:
            return default
        elif isinstance(l, six.string_types) or isinstance(l, six.integer_types):
            return self.check_validity((l,), obj, valid, default)

        for k in l:
            if k not in valid:
                raise RuntimeError('Invalid %s "%s". Valid values are %s, or lists/tuples of those' % (obj, k, valid))

        return l


[docs]    def client_id_from_model_id(self, model_id):
        return int(model_id.split("_")[0])


[docs]    def model_ids(self, protocol=None, groups=None, kinds=None):
        """Returns a list of model ids for the specific query by the user."""

        if protocol not in self.protocols:
            raise RuntimeError('Invalid protocol "%s". Valid values are %s' % (protocol, self.protocols))

        files = self.objects(protocol=protocol, groups=groups, kinds=kinds)

        ids = []
        splitted_protocol = protocol.split("_")

        if splitted_protocol[-1] == "4":
            for f in files:
                ids.append(str(f.client_id))
        else:  # because we test protocol name before, we now that only
            # possibility is slitted_protocol[-1] == "1"
            for f in files:
                ids.append("{}_{}".format(f.client_id, f.nr))

        ids = list(set(ids))
        return ids


[docs]    def check_ids_validity(self, ids, max_value):
        """Checks validity of client ids"""
        if not ids:
            return range(1, max_value + 1)

        invalid_ids = [x for x in ids if (x > max_value) or (x < 1)]
        if invalid_ids:
            raise RuntimeError('Invalid ids "%s". "\
            "Valid values are between 1 and %d' % (invalid_ids, max_value))

        return ids


[docs]    def objects(self, protocol=None, purposes=None, model_ids=None, groups=None, kinds=None):
        """
Returns a set of Files for the specific query by the user.

        Keyword Parameters:

        protocol
          One of the PUT protocols. As on 08.02.2017 protocols are:

              - ``L_4``,
              - ``R_4``,
              - ``LR_4``,
              - ``RL_4``,
              - ``R_BEAT_4``,
              - ``L_1``,
              - ``R_1``,
              - ``LR_1``
              - ``RL_1``,
              - ``R_BEAT_1``.

          Protocols still contains the original protocol ('L', 'R', 'LR', 'RL')
          data, the difference is, whether each enroll model is constructed
          using all hand's images (4), or each enroll image is used as a model.
          E.g.:

          The ``R_1`` protocol, if one kind (palm / wrist) is used, each group
          (dev / eval) consists of 25*4 enroll images (each image treated as a
          separate model) and 25*8 probe images, resulting in:

              - 25*4*8 = 800 genuine comparisons,
              - (25*4)*(24*8) = 19200 zero-effort impostor comparisons,
              - 25*4*25*8 = 20'000 total comparisons.

          The ``R_4`` protocol consists of the same data as ``R_1`` but now 4
          images makes enroll model resulting in 25 enroll models per dev /
          eval group. Meaning there are:

              - 25*8 = 200 genuine SCORES;
              - 25*(24*8) = 4800 zero-effort impostor SCORES;
              - 25*25*8 = 5'000 total SCORES.

          Protocols ``R_BEAT_1`` and ``R_BEAT_4`` are new **quick test**
          protocols for BOB and BEAT platforms.

          The ``R_BEAT_1`` protocol consists only of 2 persons in dev / eval
          datasets so that databse could be effectively use for algorithm
          testing. If we use only one kind of data (palm / wrist), than for
          each group (dev / eval) we have 4*2 enroll images (each image makes
          a separate model) and 2*8 probe images resulting in:

              - 2*4*8 = 64 genuine compressions;
              - 2*4*8 = 64 zero-effort impostor compressions;
              - 4*2*2*8 = 128 total comparisons.

          The ``R_BEAT_4`` consists of the same data as ``R_BEAT_1`` but now 4
          images makes enroll model resulting in 2 enroll models per dev / eval
          group. Meaning there are:

              - 2*8 = 16 genuine SCORES;
              - 2*8 = 16 zero-effort impostor SCORES;
              - 2*2*8 = 32 total SCORES.

          **You can find more information in packages documentation.**

        purposes
          The purposes required to be retrieved ('enroll', 'probe') or a tuple
          with several of them. If 'None' is given (this is the default), it is
          considered the same as a tuple with all possible values. This field
          is ignored for the data from the "train" group.

        model_ids
          Only retrieves the files for provided of model ids.
          To enable database compatibility with ``bob.bio.vein``, ``model_ids``
          can be ``None`` or list with length ``1`` (user can't pass multiple
          ``model_ids``)
          The ``model_ids`` is a string.  If 'None' is given (this is the
          default), no filter over the ``model_ids`` is performed.
          Be careful - model ID correspond to the ENROLL data set objects
          (files), don't try to make a specific 'probe' data set query using
          the ``model_ids`` -- in any way entire probe data set will be
          returned.

        groups
          One of the groups ('train', 'dev', 'eval') or a tuple with several
          of them. If 'None' is given (this is the default), it is considered
          the same as a tuple with all possible values.

        kinds
          One of the kinds of data ('palm', 'wrist'), or a tuple with several of
          them.  If 'None' is given (this is the default), it is considered the
          same as a tuple with all possible values.

        Returns: A list of ``File`` objects.
        """
# ################## WORKAROUNDS TO CONSTRUCT MODELS FROM 1 OR 4 IMAGES########
        # this part of the code is a workaround to make the ``putvein``
        # database work with the ``bob.bio.vein``.
        # The ``new`` implementation allows protocols ending with ``4`` and
        # ``1`` for different model creation. Also it allows corresponding
        # ``model_ids`` to make the ``bob.bio.vein`` model concept work.

        # if only asking for 'probes', then ignore model_ids as all of our
        # protocols do a full probe-model scan
        if (purposes and len(purposes) == 1 and 'probe' in purposes) or \
        (purposes and len(purposes) == 1 and 'train' in purposes) or \
        (purposes and purposes == 'probe') or \
        (purposes and purposes == 'train'):
            model_ids = None

        # Check the protocol:
        if protocol not in self.protocols:
            raise RuntimeError('Invalid protocol "%s". Valid values are %s' % (protocol, self.protocols))

        # overrides ``new type`` PROTOCOL name to old type implementation,
        # where allowed PROTOCOL NAMES were 'R', 'L', 'LR', 'RL'.
        splitted_protocol = protocol.split("_")
        protocol = splitted_protocol[0]


        # deals with MODEL_IDS and converts them to old type IDS (client
        # number, an integer between 1 and 100)
        if model_ids == None:
            ids = None
            # we don't perform search by the model, so we return all files:
            nrs = range(1, 4+1)
        elif splitted_protocol[-1] == "1":
            if len(model_ids) == 1:
                ids = [int(model_ids[0].split("_")[0])]
                nrs = [int(model_ids[0].split("_")[1])]
            else:
                raise IOError("Unfortunately if ``model_ids`` are used, you can"
                              " pass just one ID")
        else: # only possibility is that splitted_protocol[-1] == "4":
            nrs = range(1, 4+1)
            ids = []
            for id in model_ids:
                ids.append(int(id.split("_")[0]))

        # extra logic for the ``BEAT`` test protocols:
        if "BEAT" in splitted_protocol:
            if ids == None:
                ids = [1,2,26,27]
            else:
                ids = self.check_ids_validity(ids, 50)
        elif protocol in ('L', 'R'):
            ids = self.check_ids_validity(ids, 50)
        else:
            ids = self.check_ids_validity(ids, 100)
# END OF THE CUSTOM LOGIC.
###############################################################################

        purposes = self.check_validity(purposes, "purposes", self.purposes, self.purposes)
        groups = self.check_validity(groups, "groups", self.groups, self.groups)
        kinds = self.check_validity(kinds, "kinds", self.kinds, self.kinds)

        # Create the result list of files
        result = []

        if protocol in ('L', 'R'):
            filtered_ids = [ (x, x) for x in ids ]
            result.extend(self._get_protocol(protocol, purposes, groups, filtered_ids, kinds, False, True, nrs))
        elif protocol == 'LR':
            if ('train' in groups) or ('dev' in groups):
                filtered_ids = [ (x, x) for x in ids if x <= 50 ]
                result.extend(self._get_protocol('L', purposes, groups, filtered_ids, kinds, False, False, nrs))
            if 'eval' in groups:
                filtered_ids = [ (x, x - 50) for x in ids if x > 50 ]
                result.extend(self._get_protocol('R', purposes, groups, filtered_ids, kinds, True, False, nrs))
        elif protocol == 'RL':
            if ('train' in groups) or ('dev' in groups):
                filtered_ids = [ (x, x) for x in ids if x <= 50 ]
                result.extend(self._get_protocol('R', purposes, groups, filtered_ids, kinds, False, False, nrs))
            if 'eval' in groups:
                filtered_ids = [ (x, x - 50) for x in ids if x > 50 ]
                result.extend(self._get_protocol('L', purposes, groups, filtered_ids, kinds, True, False, nrs))

        return result


    def _get_protocol(self, protocol, purposes, groups, ids, kinds, mirrored, split, nrs):
        result = []

        if protocol == 'L':
            side = 'Left'
        else:
            side = 'Right'

        train_processed = False

        for group in groups:

            for purpose in purposes:
                if group == 'train':
                    if train_processed:
                        continue
                    series = [1, 2, 3]
                    train_processed = True
                elif purpose == 'enroll':
                    series = [1]
                else:
                    series = [2, 3]

                if split:
                    if group == 'eval':
                        filtered_ids = [ x for x in ids if x[1] >= 26 ]
                    else:
                        filtered_ids = [ x for x in ids if x[1] <= 25 ]
                else:
                    filtered_ids = ids

                for kind in kinds:
                    kind = kind[0].upper() + kind[1:]
                    result.extend(self._get_files(kind, side, filtered_ids, series, mirrored, nrs))

        return result


    def _get_files(self, kind, side, filtered_ids, series, mirrored, nrs):
        result = []

        for id in filtered_ids:
            for serie in series:
                #for n in range(1, 5):
                for n in nrs:
                    result.append(
                        File(
                            os.path.join(
                                kind,
                                'o_%03d' % id[1],
                                side,
                                'Series_%d' % serie,
                                '%s_o%03d_%s_S%d_Nr%d.bmp' % (kind[0], id[1], side[0], serie, n)
                            ),
                            id[0],
                            mirrored
                        )
                    )

        return result

# =============================================================================
# functions for BEAT platform.
# =============================================================================
[docs]    def file_model_id(self, file, protocol):
        """
        ``file_model_id`` - is a function made for the ``BEAT`` platform.
        Function outputs the ``model_id`` according to the protocol used.

         Keyword Parameters:

        file
          The ``bob.db.putvein`` file object

        protocol
          The ``bob.db.putvein`` protocol used - one of the protocols:

              - 'L_4',
              - 'R_4',
              - 'LR_4',
              - 'RL_4',
              - 'R_BEAT_4',
              - 'L_1',
              - 'R_1',
              - 'LR_1',
              - 'RL_1',
              - 'R_BEAT_1'.

        Returns: A model_id -- a string that represents the file ``model_id``
        according to the protocol used.
        """

        # Check the protocol:
        if protocol not in self.protocols:
            raise RuntimeError('Invalid protocol "{}". Valid values are {}'.\
                               format(protocol, self.protocols))

        if protocol.endswith("4"):
            model_id = str(file.client_id)

        else:  # because we check protocol names, only option that remains is
            #    protocol.endswith("1"):
            model_id = "{}_{}".format(file.client_id, file.nr)

        return model_id