Source code for bob.bio.base.database.filelist.query

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
import os
import six

import bob.db.base

from .. import ZTBioDatabase
from .. import BioFile

from .models import ListReader

import logging
logger = logging.getLogger('bob.bio.base')


class FileListBioDatabase(ZTBioDatabase):
    """This class provides a user-friendly interface to databases that are given as file lists.

    Parameters
    ----------

    filelists_directory : str
      The directory that contains the filelists defining the protocol(s). If you use the protocol
      attribute when querying the database, it will be appended to the base directory, such that
      several protocols are supported by the same class instance of `bob.bio.base`.

    name : str
      The name of the database

    protocol : str
      The protocol of the database. This should be a folder inside ``filelists_directory``.

    bio_file_class : ``class``
      The class that should be used to return the files.
      This can be :py:class:`bob.bio.base.database.BioFile`, :py:class:`bob.bio.spear.database.AudioBioFile`, :py:class:`bob.bio.face.database.FaceBioFile`, or anything similar.

    original_directory : str or ``None``
      The directory, where the original data can be found.

    original_extension : str or [str] or ``None``
      The filename extension of the original data, or multiple extensions.

    annotation_directory : str or ``None``
      The directory, where additional annotation files can be found.

    annotation_extension : str or ``None``
      The filename extension of the annotation files.

    annotation_type : str or ``None``
      The type of annotation that can be read.
      Currently, options are ``'eyecenter', 'named', 'idiap'``.
      See :py:func:`bob.db.base.read_annotation_file` for details.

    dev_sub_directory : str or ``None``
      Specify a custom subdirectory for the filelists of the development set (default is ``'dev'``)

    eval_sub_directory : str or ``None``
      Specify a custom subdirectory for the filelists of the evaluation set (default is ``'eval'``)

    world_filename : str or ``None``
      Specify a custom filename for the training filelist (default is ``'norm/train_world.lst'``)

    optional_world_1_filename : str or ``None``
      Specify a custom filename for the (first optional) training filelist
      (default is ``'norm/train_optional_world_1.lst'``)

    optional_world_2_filename : str or ``None``
      Specify a custom filename for the (second optional) training filelist
      (default is ``'norm/train_optional_world_2.lst'``)

    models_filename : str or ``None``
      Specify a custom filename for the model filelists (default is ``'for_models.lst'``)

    probes_filename : str or ``None``
      Specify a custom filename for the probes filelists (default is ``'for_probes.lst'``)

    scores_filename : str or ``None``
      Specify a custom filename for the scores filelists (default is ``'for_scores.lst'``)

    tnorm_filename : str or ``None``
      Specify a custom filename for the T-norm scores filelists (default is ``'for_tnorm.lst'``)

    znorm_filename : str or ``None``
      Specify a custom filename for the Z-norm scores filelists (default is ``'for_znorm.lst'``)

    use_dense_probe_file_list : bool or None
      Specify which list to use among ``probes_filename`` (dense) or ``scores_filename``.
      If ``None`` it is tried to be estimated based on the given parameters.

    keep_read_lists_in_memory : bool
      If set to ``True`` (the default), the lists are read only once and stored in memory.
      Otherwise the lists will be re-read for every query (not recommended).
    """

    def __init__(
            self,
            filelists_directory,
            name,
            protocol=None,
            bio_file_class=BioFile,

            original_directory=None,
            original_extension=None,
            annotation_directory=None,
            annotation_extension='.pos',
            annotation_type='eyecenter',

            dev_sub_directory=None,
            eval_sub_directory=None,

            world_filename=None,
            optional_world_1_filename=None,
            optional_world_2_filename=None,
            models_filename=None,

            # Probing uses ONE of the following two lists:
            probes_filename=None,  # list of probe files -> dense model/probe score matrix
            scores_filename=None,  # list of model/probe pairs -> sparse model/probe score matrix
            # Lists for ZT-Norm:
            tnorm_filename=None,
            znorm_filename=None,
            # Which kind of probe list to use when both could apply (None: decide on query)
            use_dense_probe_file_list=None,
            # True (the RECOMMENDED default): read each list once and keep it in memory
            keep_read_lists_in_memory=True,
            **kwargs
    ):
        """Sets up the database from the file lists below ``filelists_directory``.

        Every sub-directory or list file name that is left as ``None`` is replaced
        by its conventional default (``'dev'``, ``'eval'``, ``'for_models.lst'``, ...).
        All remaining keyword arguments are forwarded to the base class constructor.

        Raises
        ------

        RuntimeError
          If ``filelists_directory`` is not an existing directory.
        """

        def _or_default(value, fallback):
            # only ``None`` selects the fallback; any other value is kept as given
            return fallback if value is None else value

        super(FileListBioDatabase, self).__init__(
            name=name,
            protocol=protocol,
            original_directory=original_directory,
            original_extension=original_extension,
            annotation_directory=annotation_directory,
            annotation_extension=annotation_extension,
            annotation_type=annotation_type,
            **kwargs)

        # remember the file-list specific arguments for pretty printing
        self._kwargs.update({
            'filelists_directory': filelists_directory,
            'dev_sub_directory': dev_sub_directory,
            'eval_sub_directory': eval_sub_directory,
            'world_filename': world_filename,
            'optional_world_1_filename': optional_world_1_filename,
            'optional_world_2_filename': optional_world_2_filename,
            'models_filename': models_filename,
            'probes_filename': probes_filename,
            'scores_filename': scores_filename,
            'tnorm_filename': tnorm_filename,
            'znorm_filename': znorm_filename,
            'use_dense_probe_file_list': use_dense_probe_file_list,
            'keep_read_lists_in_memory': keep_read_lists_in_memory,
        })

        self.bio_file_class = bio_file_class
        self.keep_read_lists_in_memory = keep_read_lists_in_memory
        # one list reader per protocol, created lazily by _list_reader()
        self.list_readers = {}

        self.m_base_dir = os.path.abspath(filelists_directory)
        if not os.path.isdir(self.m_base_dir):
            raise RuntimeError('Invalid directory specified %s.' % (self.m_base_dir))

        # sub-directories holding the lists of the dev and eval groups
        self.m_dev_subdir = _or_default(dev_sub_directory, 'dev')
        self.m_eval_subdir = _or_default(eval_sub_directory, 'eval')

        # training lists;    format:   filename client_id
        self.m_world_filename = _or_default(
            world_filename, os.path.join('norm', 'train_world.lst'))
        self.m_optional_world_1_filename = _or_default(
            optional_world_1_filename, os.path.join('norm', 'train_optional_world_1.lst'))
        self.m_optional_world_2_filename = _or_default(
            optional_world_2_filename, os.path.join('norm', 'train_optional_world_2.lst'))

        # model list;        format:   filename model_id client_id
        self.m_models_filename = _or_default(models_filename, 'for_models.lst')
        # scores list;       format:   filename model_id claimed_client_id client_id
        self.m_scores_filename = _or_default(scores_filename, 'for_scores.lst')
        # probe list;        format:   filename client_id
        self.m_probes_filename = _or_default(probes_filename, 'for_probes.lst')
        # T-Norm models;     format:   filename model_id client_id
        self.m_tnorm_filename = _or_default(tnorm_filename, 'for_tnorm.lst')
        # Z-Norm files;      format:   filename client_id
        self.m_znorm_filename = _or_default(znorm_filename, 'for_znorm.lst')

        self.m_use_dense_probe_file_list = use_dense_probe_file_list


    def _list_reader(self, protocol):
        if protocol not in self.list_readers:
            if protocol is not None:
                protocol_dir = os.path.join(self.get_base_directory(), protocol)
                if not os.path.isdir(protocol_dir):
                    raise ValueError("The directory %s for the given protocol '%s' does not exist" % (protocol_dir, protocol))
            self.list_readers[protocol] = ListReader(self.keep_read_lists_in_memory)

        return self.list_readers[protocol]

    def _make_bio(self, files):
        return [self.bio_file_class(client_id=f.client_id, path=f.path, file_id=f.id) for f in files]


[docs]    def all_files(self, groups=['dev'], add_zt_files=True):
        """Returns all files for the given group. The internally stored protocol is used, throughout.

        Parameters
        ----------

        groups : [str]
          A list of groups to retrieve the files for.

        add_zt_files : bool
          If selected, also files for ZT-norm scoring will be added.
          Please select this option only if this dataset provides ZT-norm files, see :py:meth:`implements_zt`.

        Returns
        -------

        [BioFile]
          A list of all files that fulfill your query.
        """
        files = self.objects(groups, self.protocol, **self.all_files_options)
        # add all files that belong to the ZT-norm
        for group in groups:
            if group == 'world':
                continue
            if add_zt_files:
                if self.implements_zt(self.protocol, group):
                    files += self.tobjects(group, self.protocol)
                    files += self.zobjects(group, self.protocol, **self.z_probe_options)
                else:
                    logger.warn("ZT score files are requested, but no such files are defined in group %s for protocol %s", group, self.protocol)

        return self.sort(self._make_bio(files))


[docs]    def groups(self, protocol=None, add_world=True, add_subworld=True):
        """This function returns the list of groups for this database.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol for which the groups should be retrieved.
          If ``None``, the internally stored protocol is used.

        add_world : bool
          Add the world groups?

        add_subworld : bool
          Add the sub-world groups? Only valid, when ``add_world=True``

        Returns
        -------

        [str]
          A list of groups
        """
        groups = []
        protocol = protocol or self.protocol
        if protocol is not None:
            if os.path.isdir(os.path.join(self.get_base_directory(), protocol, self.m_dev_subdir)):
                groups.append('dev')
            if os.path.isdir(os.path.join(self.get_base_directory(), protocol, self.m_eval_subdir)):
                groups.append('eval')
            if add_world:
                if os.path.isfile(os.path.join(self.get_base_directory(), protocol, self.m_world_filename)):
                    groups.append('world')
            if add_world and add_subworld:
                if os.path.isfile(os.path.join(self.get_base_directory(), protocol, self.m_optional_world_1_filename)):
                    groups.append('optional_world_1')
                if os.path.isfile(os.path.join(self.get_base_directory(), protocol, self.m_optional_world_2_filename)):
                    groups.append('optional_world_2')
        else:
            if os.path.isdir(os.path.join(self.get_base_directory(), self.m_dev_subdir)):
                groups.append('dev')
            if os.path.isdir(os.path.join(self.get_base_directory(), self.m_eval_subdir)):
                groups.append('eval')
            if add_world:
                if os.path.isfile(os.path.join(self.get_base_directory(), self.m_world_filename)):
                    groups.append('world')
            if add_world and add_subworld:
                if os.path.isfile(os.path.join(self.get_base_directory(), self.m_optional_world_1_filename)):
                    groups.append('optional_world_1')
                if os.path.isfile(os.path.join(self.get_base_directory(), self.m_optional_world_2_filename)):
                    groups.append('optional_world_2')
        return groups


[docs]    def implements_zt(self, protocol=None, groups=None):
        """Checks if the file lists for the ZT score normalization are available.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol for which the groups should be retrieved.

        groups : str or [str] or ``None``
          The groups for which the ZT score normalization file lists should be checked ``('dev', 'eval')``.

        Returns
        -------

        bool
          ``True`` if the all file lists for ZT score normalization exist, otherwise ``False``.
        """
        protocol = protocol or self.protocol
        groups = self.check_parameters_for_validity(groups, "group", self.groups(protocol, add_world=False))

        for group in groups:
            for t in ['for_tnorm', 'for_znorm']:
                if not os.path.exists(self._get_list_file(group, t, protocol)):
                    return False
        # all files exist
        return True

[docs]    def uses_dense_probe_file(self, protocol):
        """Determines if a dense probe file list is used based on the existence of parameters."""
        # return, whatever was specified in constructor, if not None
        if self.m_use_dense_probe_file_list is not None:
            return self.m_use_dense_probe_file_list

        # check the existence of the files
        probes = True
        scores = True
        for group in self.groups(protocol, add_world=False):
            probes = probes and os.path.exists(self._get_list_file(group, type='for_probes', protocol=protocol))
            scores = scores and os.path.exists(self._get_list_file(group, type='for_scores', protocol=protocol))
        # decide, which score files are available
        if probes and not scores:
            return True
        if not probes and scores:
            return False
        raise ValueError("Unable to determine, which way of probing should be used. Please specify.")


[docs]    def get_base_directory(self):
        """Returns the base directory where the filelists defining the database
           are located."""
        return self.m_base_dir

[docs]    def set_base_directory(self, filelists_directory):
        """Resets the base directory where the filelists defining the database
          are located."""
        self.m_base_dir = filelists_directory
        if not os.path.isdir(self.filelists_directory):
            raise RuntimeError('Invalid directory specified %s.' % (self.filelists_directory))

    def _get_list_file(self, group, type=None, protocol=None):
        if protocol:
            base_directory = os.path.join(self.get_base_directory(), protocol)
        else:
            base_directory = self.get_base_directory()
        if group == 'world':
            return os.path.join(base_directory, self.m_world_filename)
        elif group == 'optional_world_1':
            return os.path.join(base_directory, self.m_optional_world_1_filename)
        elif group == 'optional_world_2':
            return os.path.join(base_directory, self.m_optional_world_2_filename)
        else:
            group_dir = self.m_dev_subdir if group == 'dev' else self.m_eval_subdir
            list_name = {'for_models': self.m_models_filename,
                         'for_probes': self.m_probes_filename,
                         'for_scores': self.m_scores_filename,
                         'for_tnorm': self.m_tnorm_filename,
                         'for_znorm': self.m_znorm_filename
                         }[type]
            return os.path.join(base_directory, group_dir, list_name)

[docs]    def client_id_from_model_id(self, model_id, group='dev'):
        """Returns the client id that is connected to the given model id.

        Parameters
        ----------

        model_id : str or ``None``
          The model id for which the client id should be returned.

        groups : str or [str] or ``None``
          (optional) the groups, the client belongs to.
          Might be one or more of ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.
          If groups are given, only these groups are considered.

        protocol : str or ``None``
          The protocol to consider.

        Returns
        -------

        str
          The client id for the given model id, if found.
        """
        protocol = self.protocol
        groups = self.check_parameters_for_validity(group, "group",
                                                    self.groups(protocol),
                                                    default_parameters=self.groups(protocol, add_subworld=False))

        for group in groups:
            model_dict = self._list_reader(protocol).read_models(self._get_list_file(group, 'for_models', protocol), group,
                                                        'for_models')
            if model_id in model_dict:
                return model_dict[model_id]

        raise ValueError("The given model id '%s' cannot be found in one of the groups '%s'" % (model_id, groups))

[docs]    def client_id_from_t_model_id(self, t_model_id, group='dev'):
        """Returns the client id that is connected to the given T-Norm model id.

        Parameters
        ----------

        model_id : str or ``None``
          The model id for which the client id should be returned.

        groups : str or [str] or ``None``
          (optional) the groups, the client belongs to.
          Might be one or more of ``('dev', 'eval')``.
          If groups are given, only these groups are considered.

        Returns
        -------

        str
          The client id for the given model id of a T-Norm model, if found.
        """
        protocol = self.protocol
        groups = self.check_parameters_for_validity(group, "group", self.groups(protocol, add_world=False))

        for group in groups:
            model_dict = self._list_reader(protocol).read_models(self._get_list_file(group, 'for_tnorm', protocol), group,
                                                        'for_tnorm')
            if t_model_id in model_dict:
                return model_dict[t_model_id]

        raise ValueError(
            "The given T-norm model id '%s' cannot be found in one of the groups '%s'" % (t_model_id, groups))

    def __client_id_list__(self, groups, type, protocol=None):
        ids = set()
        protocol = protocol or self.protocol
        # read all lists for all groups and extract the model ids
        for group in groups:
            files = self._list_reader(protocol).read_list(self._get_list_file(group, type, protocol), group, type)
            for file in files:
                ids.add(file.client_id)
        return ids

[docs]    def client_ids(self, protocol=None, groups=None):
        """Returns a list of client ids for the specific query by the user.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol to consider

        groups : str or [str] or ``None``
          The groups to which the clients belong ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.

        Returns
        -------

        [str]
          A list containing all the client ids which have the given properties.
        """

        protocol = protocol or self.protocol
        groups = self.check_parameters_for_validity(groups, "group",
                                                    self.groups(protocol),
                                                    default_parameters=self.groups(protocol, add_subworld=False))

        return self.__client_id_list__(groups, 'for_models', protocol)

[docs]    def tclient_ids(self, protocol=None, groups=None):
        """Returns a list of T-Norm client ids for the specific query by the user.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol to consider

        groups : str or [str] or ``None``
          The groups to which the clients belong ("dev", "eval").

        Returns
        -------

        [str]
          A list containing all the T-Norm client ids which have the given properties.
        """

        protocol = protocol or self.protocol
        groups = self.check_parameters_for_validity(groups, "group", self.groups(protocol, add_world=False))

        return self.__client_id_list__(groups, 'for_tnorm', protocol)

[docs]    def zclient_ids(self, protocol=None, groups=None):
        """Returns a list of Z-Norm client ids for the specific query by the user.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol to consider

        groups : str or [str] or ``None``
          The groups to which the clients belong ("dev", "eval").

        Returns
        -------

        [str]
          A list containing all the Z-Norm client ids which have the given properties.
        """

        protocol = protocol or self.protocol
        groups = self.check_parameters_for_validity(groups, "group", self.groups(protocol, add_world=False))

        return self.__client_id_list__(groups, 'for_znorm', protocol)

    def __model_id_list__(self, groups, type, protocol=None):
        ids = set()
        protocol = protocol or self.protocol
        # read all lists for all groups and extract the model ids
        for group in groups:
            dict = self._list_reader(protocol).read_models(self._get_list_file(group, type, protocol), group, type)
            ids.update(dict.keys())
        return list(ids)

[docs]    def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
        """Returns a list of model ids for the specific query by the user.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol to consider

        groups : str or [str] or ``None``
          The groups to which the models belong ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.

        Returns
        -------

        [str]
          A list containing all the model ids which have the given properties.
        """
        protocol = protocol or self.protocol
        groups = self.check_parameters_for_validity(groups, "group", self.groups(protocol=protocol))

        return self.__model_id_list__(groups, 'for_models', protocol)

[docs]    def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs):
        """Returns a list of T-Norm model ids for the specific query by the user.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol to consider

        groups : str or [str] or ``None``
          The groups to which the models belong ``('dev', 'eval')``.

        Returns
        -------

        [str]
          A list containing all the T-Norm model ids belonging to the given group.
        """
        protocol = protocol or self.protocol
        groups = self.check_parameters_for_validity(groups, "group", self.groups(protocol, add_world=False))

        return self.__model_id_list__(groups, 'for_tnorm', protocol)

[docs]    def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, classes=None, **kwargs):
        """Returns a set of :py:class:`bob.bio.base.database.BioFile` objects for the specific query by the user.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol to consider

        purposes : str or [str] or ``None``
          The purposes required to be retrieved ``('enroll', 'probe')`` or a tuple
          with several of them. If ``None`` is given (this is the default), it is
          considered the same as a tuple with all possible values. This field is
          ignored for the data from the ``'world', 'optional_world_1', 'optional_world_2'`` groups.

        model_ids : str or [str] or ``None``
          Only retrieves the files for the provided list of model ids (claimed
          client id). If ``None`` is given (this is the default), no filter over
          the model_ids is performed.

        groups : str or [str] or ``None``
          One of the groups ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')`` or a tuple with several of them.
          If ``None`` is given (this is the default), it is considered to be the existing subset of ``('world', 'dev', 'eval')``.

        classes : str or [str] or ``None``
          The classes (types of accesses) to be retrieved ``('client', 'impostor')``
          or a tuple with several of them. If ``None`` is given (this is the
          default), it is considered the same as a tuple with all possible values.

          .. note::
             Classes are not allowed to be specified when 'probes_filename' is used in the constructor.

        Returns
        -------

        [BioFile]
          A list of :py:class:`BioFile` objects considering all the filtering criteria.
        """

        protocol = protocol or self.protocol
        if self.uses_dense_probe_file(protocol) and classes is not None:
            raise ValueError("To be able to use the 'classes' keyword, please use the 'for_scores.lst' list file.")

        purposes = self.check_parameters_for_validity(purposes, "purpose", ('enroll', 'probe'))
        groups = self.check_parameters_for_validity(groups, "group",
                                                    self.groups(protocol),
                                                    default_parameters=self.groups(protocol, add_subworld=False))
        classes = self.check_parameters_for_validity(classes, "class", ('client', 'impostor'))

        if isinstance(model_ids, six.string_types):
            model_ids = (model_ids,)

        # first, collect all the lists that we want to process
        lists = []
        probe_lists = []
        if 'world' in groups:
            lists.append(self._list_reader(protocol).read_list(self._get_list_file('world', protocol=protocol), 'world'))
        if 'optional_world_1' in groups:
            lists.append(self._list_reader(protocol).read_list(self._get_list_file('optional_world_1', protocol=protocol),
                                                      'optional_world_1'))
        if 'optional_world_2' in groups:
            lists.append(self._list_reader(protocol).read_list(self._get_list_file('optional_world_2', protocol=protocol),
                                                      'optional_world_2'))

        for group in ('dev', 'eval'):
            if group in groups:
                if 'enroll' in purposes:
                    lists.append(
                        self._list_reader(protocol).read_list(self._get_list_file(group, 'for_models', protocol=protocol), group, 'for_models'))
                if 'probe' in purposes:
                    if self.uses_dense_probe_file(protocol):
                        probe_lists.append(
                            self._list_reader(protocol).read_list(self._get_list_file(group, 'for_probes', protocol=protocol), group, 'for_probes'))
                    else:
                        probe_lists.append(
                            self._list_reader(protocol).read_list(self._get_list_file(group, 'for_scores', protocol=protocol), group, 'for_scores'))

        # now, go through the lists and filter the elements

        # remember the file ids that are already in the list
        file_ids = set()
        retval = []

        # non-probe files; just filter by model id
        for list in lists:
            for file in list:
                # check if we already have this file
                if file.id not in file_ids:
                    if model_ids is None or file._model_id in model_ids:
                        file_ids.add(file.id)
                        retval.append(file)

        # probe files; filter by model id and by class
        for list in probe_lists:
            if self.uses_dense_probe_file(protocol):
                # dense probing is used; do not filter over the model ids and not over the classes
                # -> just add all probe files
                for file in list:
                    if file.id not in file_ids:
                        file_ids.add(file.id)
                        retval.append(file)

            else:
                # sparse probing is used; filter over model ids and over the classes
                for file in list:
                    # filter by model id
                    if model_ids is None or file._model_id in model_ids:
                        # filter by class
                        if ('client' in classes and file.client_id == file.claimed_id) or \
                                ('impostor' in classes and file.client_id != file.claimed_id):
                            # check if we already have this file
                            if file.id not in file_ids:
                                file_ids.add(file.id)
                                retval.append(file)

        return self._make_bio(retval)

[docs]    def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs):
        """Returns a list of :py:class:`bob.bio.base.database.BioFile` objects for enrolling T-norm models for score normalization.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol to consider

        model_ids : str or [str] or ``None``
          Only retrieves the files for the provided list of model ids (claimed
          client id). If ``None`` is given (this is the default), no filter over
          the model_ids is performed.

        groups : str or [str] or ``None``
          The groups to which the models belong ``('dev', 'eval')``.

        Returns
        -------

        [BioFile]
          A list of :py:class:`BioFile` objects considering all the filtering criteria.
        """
        protocol = protocol or self.protocol
        groups = self.check_parameters_for_validity(groups, "group", self.groups(protocol, add_world=False))

        if (isinstance(model_ids, six.string_types)):
            model_ids = (model_ids,)

        # iterate over the lists and extract the files
        # we assume that there is no duplicate file here...
        retval = []
        for group in groups:
            for file in self._list_reader(protocol).read_list(self._get_list_file(group, 'for_tnorm', protocol), group,
                                                     'for_tnorm'):
                if model_ids is None or file._model_id in model_ids:
                    retval.append(file)

        return self._make_bio(retval)

[docs]    def zobjects(self, groups=None, protocol=None, **kwargs):
        """Returns a list of :py:class:`BioFile` objects to perform Z-norm score normalization.

        Parameters
        ----------

        protocol : str or ``None``
          The protocol to consider

        groups : str or [str] or ``None``
          The groups to which the clients belong ``('dev', 'eval')``.

        Returns
        -------

        [BioFile]
          A list of File objects considering all the filtering criteria.
        """

        protocol = protocol or self.protocol
        groups = self.check_parameters_for_validity(groups, "group", self.groups(protocol, add_world=False))

        # iterate over the lists and extract the files
        # we assume that there is no duplicate file here...
        retval = []
        for group in groups:
            retval.extend([file for file in
                           self._list_reader(protocol).read_list(self._get_list_file(group, 'for_znorm', protocol), group,
                                                        'for_znorm')])

        return self._make_bio(retval)

[docs]    def annotations(self, file):
        """Reads the annotations for the given file id from file and returns them in a dictionary.

        Parameters
        ----------

        file : BioFile
          The BioFile object for which the annotations should be read.

        Returns
        -------

        dict
          The annotations as a dictionary, e.g.: ``{'reye':(re_y,re_x), 'leye':(le_y,le_x)}``
        """
        if self.annotation_directory is None:
            return None

        # since the file id is equal to the file name, we can simply use it
        annotation_file = os.path.join(self.annotation_directory, file.id + self.annotation_extension)

        # return the annotations as read from file
        return bob.db.base.read_annotation_file(annotation_file, self.annotation_type)


[docs]    def original_file_name(self, file, check_existence=True):
        """Returns the original file name of the given file.

        This interface supports several original extensions, so that file lists can contain images
        of different data types.

        When multiple original extensions are specified, this function will check the existence of any of
        these file names, and return the first one that actually exists.
        In this case, the ``check_existence`` flag is ignored.

        Parameters
        ----------

        file : BioFile
          The BioFile object for which the file name should be returned.

        check_existence : bool
          Should the existence of the original file be checked?
          (Ignored when multiple original extensions were specified in the constructor.)

        Returns
        -------

        str
          The full path of the original data file.
        """

        if isinstance(self.original_extension, six.string_types):
            # extract file name
            file_name = file.make_path(self.original_directory, self.original_extension)
            if check_existence and os.path.exists(file_name):
                return file_name

        # check all registered extensions
        for extension in self.original_extension:
            file_name = file.make_path(self.original_directory, extension)
            if check_existence and os.path.exists(file_name):
                return file_name

        # None of the extensions matched
        raise IOError("File '%s' does not exist with any of the extensions '%s'" % (
            file.make_path(self.original_directory, None), self.original_extension))