Coverage for src/bob/bio/base/database/filelist/query.py: 93%
248 statements
« prev ^ index » next coverage.py v7.6.5, created at 2024-11-14 21:41 +0100
« prev ^ index » next coverage.py v7.6.5, created at 2024-11-14 21:41 +0100
1#!/usr/bin/env python
2# vim: set fileencoding=utf-8 :
3import logging
4import os
6from bob.bio.base.database.legacy import check_parameters_for_validity
7from bob.bio.base.utils.annotations import read_annotation_file
9from .. import BioFile, ZTBioDatabase
10from .models import ListReader
12logger = logging.getLogger("bob.bio.base")
15class FileListBioDatabase(ZTBioDatabase):
16 """This class provides a user-friendly interface to databases that are given as file lists.
18 Parameters
19 ----------
21 filelists_directory : str
22 The directory that contains the filelists defining the protocol(s). If you use the protocol
23 attribute when querying the database, it will be appended to the base directory, such that
24 several protocols are supported by the same class instance of `bob.bio.base`.
26 name : str
27 The name of the database
29 protocol : str
30 The protocol of the database. This should be a folder inside ``filelists_directory``.
32 bio_file_class : ``class``
33 The class that should be used to return the files.
34 This can be :py:class:`bob.bio.base.database.BioFile`, :py:class:`bob.bio.spear.database.AudioBioFile`, :py:class:`bob.bio.face.database.FaceBioFile`, or anything similar.
36 original_directory : str or ``None``
37 The directory, where the original data can be found.
39 original_extension : str or [str] or ``None``
40 The filename extension of the original data, or multiple extensions.
42 annotation_directory : str or ``None``
43 The directory, where additional annotation files can be found.
45 annotation_extension : str or ``None``
46 The filename extension of the annotation files.
48 annotation_type : str or ``None``
49 The type of annotation that can be read.
50 Currently, options are ``'eyecenter', 'named', 'idiap'``.
51 See :py:func:`read_annotation_file` for details.
53 dev_sub_directory : str or ``None``
54 Specify a custom subdirectory for the filelists of the development set (default is ``'dev'``)
56 eval_sub_directory : str or ``None``
57 Specify a custom subdirectory for the filelists of the evaluation set (default is ``'eval'``)
59 world_filename : str or ``None``
60 Specify a custom filename for the training filelist (default is ``'norm/train_world.lst'``)
62 optional_world_1_filename : str or ``None``
63 Specify a custom filename for the (first optional) training filelist
64 (default is ``'norm/train_optional_world_1.lst'``)
66 optional_world_2_filename : str or ``None``
67 Specify a custom filename for the (second optional) training filelist
68 (default is ``'norm/train_optional_world_2.lst'``)
70 models_filename : str or ``None``
71 Specify a custom filename for the model filelists (default is ``'for_models.lst'``)
73 probes_filename : str or ``None``
74 Specify a custom filename for the probes filelists (default is ``'for_probes.lst'``)
76 scores_filename : str or ``None``
77 Specify a custom filename for the scores filelists (default is ``'for_scores.lst'``)
79 tnorm_filename : str or ``None``
80 Specify a custom filename for the T-norm scores filelists (default is ``'for_tnorm.lst'``)
82 znorm_filename : str or ``None``
83 Specify a custom filename for the Z-norm scores filelists (default is ``'for_znorm.lst'``)
85 use_dense_probe_file_list : bool or None
86 Specify which list to use among ``probes_filename`` (dense) or ``scores_filename``.
87 If ``None`` it is tried to be estimated based on the given parameters.
89 keep_read_lists_in_memory : bool
90 If set to ``True`` (the default), the lists are read only once and stored in memory.
91 Otherwise the lists will be re-read for every query (not recommended).
92 """
94 def __init__(
95 self,
96 filelists_directory,
97 name,
98 protocol=None,
99 bio_file_class=BioFile,
100 original_directory=None,
101 original_extension=None,
102 annotation_directory=None,
103 annotation_extension=".json",
104 annotation_type="json",
105 dev_sub_directory=None,
106 eval_sub_directory=None,
107 world_filename=None,
108 optional_world_1_filename=None,
109 optional_world_2_filename=None,
110 models_filename=None,
111 # For probing, use ONE of the two score file lists:
112 probes_filename=None, # File containing the probe files -> dense model/probe score matrix
113 scores_filename=None, # File containing list of model and probe files -> sparse model/probe score matrix
114 # For ZT-Norm:
115 tnorm_filename=None,
116 znorm_filename=None,
117 use_dense_probe_file_list=None,
118 # if both probe_filename and scores_filename is given, what kind of list should be used?
119 keep_read_lists_in_memory=True,
120 # if set to True (the RECOMMENDED default) lists are read only once and stored in memory.
121 **kwargs,
122 ):
123 """Initializes the database with the file lists from the given base directory,
124 and the given sub-directories and file names (which default to useful values if not given).
125 """
127 super(FileListBioDatabase, self).__init__(
128 name=name,
129 protocol=protocol,
130 original_directory=original_directory,
131 original_extension=original_extension,
132 annotation_directory=annotation_directory,
133 annotation_extension=annotation_extension,
134 annotation_type=annotation_type,
135 **kwargs,
136 )
137 # extra args for pretty printing
138 self._kwargs.update(
139 dict(
140 filelists_directory=filelists_directory,
141 dev_sub_directory=dev_sub_directory,
142 eval_sub_directory=eval_sub_directory,
143 world_filename=world_filename,
144 optional_world_1_filename=optional_world_1_filename,
145 optional_world_2_filename=optional_world_2_filename,
146 models_filename=models_filename,
147 probes_filename=probes_filename,
148 scores_filename=scores_filename,
149 tnorm_filename=tnorm_filename,
150 znorm_filename=znorm_filename,
151 use_dense_probe_file_list=use_dense_probe_file_list,
152 # if both probe_filename and scores_filename are given, what kind
153 # of list should be used?
154 keep_read_lists_in_memory=keep_read_lists_in_memory,
155 )
156 )
157 # self.original_directory = original_directory
158 # self.original_extension = original_extension
159 self.bio_file_class = bio_file_class
160 self.keep_read_lists_in_memory = keep_read_lists_in_memory
161 self.list_readers = {}
163 self.m_base_dir = os.path.abspath(filelists_directory)
164 if not os.path.isdir(self.m_base_dir):
165 raise RuntimeError(
166 "Invalid directory specified %s." % (self.m_base_dir)
167 )
169 # sub-directories for dev and eval set:
170 self.m_dev_subdir = (
171 dev_sub_directory if dev_sub_directory is not None else "dev"
172 )
173 self.m_eval_subdir = (
174 eval_sub_directory if eval_sub_directory is not None else "eval"
175 )
177 # training list: format: filename client_id
178 self.m_world_filename = (
179 world_filename
180 if world_filename is not None
181 else os.path.join("norm", "train_world.lst")
182 )
183 # optional training list 1: format: filename client_id
184 self.m_optional_world_1_filename = (
185 optional_world_1_filename
186 if optional_world_1_filename is not None
187 else os.path.join("norm", "train_optional_world_1.lst")
188 )
189 # optional training list 2: format: filename client_id
190 self.m_optional_world_2_filename = (
191 optional_world_2_filename
192 if optional_world_2_filename is not None
193 else os.path.join("norm", "train_optional_world_2.lst")
194 )
195 # model list: format: filename model_id client_id
196 self.m_models_filename = (
197 models_filename if models_filename is not None else "for_models.lst"
198 )
199 # scores list: format: filename model_id claimed_client_id client_id
200 self.m_scores_filename = (
201 scores_filename if scores_filename is not None else "for_scores.lst"
202 )
203 # probe list: format: filename client_id
204 self.m_probes_filename = (
205 probes_filename if probes_filename is not None else "for_probes.lst"
206 )
207 # T-Norm models format: filename model_id client_id
208 self.m_tnorm_filename = (
209 tnorm_filename if tnorm_filename is not None else "for_tnorm.lst"
210 )
211 # Z-Norm files format: filename client_id
212 self.m_znorm_filename = (
213 znorm_filename if znorm_filename is not None else "for_znorm.lst"
214 )
216 self.m_use_dense_probe_file_list = use_dense_probe_file_list
218 def _list_reader(self, protocol):
219 if protocol not in self.list_readers:
220 if protocol is not None:
221 protocol_dir = os.path.join(self.get_base_directory(), protocol)
222 if not os.path.isdir(protocol_dir):
223 raise ValueError(
224 "The directory %s for the given protocol '%s' does not exist"
225 % (protocol_dir, protocol)
226 )
227 self.list_readers[protocol] = ListReader(
228 self.keep_read_lists_in_memory
229 )
231 return self.list_readers[protocol]
233 def _make_bio(self, files):
234 return [
235 self.bio_file_class(
236 client_id=f.client_id, path=f.path, file_id=f.id
237 )
238 for f in files
239 ]
241 def all_files(self, groups=["dev"], add_zt_files=True):
242 """Returns all files for the given group. The internally stored protocol is used, throughout.
244 Parameters
245 ----------
247 groups : [str]
248 A list of groups to retrieve the files for.
250 add_zt_files : bool
251 If selected, also files for ZT-norm scoring will be added.
252 Please select this option only if this dataset provides ZT-norm files, see :py:meth:`implements_zt`.
254 Returns
255 -------
257 [BioFile]
258 A list of all files that fulfill your query.
259 """
260 files = self.objects(groups, self.protocol, **self.all_files_options)
261 # add all files that belong to the ZT-norm
262 for group in groups:
263 if group == "world":
264 continue
265 if add_zt_files:
266 if self.implements_zt(self.protocol, group):
267 files += self.tobjects(group, self.protocol)
268 files += self.zobjects(
269 group, self.protocol, **self.z_probe_options
270 )
271 else:
272 logger.warning(
273 "ZT score files are requested, but no such files are defined in group %s for protocol %s",
274 group,
275 self.protocol,
276 )
278 return self.sort(self._make_bio(files))
280 def groups(self, protocol=None, add_world=True, add_subworld=True):
281 """This function returns the list of groups for this database.
283 Parameters
284 ----------
286 protocol : str or ``None``
287 The protocol for which the groups should be retrieved.
288 If ``None``, the internally stored protocol is used.
290 add_world : bool
291 Add the world groups?
293 add_subworld : bool
294 Add the sub-world groups? Only valid, when ``add_world=True``
296 Returns
297 -------
299 [str]
300 A list of groups
301 """
302 groups = []
303 protocol = protocol or self.protocol
304 if protocol is not None:
305 if os.path.isdir(
306 os.path.join(
307 self.get_base_directory(), protocol, self.m_dev_subdir
308 )
309 ):
310 groups.append("dev")
311 if os.path.isdir(
312 os.path.join(
313 self.get_base_directory(), protocol, self.m_eval_subdir
314 )
315 ):
316 groups.append("eval")
317 if add_world:
318 if os.path.isfile(
319 os.path.join(
320 self.get_base_directory(),
321 protocol,
322 self.m_world_filename,
323 )
324 ):
325 groups.append("world")
326 if add_world and add_subworld:
327 if os.path.isfile(
328 os.path.join(
329 self.get_base_directory(),
330 protocol,
331 self.m_optional_world_1_filename,
332 )
333 ):
334 groups.append("optional_world_1")
335 if os.path.isfile(
336 os.path.join(
337 self.get_base_directory(),
338 protocol,
339 self.m_optional_world_2_filename,
340 )
341 ):
342 groups.append("optional_world_2")
343 else:
344 if os.path.isdir(
345 os.path.join(self.get_base_directory(), self.m_dev_subdir)
346 ):
347 groups.append("dev")
348 if os.path.isdir(
349 os.path.join(self.get_base_directory(), self.m_eval_subdir)
350 ):
351 groups.append("eval")
352 if add_world:
353 if os.path.isfile(
354 os.path.join(
355 self.get_base_directory(), self.m_world_filename
356 )
357 ):
358 groups.append("world")
359 if add_world and add_subworld:
360 if os.path.isfile(
361 os.path.join(
362 self.get_base_directory(),
363 self.m_optional_world_1_filename,
364 )
365 ):
366 groups.append("optional_world_1")
367 if os.path.isfile(
368 os.path.join(
369 self.get_base_directory(),
370 self.m_optional_world_2_filename,
371 )
372 ):
373 groups.append("optional_world_2")
374 return groups
376 def implements_zt(self, protocol=None, groups=None):
377 """Checks if the file lists for the ZT score normalization are available.
379 Parameters
380 ----------
382 protocol : str or ``None``
383 The protocol for which the groups should be retrieved.
385 groups : str or [str] or ``None``
386 The groups for which the ZT score normalization file lists should be checked ``('dev', 'eval')``.
388 Returns
389 -------
391 bool
392 ``True`` if the all file lists for ZT score normalization exist, otherwise ``False``.
393 """
394 protocol = protocol or self.protocol
395 groups = check_parameters_for_validity(
396 groups, "group", self.groups(protocol, add_world=False)
397 )
399 for group in groups:
400 for t in ["for_tnorm", "for_znorm"]:
401 if not os.path.exists(self._get_list_file(group, t, protocol)):
402 return False
403 # all files exist
404 return True
406 def uses_dense_probe_file(self, protocol):
407 """Determines if a dense probe file list is used based on the existence of parameters."""
408 # return, whatever was specified in constructor, if not None
409 if self.m_use_dense_probe_file_list is not None:
410 return self.m_use_dense_probe_file_list
412 # check the existence of the files
413 probes = True
414 scores = True
415 for group in self.groups(protocol, add_world=False):
416 probes = probes and os.path.exists(
417 self._get_list_file(group, type="for_probes", protocol=protocol)
418 )
419 scores = scores and os.path.exists(
420 self._get_list_file(group, type="for_scores", protocol=protocol)
421 )
422 # decide, which score files are available
423 if probes and not scores:
424 return True
425 if not probes and scores:
426 return False
427 raise ValueError(
428 "Unable to determine, which way of probing should be used. Please specify."
429 )
431 def get_base_directory(self):
432 """Returns the base directory where the filelists defining the database
433 are located."""
434 return self.m_base_dir
436 def set_base_directory(self, filelists_directory):
437 """Resets the base directory where the filelists defining the database
438 are located."""
439 self.m_base_dir = filelists_directory
440 if not os.path.isdir(self.filelists_directory):
441 raise RuntimeError(
442 "Invalid directory specified %s." % (self.filelists_directory)
443 )
445 def _get_list_file(self, group, type=None, protocol=None):
446 if protocol:
447 base_directory = os.path.join(self.get_base_directory(), protocol)
448 else:
449 base_directory = self.get_base_directory()
450 if group == "world":
451 return os.path.join(base_directory, self.m_world_filename)
452 elif group == "optional_world_1":
453 return os.path.join(
454 base_directory, self.m_optional_world_1_filename
455 )
456 elif group == "optional_world_2":
457 return os.path.join(
458 base_directory, self.m_optional_world_2_filename
459 )
460 else:
461 group_dir = (
462 self.m_dev_subdir if group == "dev" else self.m_eval_subdir
463 )
464 list_name = {
465 "for_models": self.m_models_filename,
466 "for_probes": self.m_probes_filename,
467 "for_scores": self.m_scores_filename,
468 "for_tnorm": self.m_tnorm_filename,
469 "for_znorm": self.m_znorm_filename,
470 }[type]
471 return os.path.join(base_directory, group_dir, list_name)
473 def client_id_from_model_id(self, model_id, group="dev"):
474 """Returns the client id that is connected to the given model id.
476 Parameters
477 ----------
479 model_id : str or ``None``
480 The model id for which the client id should be returned.
482 groups : str or [str] or ``None``
483 (optional) the groups, the client belongs to.
484 Might be one or more of ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.
485 If groups are given, only these groups are considered.
487 protocol : str or ``None``
488 The protocol to consider.
490 Returns
491 -------
493 str
494 The client id for the given model id, if found.
495 """
496 protocol = self.protocol
497 groups = check_parameters_for_validity(
498 group,
499 "group",
500 self.groups(protocol),
501 default_parameters=self.groups(protocol, add_subworld=False),
502 )
504 for group in groups:
505 model_dict = self._list_reader(protocol).read_models(
506 self._get_list_file(group, "for_models", protocol),
507 group,
508 "for_models",
509 )
510 if model_id in model_dict:
511 return model_dict[model_id]
513 raise ValueError(
514 "The given model id '%s' cannot be found in one of the groups '%s'"
515 % (model_id, groups)
516 )
518 def client_id_from_t_model_id(self, t_model_id, group="dev"):
519 """Returns the client id that is connected to the given T-Norm model id.
521 Parameters
522 ----------
524 model_id : str or ``None``
525 The model id for which the client id should be returned.
527 groups : str or [str] or ``None``
528 (optional) the groups, the client belongs to.
529 Might be one or more of ``('dev', 'eval')``.
530 If groups are given, only these groups are considered.
532 Returns
533 -------
535 str
536 The client id for the given model id of a T-Norm model, if found.
537 """
538 protocol = self.protocol
539 groups = check_parameters_for_validity(
540 group, "group", self.groups(protocol, add_world=False)
541 )
543 for group in groups:
544 model_dict = self._list_reader(protocol).read_models(
545 self._get_list_file(group, "for_tnorm", protocol),
546 group,
547 "for_tnorm",
548 )
549 if t_model_id in model_dict:
550 return model_dict[t_model_id]
552 raise ValueError(
553 "The given T-norm model id '%s' cannot be found in one of the groups '%s'"
554 % (t_model_id, groups)
555 )
557 def __client_id_list__(self, groups, type, protocol=None):
558 ids = set()
559 protocol = protocol or self.protocol
560 # read all lists for all groups and extract the model ids
561 for group in groups:
562 files = self._list_reader(protocol).read_list(
563 self._get_list_file(group, type, protocol), group, type
564 )
565 for file in files:
566 ids.add(file.client_id)
567 return ids
569 def client_ids(self, protocol=None, groups=None):
570 """Returns a list of client ids for the specific query by the user.
572 Parameters
573 ----------
575 protocol : str or ``None``
576 The protocol to consider
578 groups : str or [str] or ``None``
579 The groups to which the clients belong ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.
581 Returns
582 -------
584 [str]
585 A list containing all the client ids which have the given properties.
586 """
588 protocol = protocol or self.protocol
589 groups = check_parameters_for_validity(
590 groups,
591 "group",
592 self.groups(protocol),
593 default_parameters=self.groups(protocol, add_subworld=False),
594 )
596 return self.__client_id_list__(groups, "for_models", protocol)
598 def tclient_ids(self, protocol=None, groups=None):
599 """Returns a list of T-Norm client ids for the specific query by the user.
601 Parameters
602 ----------
604 protocol : str or ``None``
605 The protocol to consider
607 groups : str or [str] or ``None``
608 The groups to which the clients belong ("dev", "eval").
610 Returns
611 -------
613 [str]
614 A list containing all the T-Norm client ids which have the given properties.
615 """
617 protocol = protocol or self.protocol
618 groups = check_parameters_for_validity(
619 groups, "group", self.groups(protocol, add_world=False)
620 )
622 return self.__client_id_list__(groups, "for_tnorm", protocol)
624 def zclient_ids(self, protocol=None, groups=None):
625 """Returns a list of Z-Norm client ids for the specific query by the user.
627 Parameters
628 ----------
630 protocol : str or ``None``
631 The protocol to consider
633 groups : str or [str] or ``None``
634 The groups to which the clients belong ("dev", "eval").
636 Returns
637 -------
639 [str]
640 A list containing all the Z-Norm client ids which have the given properties.
641 """
643 protocol = protocol or self.protocol
644 groups = check_parameters_for_validity(
645 groups, "group", self.groups(protocol, add_world=False)
646 )
648 return self.__client_id_list__(groups, "for_znorm", protocol)
650 def __model_id_list__(self, groups, type, protocol=None):
651 ids = set()
652 protocol = protocol or self.protocol
653 # read all lists for all groups and extract the model ids
654 for group in groups:
655 dict = self._list_reader(protocol).read_models(
656 self._get_list_file(group, type, protocol), group, type
657 )
658 ids.update(dict.keys())
659 return list(ids)
661 def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
662 """Returns a list of model ids for the specific query by the user.
664 Parameters
665 ----------
667 protocol : str or ``None``
668 The protocol to consider
670 groups : str or [str] or ``None``
671 The groups to which the models belong ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.
673 Returns
674 -------
676 [str]
677 A list containing all the model ids which have the given properties.
678 """
679 protocol = protocol or self.protocol
680 groups = check_parameters_for_validity(
681 groups, "group", self.groups(protocol=protocol)
682 )
684 return self.__model_id_list__(groups, "for_models", protocol)
686 def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs):
687 """Returns a list of T-Norm model ids for the specific query by the user.
689 Parameters
690 ----------
692 protocol : str or ``None``
693 The protocol to consider
695 groups : str or [str] or ``None``
696 The groups to which the models belong ``('dev', 'eval')``.
698 Returns
699 -------
701 [str]
702 A list containing all the T-Norm model ids belonging to the given group.
703 """
704 protocol = protocol or self.protocol
705 groups = check_parameters_for_validity(
706 groups, "group", self.groups(protocol, add_world=False)
707 )
709 return self.__model_id_list__(groups, "for_tnorm", protocol)
711 def objects(
712 self,
713 groups=None,
714 protocol=None,
715 purposes=None,
716 model_ids=None,
717 classes=None,
718 **kwargs,
719 ):
720 """Returns a set of :py:class:`bob.bio.base.database.BioFile` objects for the specific query by the user.
722 Parameters
723 ----------
725 protocol : str or ``None``
726 The protocol to consider
728 purposes : str or [str] or ``None``
729 The purposes required to be retrieved ``('enroll', 'probe')`` or a tuple
730 with several of them. If ``None`` is given (this is the default), it is
731 considered the same as a tuple with all possible values. This field is
732 ignored for the data from the ``'world', 'optional_world_1', 'optional_world_2'`` groups.
734 model_ids : str or [str] or ``None``
735 Only retrieves the files for the provided list of model ids (claimed
736 client id). If ``None`` is given (this is the default), no filter over
737 the model_ids is performed.
739 groups : str or [str] or ``None``
740 One of the groups ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')`` or a tuple with several of them.
741 If ``None`` is given (this is the default), it is considered to be the existing subset of ``('world', 'dev', 'eval')``.
743 classes : str or [str] or ``None``
744 The classes (types of accesses) to be retrieved ``('client', 'impostor')``
745 or a tuple with several of them. If ``None`` is given (this is the
746 default), it is considered the same as a tuple with all possible values.
748 .. note::
749 Classes are not allowed to be specified when 'probes_filename' is used in the constructor.
751 Returns
752 -------
754 [BioFile]
755 A list of :py:class:`BioFile` objects considering all the filtering criteria.
756 """
758 protocol = protocol or self.protocol
759 if self.uses_dense_probe_file(protocol) and classes is not None:
760 raise ValueError(
761 "To be able to use the 'classes' keyword, please use the 'for_scores.lst' list file."
762 )
764 purposes = check_parameters_for_validity(
765 purposes, "purpose", ("enroll", "probe")
766 )
767 groups = check_parameters_for_validity(
768 groups,
769 "group",
770 self.groups(protocol),
771 default_parameters=self.groups(protocol, add_subworld=False),
772 )
773 classes = check_parameters_for_validity(
774 classes, "class", ("client", "impostor")
775 )
777 if isinstance(model_ids, str):
778 model_ids = (model_ids,)
780 # first, collect all the lists that we want to process
781 lists = []
782 probe_lists = []
783 if "world" in groups:
784 lists.append(
785 self._list_reader(protocol).read_list(
786 self._get_list_file("world", protocol=protocol), "world"
787 )
788 )
789 if "optional_world_1" in groups:
790 lists.append(
791 self._list_reader(protocol).read_list(
792 self._get_list_file("optional_world_1", protocol=protocol),
793 "optional_world_1",
794 )
795 )
796 if "optional_world_2" in groups:
797 lists.append(
798 self._list_reader(protocol).read_list(
799 self._get_list_file("optional_world_2", protocol=protocol),
800 "optional_world_2",
801 )
802 )
804 for group in ("dev", "eval"):
805 if group in groups:
806 if "enroll" in purposes:
807 lists.append(
808 self._list_reader(protocol).read_list(
809 self._get_list_file(
810 group, "for_models", protocol=protocol
811 ),
812 group,
813 "for_models",
814 )
815 )
816 if "probe" in purposes:
817 if self.uses_dense_probe_file(protocol):
818 probe_lists.append(
819 self._list_reader(protocol).read_list(
820 self._get_list_file(
821 group, "for_probes", protocol=protocol
822 ),
823 group,
824 "for_probes",
825 )
826 )
827 else:
828 probe_lists.append(
829 self._list_reader(protocol).read_list(
830 self._get_list_file(
831 group, "for_scores", protocol=protocol
832 ),
833 group,
834 "for_scores",
835 )
836 )
838 # now, go through the lists and filter the elements
840 # remember the file ids that are already in the list
841 file_ids = set()
842 retval = []
844 # non-probe files; just filter by model id
845 for list in lists:
846 for file in list:
847 # check if we already have this file
848 if file.id not in file_ids:
849 if model_ids is None or file._model_id in model_ids:
850 file_ids.add(file.id)
851 retval.append(file)
853 # probe files; filter by model id and by class
854 for list in probe_lists:
855 if self.uses_dense_probe_file(protocol):
856 # dense probing is used; do not filter over the model ids and not over the classes
857 # -> just add all probe files
858 for file in list:
859 if file.id not in file_ids:
860 file_ids.add(file.id)
861 retval.append(file)
863 else:
864 # sparse probing is used; filter over model ids and over the classes
865 for file in list:
866 # filter by model id
867 if model_ids is None or file._model_id in model_ids:
868 # filter by class
869 if (
870 "client" in classes
871 and file.client_id == file.claimed_id
872 ) or (
873 "impostor" in classes
874 and file.client_id != file.claimed_id
875 ):
876 # check if we already have this file
877 if file.id not in file_ids:
878 file_ids.add(file.id)
879 retval.append(file)
881 return self._make_bio(retval)
883 def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs):
884 """Returns a list of :py:class:`bob.bio.base.database.BioFile` objects for enrolling T-norm models for score normalization.
886 Parameters
887 ----------
889 protocol : str or ``None``
890 The protocol to consider
892 model_ids : str or [str] or ``None``
893 Only retrieves the files for the provided list of model ids (claimed
894 client id). If ``None`` is given (this is the default), no filter over
895 the model_ids is performed.
897 groups : str or [str] or ``None``
898 The groups to which the models belong ``('dev', 'eval')``.
900 Returns
901 -------
903 [BioFile]
904 A list of :py:class:`BioFile` objects considering all the filtering criteria.
905 """
906 protocol = protocol or self.protocol
907 groups = check_parameters_for_validity(
908 groups, "group", self.groups(protocol, add_world=False)
909 )
911 if isinstance(model_ids, str):
912 model_ids = (model_ids,)
914 # iterate over the lists and extract the files
915 # we assume that there is no duplicate file here...
916 retval = []
917 for group in groups:
918 for file in self._list_reader(protocol).read_list(
919 self._get_list_file(group, "for_tnorm", protocol),
920 group,
921 "for_tnorm",
922 ):
923 if model_ids is None or file._model_id in model_ids:
924 retval.append(file)
926 return self._make_bio(retval)
928 def zobjects(self, groups=None, protocol=None, **kwargs):
929 """Returns a list of :py:class:`BioFile` objects to perform Z-norm score normalization.
931 Parameters
932 ----------
934 protocol : str or ``None``
935 The protocol to consider
937 groups : str or [str] or ``None``
938 The groups to which the clients belong ``('dev', 'eval')``.
940 Returns
941 -------
943 [BioFile]
944 A list of File objects considering all the filtering criteria.
945 """
947 protocol = protocol or self.protocol
948 groups = check_parameters_for_validity(
949 groups, "group", self.groups(protocol, add_world=False)
950 )
952 # iterate over the lists and extract the files
953 # we assume that there is no duplicate file here...
954 retval = []
955 for group in groups:
956 retval.extend(
957 [
958 file
959 for file in self._list_reader(protocol).read_list(
960 self._get_list_file(group, "for_znorm", protocol),
961 group,
962 "for_znorm",
963 )
964 ]
965 )
967 return self._make_bio(retval)
969 def annotations(self, file):
970 """Reads the annotations for the given file id from file and returns them in a dictionary.
972 Parameters
973 ----------
975 file : BioFile
976 The BioFile object for which the annotations should be read.
978 Returns
979 -------
981 dict
982 The annotations as a dictionary, e.g.: ``{'reye':(re_y,re_x), 'leye':(le_y,le_x)}``
983 """
984 if self.annotation_directory is None:
985 return None
987 # since the file id is equal to the file name, we can simply use it
988 annotation_file = os.path.join(
989 self.annotation_directory, file.id + self.annotation_extension
990 )
992 # return the annotations as read from file
993 return read_annotation_file(annotation_file, self.annotation_type)
995 def original_file_name(self, file, check_existence=True):
996 """Returns the original file name of the given file.
998 This interface supports several original extensions, so that file lists can contain images
999 of different data types.
1001 When multiple original extensions are specified, this function will check the existence of any of
1002 these file names, and return the first one that actually exists.
1003 In this case, the ``check_existence`` flag is ignored.
1005 Parameters
1006 ----------
1008 file : BioFile
1009 The BioFile object for which the file name should be returned.
1011 check_existence : bool
1012 Should the existence of the original file be checked?
1013 (Ignored when multiple original extensions were specified in the constructor.)
1015 Returns
1016 -------
1018 str
1019 The full path of the original data file.
1020 """
1022 if isinstance(self.original_extension, str):
1023 # extract file name
1024 file_name = file.make_path(
1025 self.original_directory, self.original_extension
1026 )
1027 if not check_existence or os.path.exists(file_name):
1028 return file_name
1030 # check all registered extensions
1031 for extension in self.original_extension:
1032 file_name = file.make_path(self.original_directory, extension)
1033 if os.path.exists(file_name):
1034 return file_name
1036 # None of the extensions matched
1037 raise IOError(
1038 "File '%s' does not exist with any of the extensions '%s'"
1039 % (
1040 file.make_path(self.original_directory, None),
1041 self.original_extension,
1042 )
1043 )