Source code for bob.bio.base.tools.FileSelector

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Guenther <Manuel.Guenther@idiap.ch>

import os
from .. import utils

@utils.Singleton
class FileSelector(object):
    """This class provides shortcuts for selecting different files for different stages of the verification process.

    It communicates with the database and provides lists of file names for all steps of the tool chain.

    .. todo:: Find a way that this class' methods get correctly documented, instead of the :py:class:`bob.bio.base.Singleton` wrapper class.

    **Parameters:**

    database : :py:class:`bob.bio.base.database.BioDatabase` or derived
      The database object that provides the list of files.

    preprocessed_directory : str
      The directory, where preprocessed data should be written to.

    extractor_file : str
      The filename, where the extractor should be written to (if any).

    extracted_directory : str
      The directory, where extracted features should be written to.

    projector_file : str
      The filename, where the projector should be written to (if any).

    projected_directory : str
      The directory, where projected features should be written to (if required).

    enroller_file : str
      The filename, where the enroller should be written to (if required).

    model_directories : (str, str)
      The directories, where models and t-norm models should be written to.

    score_directories : (str, str)
      The directories, where score files for no-norm and ZT-norm should be written to.

    zt_score_directories : (str, str, str, str, str) or ``None``
      If given, specify the directories, where intermediate score files required to compute the ZT-norm should be written.
      The 5 directories are for 1: normal scores; 2: Z-scores; 3: T-scores; 4: ZT-scores; 5: ZT-samevalue scores.

    default_extension : str
      The default extension of all intermediate files.

    compressed_extension : str
      The extension for writing compressed score files.
      By default, no compression is performed.

    zt_norm : bool
      Forwarded as ``add_zt_files`` to ``database.all_files`` whenever the complete list of files is queried.
    """

    def __init__(
        self,
        database,
        preprocessed_directory,
        extractor_file,
        extracted_directory,
        projector_file,
        projected_directory,
        enroller_file,
        model_directories,
        score_directories,
        zt_score_directories = None,
        default_extension = '.hdf5',
        compressed_extension = '',
        zt_norm = False
    ):
        """Initialize the file selector object with the current configuration."""
        self.database = database
        self.extractor_file = extractor_file
        self.projector_file = projector_file
        self.enroller_file = enroller_file

        self.model_directories = model_directories
        self.score_directories = score_directories
        self.zt_score_directories = zt_score_directories
        self.default_extension = default_extension
        self.compressed_extension = compressed_extension

        # maps the ``directory_type`` accepted by :py:meth:`get_paths` to the actual directory
        self.directories = {
            'original': database.original_directory,
            'preprocessed': preprocessed_directory,
            'extracted': extracted_directory,
            'projected': projected_directory,
        }
        self.zt_norm = zt_norm

    ### Private helpers

    def _all_files(self, groups):
        """Queries the database for all files of the given groups, adding the ZT files if ``self.zt_norm`` is set."""
        return self.database.all_files(groups=groups, add_zt_files=self.zt_norm)

    def _zt_file(self, index, group, basename):
        """Builds ``<zt_score_directories[index]>/<group>/<basename><default_extension>``.

        The lookup fails if ``zt_score_directories`` was left at ``None``."""
        return os.path.join(self.zt_score_directories[index], group, basename + self.default_extension)

    ### Generic queries

    def uses_probe_file_sets(self):
        """Returns true if the given protocol enables several probe files for scoring (as reported by the database)."""
        return self.database.uses_probe_file_sets()

    def get_paths(self, files, directory_type = None):
        """Returns the list of file names for the given list of File objects.

        ``directory_type`` must be one of the keys of ``self.directories``
        ('original', 'preprocessed', 'extracted' or 'projected'); any other value,
        including the default ``None``, raises a :py:exc:`ValueError`."""
        try:
            directory = self.directories[directory_type]
        except KeyError:
            # wrap in a 1-tuple so that a tuple-valued directory_type is formatted instead of breaking the '%' operator
            raise ValueError("The given directory type '%s' is not supported." % (directory_type,))

        return self.database.file_names(files, directory, self.default_extension)

    ### List of files that will be used for all files

    def original_data_list(self, groups = None):
        """Returns the list of original ``BioFile`` objects that can be used for preprocessing."""
        return self._all_files(groups)

    def original_directory_and_extension(self):
        """Returns the directory and extension of the original files."""
        return self.database.original_directory, self.database.original_extension

    def annotation_list(self, groups = None):
        """Returns the list of file objects (the same list as :py:meth:`original_data_list`), for which annotations can be queried."""
        return self._all_files(groups)

    def get_annotations(self, annotation_file):
        """Returns the annotations of the given file."""
        return self.database.annotations(annotation_file)

    def preprocessed_data_list(self, groups = None):
        """Returns the list of preprocessed data files."""
        return self.get_paths(self._all_files(groups), "preprocessed")

    def feature_list(self, groups = None):
        """Returns the list of extracted feature files."""
        return self.get_paths(self._all_files(groups), "extracted")

    def projected_list(self, groups = None):
        """Returns the list of projected feature files."""
        return self.get_paths(self._all_files(groups), "projected")

    ### Training lists

    def training_list(self, directory_type, step, arrange_by_client = False):
        """Returns the list of files that should be used for the given training step.

        The directory_type might be any of 'preprocessed', 'extracted', or 'projected'.
        The step might be any of 'train_extractor', 'train_projector', or 'train_enroller'.
        If arrange_by_client is enabled, a list of lists (one list for each client) is returned."""
        files = self.database.training_files(step, arrange_by_client)
        if arrange_by_client:
            return [self.get_paths(client_files, directory_type) for client_files in files]
        return self.get_paths(files, directory_type)

    ### Enrollment and models

    def client_id(self, model_id, group, is_t_model_id = False):
        """Returns the id of the client for the given model id or T-norm model id."""
        if is_t_model_id:
            return self.database.client_id_from_t_model_id(model_id, group=group)
        return self.database.client_id_from_model_id(model_id, group=group)

    def model_ids(self, group):
        """Returns the sorted list of model ids from the given group."""
        return sorted(self.database.model_ids(groups=group))

    def enroll_files(self, model_id, group, directory_type):
        """Returns the list of model feature files used for enrollment of the model with the given model_id from the given group.

        The directory_type might be 'extracted' or 'projected'."""
        files = self.database.enroll_files(group=group, model_id=model_id)
        return self.get_paths(files, directory_type)

    def model_file(self, model_id, group):
        """Returns the file of the model with the given model id (located in the first of the ``model_directories``)."""
        return os.path.join(self.model_directories[0], group, str(model_id) + self.default_extension)

    def probe_objects(self, group):
        """Returns the probe File objects used to compute the raw scores."""
        # get the probe files for all models
        if self.uses_probe_file_sets():
            return self.database.probe_file_sets(group=group)
        return self.database.probe_files(group=group)

    def probe_objects_for_model(self, model_id, group):
        """Returns the probe File objects used to compute the raw scores for the given model id.

        This is actually a sub-set of all probe_objects()."""
        # get the probe files for the specific model
        if self.uses_probe_file_sets():
            return self.database.probe_file_sets(model_id=model_id, group=group)
        return self.database.probe_files(model_id=model_id, group=group)

    def t_model_ids(self, group):
        """Returns the sorted list of T-Norm-model ids from the given group."""
        return sorted(self.database.t_model_ids(groups=group))

    def t_enroll_files(self, t_model_id, group, directory_type):
        """Returns the list of T-norm model files used for enrollment of the given model_id from the given group."""
        files = self.database.t_enroll_files(group=group, t_model_id=t_model_id)
        return self.get_paths(files, directory_type)

    def t_model_file(self, model_id, group):
        """Returns the file of the T-Norm-model with the given model id (located in the second of the ``model_directories``)."""
        return os.path.join(self.model_directories[1], group, str(model_id) + self.default_extension)

    def z_probe_objects(self, group):
        """Returns the probe File objects used to compute the Z-Norm."""
        # get the probe files for all models
        if self.uses_probe_file_sets():
            return self.database.z_probe_file_sets(group=group)
        return self.database.z_probe_files(group=group)

    ### ZT-Normalization

    def a_file(self, model_id, group):
        """Returns the A-file for the given model id that is used for computing ZT normalization."""
        return self._zt_file(0, group, str(model_id))

    def b_file(self, model_id, group):
        """Returns the B-file for the given model id that is used for computing ZT normalization."""
        return self._zt_file(1, group, str(model_id))

    def c_file(self, t_model_id, group):
        """Returns the C-file for the given T-model id that is used for computing ZT normalization."""
        # the "TM" prefix keeps T-model files apart from the per-model C-files stored in the same directory
        return self._zt_file(2, group, "TM" + str(t_model_id))

    def c_file_for_model(self, model_id, group):
        """Returns the C-file for the given model id that is used for computing ZT normalization."""
        return self._zt_file(2, group, str(model_id))

    def d_file(self, t_model_id, group):
        """Returns the D-file for the given T-model id that is used for computing ZT normalization."""
        return self._zt_file(3, group, str(t_model_id))

    def d_matrix_file(self, group):
        """Returns the D-file for storing all scores for pairs of T-models and Z-probes."""
        return self._zt_file(3, group, "D")

    def d_same_value_file(self, t_model_id, group):
        """Returns the specific D-file for storing which pairs of the given T-model id and all Z-probes are intrapersonal or extrapersonal."""
        return self._zt_file(4, group, str(t_model_id))

    def d_same_value_matrix_file(self, group):
        """Returns the specific D-file for storing which pairs of T-models and Z-probes are intrapersonal or extrapersonal."""
        return self._zt_file(4, group, "D_sameValue")

    ### Score files

    def no_norm_file(self, model_id, group):
        """Returns the score text file for the given model id of the given group."""
        return os.path.join(self.score_directories[0], group, str(model_id) + ".txt") + self.compressed_extension

    def no_norm_result_file(self, group):
        """Returns the resulting score text file for the given group."""
        return os.path.join(self.score_directories[0], "scores-" + group) + self.compressed_extension

    def zt_norm_file(self, model_id, group):
        """Returns the score text file after ZT-normalization for the given model id of the given group."""
        return os.path.join(self.score_directories[1], group, str(model_id) + ".txt") + self.compressed_extension

    def zt_norm_result_file(self, group):
        """Returns the resulting score text file after ZT-normalization for the given group."""
        return os.path.join(self.score_directories[1], "scores-" + group) + self.compressed_extension

    def calibrated_score_file(self, group, zt_norm=False):
        """Returns the file where calibrated scores of the given group can be found.

        The file is located in the ZT-norm score directory if ``zt_norm`` is set, and in the no-norm score directory otherwise."""
        calibration_dir = self.score_directories[1 if zt_norm else 0]
        return os.path.join(calibration_dir, "calibrated-" + group) + self.compressed_extension