# Source code for bob.bio.base.tools.algorithm

import bob.io.base
import os

import logging
import inspect
logger = logging.getLogger("bob.bio.base")

from .FileSelector import FileSelector
from .extractor import read_features
from .. import utils


def train_projector(algorithm, extractor, allow_missing_files = False, force = False):
  """Trains the feature projector using extracted features of the ``'world'`` group, if the algorithm requires projector training.

  This function should only be called, when the ``algorithm`` actually requires projector training.
  The projector of the given ``algorithm`` is trained using extracted features.
  It writes the projector to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`.
  By default, if the target file already exist, it is not re-created.

  **Parameters:**

  algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived
    The algorithm, in which the projector should be trained.

  extractor : py:class:`bob.bio.base.extractor.Extractor` or derived
    The extractor, used for reading the training data.

  allow_missing_files : bool
    If set to ``True``, extracted files that are not found are silently ignored during training.

  force : bool
    If given, the projector file is regenerated, even if it already exists.
  """
  if not algorithm.requires_projector_training:
    # fixed: logger.warn is a deprecated alias of logger.warning (removed in Python 3.13)
    logger.warning("The train_projector function should not have been called, since the algorithm does not need projector training.")
    return

  # the file selector object
  fs = FileSelector.instance()

  if utils.check_file(fs.projector_file, force, algorithm.min_projector_file_size):
    logger.info("- Projection: projector '%s' already exists.", fs.projector_file)
  else:
    bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
    # train projector
    logger.info("- Projection: loading training data")
    train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client)
    train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files)
    if algorithm.split_training_features_by_client:
      logger.info("- Projection: training projector '%s' using %d identities: ", fs.projector_file, len(train_files))
    else:
      logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files))

    # perform training, shipping the database metadata only when the algorithm accepts it
    if utils.is_argument_available("metadata", algorithm.train_projector):
      metadata = fs.database.training_files('train_projector', algorithm.split_training_features_by_client)
      algorithm.train_projector(train_features, fs.projector_file, metadata=metadata)
    else:
      algorithm.train_projector(train_features, fs.projector_file)
def project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False):
  """Projects the features for all files of the database.

  The given ``algorithm`` is used to project all features required for the current experiment.
  It writes the projected data into the directory specified by the :py:class:`bob.bio.base.tools.FileSelector`.
  By default, if target files already exist, they are not re-created.

  The extractor is only used to load the data in a coherent way.

  **Parameters:**

  algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived
    The algorithm, used for projecting features and writing them to file.

  extractor : py:class:`bob.bio.base.extractor.Extractor` or derived
    The extractor, used for reading the extracted features, which should be projected.

  groups : some of ``('world', 'dev', 'eval')`` or ``None``
    The list of groups, for which the data should be projected.

  indices : (int, int) or None
    If specified, only the features for the given index range ``range(begin, end)`` should be projected.
    This is usually given, when parallel threads are executed.

  allow_missing_files : bool
    If set to ``True``, extracted files that are not found are silently ignored.

  force : bool
    If given, files are regenerated, even if they already exist.
  """
  if not algorithm.performs_projection:
    # fixed: logger.warn is a deprecated alias of logger.warning (removed in Python 3.13)
    logger.warning("The project function should not have been called, since the algorithm does not perform projection.")
    return

  # the file selector object
  fs = FileSelector.instance()

  # load the projector
  algorithm.load_projector(fs.projector_file)

  feature_files = fs.feature_list(groups=groups)
  projected_files = fs.projected_list(groups=groups)
  metadata = fs.original_data_list(groups=groups)

  # select a subset of indices to iterate
  if indices is not None:
    index_range = range(indices[0], indices[1])
    logger.info("- Projection: splitting of index range %s", str(indices))
  else:
    index_range = range(len(feature_files))

  logger.info("- Projection: projecting %d features from directory '%s' to directory '%s'", len(index_range), fs.directories['extracted'], fs.directories['projected'])

  # fixed + hoisted out of the loop: inspect.getargspec was deprecated and is
  # removed in Python 3.11; use the same helper as train_projector() and enroll()
  has_metadata = utils.is_argument_available("metadata", algorithm.project)

  # project the features
  for i in index_range:
    feature_file = feature_files[i]
    projected_file = projected_files[i]

    if not os.path.exists(feature_file):
      if allow_missing_files:
        logger.debug("... Cannot find extracted feature file %s; skipping", feature_file)
        continue
      else:
        logger.error("Cannot find extracted feature file %s", feature_file)
        # NOTE(review): execution deliberately falls through here; reading the
        # missing file below will raise -- confirm this is the intended failure mode

    if not utils.check_file(projected_file, force, algorithm.min_projected_file_size):
      logger.debug("... Projecting features for file '%s' (%d/%d)", feature_file, index_range.index(i)+1, len(index_range))
      # create output directory before reading the data file (is sometimes required,
      # when relative directories are specified, especially, including a .. somewhere)
      bob.io.base.create_directories_safe(os.path.dirname(projected_file))
      # load feature
      feature = extractor.read_feature(feature_file)
      # project feature, shipping the per-file metadata only when the algorithm accepts it
      if has_metadata:
        projected = algorithm.project(feature, metadata=metadata[i])
      else:
        projected = algorithm.project(feature)

      if projected is None:
        if allow_missing_files:
          logger.debug("... Projection for extracted file %s failed; skipping", feature_file)
          continue
        else:
          raise RuntimeError("Projection of file '%s' was not successful" % feature_file)

      # write it
      algorithm.write_feature(projected, projected_file)
    else:
      logger.debug("... Skipping feature file '%s' since projected file '%s' exists", feature_file, projected_file)
def train_enroller(algorithm, extractor, allow_missing_files = False, force = False):
  """Trains the model enroller using the extracted or projected features, depending on your setup of the algorithm.

  This function should only be called, when the ``algorithm`` actually requires enroller training.
  The enroller of the given ``algorithm`` is trained using extracted or projected features.
  It writes the enroller to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`.
  By default, if the target file already exist, it is not re-created.

  **Parameters:**

  algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived
    The algorithm, in which the enroller should be trained.
    It is assured that the projector file is read (if required) before the enroller training is started.

  extractor : py:class:`bob.bio.base.extractor.Extractor` or derived
    The extractor, used for reading the training data, if unprojected features are used for enroller training.

  allow_missing_files : bool
    If set to ``True``, extracted files that are not found are silently ignored during training.

  force : bool
    If given, the enroller file is regenerated, even if it already exists.
  """
  if not algorithm.requires_enroller_training:
    # fixed: logger.warn is a deprecated alias of logger.warning (removed in Python 3.13)
    logger.warning("The train_enroller function should not have been called, since the algorithm does not need enroller training.")
    return

  # the file selector object
  fs = FileSelector.instance()

  if utils.check_file(fs.enroller_file, force, algorithm.min_enroller_file_size):
    logger.info("- Enrollment: enroller '%s' already exists.", fs.enroller_file)
  else:
    # define the tool that is required to read the features
    reader = algorithm if algorithm.use_projected_features_for_enrollment else extractor
    bob.io.base.create_directories_safe(os.path.dirname(fs.enroller_file))

    # first, load the projector
    if algorithm.requires_projector_training:
      algorithm.load_projector(fs.projector_file)

    # load training data
    train_files = fs.training_list('projected' if algorithm.use_projected_features_for_enrollment else 'extracted', 'train_enroller', arrange_by_client = True)
    logger.info("- Enrollment: loading %d enroller training files of %d identities", sum(len(client_files) for client_files in train_files), len(train_files))
    train_features = read_features(train_files, reader, True, allow_missing_files)

    # perform training
    logger.info("- Enrollment: training enroller '%s' using %d identities", fs.enroller_file, len(train_features))
    algorithm.train_enroller(train_features, fs.enroller_file)
def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ('dev', 'eval'), types = ('N', 'T'), allow_missing_files = False, force = False):
  """Enroll the models for the given groups, eventually for both models and T-Norm-models.

  This function uses the extracted or projected features to compute the models, depending on your setup of the given ``algorithm``.

  The given ``algorithm`` is used to enroll all models required for the current experiment.
  It writes the models into the directories specified by the :py:class:`bob.bio.base.tools.FileSelector`.
  By default, if target files already exist, they are not re-created.

  The extractor is only used to load features in a coherent way.

  **Parameters:**

  algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived
    The algorithm, used for enrolling model and writing them to file.

  extractor : py:class:`bob.bio.base.extractor.Extractor` or derived
    The extractor, used for reading the extracted features, if the algorithm enrolls models from unprojected data.

  compute_zt_norm : bool
    If set to ``True`` and ``'T'`` is part of the ``types``, also T-norm models are extracted.

  indices : (int, int) or None
    If specified, only the models for the given index range ``range(begin, end)`` should be enrolled.
    This is usually given, when parallel threads are executed.

  groups : some of ``('dev', 'eval')``
    The list of groups, for which models should be enrolled.

  types : some of ``('N', 'T')``
    The types of models to enroll: normal (``'N'``) and/or T-norm (``'T'``) models.

  allow_missing_files : bool
    If set to ``True``, extracted or projected files that are not found are silently ignored.
    If none of the enroll files are found, no model file will be written.

  force : bool
    If given, files are regenerated, even if they already exist.
  """
  # the file selector object
  fs = FileSelector.instance()
  # read the projector file, if needed
  if algorithm.requires_projector_training:
    algorithm.load_projector(fs.projector_file)
  # read the model enrollment file
  algorithm.load_enroller(fs.enroller_file)

  # which tool to use to read the features...
  reader = algorithm if algorithm.use_projected_features_for_enrollment else extractor

  # Checking if we need to ship the metadata to the method enroll
  has_metadata = utils.is_argument_available("metadata", algorithm.enroll)

  # Create Models
  if 'N' in types:
    for group in groups:
      model_ids = fs.model_ids(group)

      if indices is not None:
        model_ids = model_ids[indices[0]:indices[1]]
        logger.info("- Enrollment: splitting of index range %s", str(indices))

      logger.info("- Enrollment: enrolling models of group '%s'", group)
      for pos, model_id in enumerate(model_ids):
        # Path to the model
        model_file = fs.model_file(model_id, group)

        # Removes old file if required
        if not utils.check_file(model_file, force, algorithm.min_model_file_size):
          enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')

          if allow_missing_files:
            enroll_files = utils.filter_missing_files(enroll_files)
            if not enroll_files:
              logger.debug("... Skipping model file %s since no feature file could be found", model_file)
              continue

          logger.debug("... Enrolling model '%s' from %d feature(s) to "
                       "file '%s' (%d/%d)", model_id, len(enroll_files), model_file, pos+1, len(model_ids))
          bob.io.base.create_directories_safe(os.path.dirname(model_file))

          # load all files into memory
          enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files]

          if has_metadata:
            metadata = fs.database.enroll_files(group=group, model_id=model_id)
            model = algorithm.enroll(enroll_features, metadata=metadata)
          else:
            model = algorithm.enroll(enroll_features)

          if model is None:
            if allow_missing_files:
              logger.debug("... Enrollment for model %s failed; skipping", model_id)
              continue
            else:
              # fixed typo: "Enrollemnt" -> "Enrollment"
              raise RuntimeError("Enrollment of model '%s' was not successful" % model_id)

          # save the model
          algorithm.write_model(model, model_file)
        else:
          logger.debug("... Skipping model file '%s' since it exists", model_file)

  # T-Norm-Models
  if 'T' in types and compute_zt_norm:
    for group in groups:
      t_model_ids = fs.t_model_ids(group)

      if indices is not None:
        t_model_ids = t_model_ids[indices[0]:indices[1]]
        logger.info("- Enrollment: splitting of index range %s", str(indices))

      logger.info("- Enrollment: enrolling T-models of group '%s'", group)
      for t_model_id in t_model_ids:
        # Path to the model
        t_model_file = fs.t_model_file(t_model_id, group)

        # Removes old file if required
        if not utils.check_file(t_model_file, force, algorithm.min_model_file_size):
          t_enroll_files = fs.t_enroll_files(t_model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')

          if allow_missing_files:
            t_enroll_files = utils.filter_missing_files(t_enroll_files)
            if not t_enroll_files:
              logger.debug("... Skipping T-model file %s since no feature file could be found", t_model_file)
              continue

          logger.debug("... Enrolling T-model from %d features to file '%s'", len(t_enroll_files), t_model_file)
          bob.io.base.create_directories_safe(os.path.dirname(t_model_file))

          # load all files into memory
          t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files]

          if has_metadata:
            metadata = fs.database.enroll_files(group=group, model_id=t_model_id)
            t_model = algorithm.enroll(t_enroll_features, metadata=metadata)
          else:
            t_model = algorithm.enroll(t_enroll_features)

          if t_model is None:
            if allow_missing_files:
              logger.debug("... Enrollment for T-model %s failed; skipping", t_model_id)
              continue
            else:
              # bug fix: the model id was passed as a second exception argument
              # instead of being %-interpolated into the message; also fixed the
              # "Enrollemnt" typo
              raise RuntimeError("Enrollment of T-model '%s' was not successful" % t_model_id)

          # save model
          algorithm.write_model(t_model, t_model_file)
        else:
          logger.debug("... Skipping T-model file '%s' since it exists", t_model_file)