Source code for bob.bio.base.tools.algorithm

import bob.io.base
import os

import logging
logger = logging.getLogger("bob.bio.base")

from .FileSelector import FileSelector
from .extractor import read_features
from .. import utils


def train_projector(algorithm, extractor, force=False):
    """Trains the feature projector using extracted features of the ``'world'`` group, if the algorithm requires projector training.

    This function should only be called, when the ``algorithm`` actually requires projector training.
    The projector of the given ``algorithm`` is trained using extracted features.
    It writes the projector to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`.
    By default, if the target file already exist, it is not re-created.

    **Parameters:**

    algorithm : :py:class:`bob.bio.base.algorithm.Algorithm` or derived
      The algorithm, in which the projector should be trained.

    extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived
      The extractor, used for reading the training data.

    force : bool
      If given, the projector file is regenerated, even if it already exists.
    """
    if not algorithm.requires_projector_training:
        logger.warning("The train_projector function should not have been called, since the algorithm does not need projector training.")
        return

    # the file selector object
    fs = FileSelector.instance()

    # NOTE(review): the third argument (1000) is presumably a minimum file size
    # used by utils.check_file to detect truncated files -- confirm against utils.
    if utils.check_file(fs.projector_file, force, 1000):
        logger.info("- Projection: projector '%s' already exists.", fs.projector_file)
        return

    bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))

    # load the training data, arranged by client only if the algorithm asks for it
    logger.info("- Projection: loading training data")
    by_client = algorithm.split_training_features_by_client
    train_files = fs.training_list('extracted', 'train_projector', arrange_by_client=by_client)
    train_features = read_features(train_files, extractor, by_client)

    # when arranged by client, len(train_files) counts identities rather than files
    if by_client:
        logger.info("- Projection: training projector '%s' using %d identities: ", fs.projector_file, len(train_files))
    else:
        logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files))

    # perform training; the algorithm writes the projector file itself
    algorithm.train_projector(train_features, fs.projector_file)
def project(algorithm, extractor, groups=None, indices=None, force=False):
    """Projects the features for all files of the database.

    The given ``algorithm`` is used to project all features required for the current experiment.
    It writes the projected data into the directory specified by the :py:class:`bob.bio.base.tools.FileSelector`.
    By default, if target files already exist, they are not re-created.

    The extractor is only used to load the data in a coherent way.

    **Parameters:**

    algorithm : :py:class:`bob.bio.base.algorithm.Algorithm` or derived
      The algorithm, used for projecting features and writing them to file.

    extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived
      The extractor, used for reading the extracted features, which should be projected.

    groups : some of ``('world', 'dev', 'eval')`` or ``None``
      The list of groups, for which the data should be projected.

    indices : (int, int) or None
      If specified, only the features for the given index range ``range(begin, end)`` should be projected.
      This is usually given, when parallel threads are executed.

    force : bool
      If given, files are regenerated, even if they already exist.
    """
    if not algorithm.performs_projection:
        logger.warning("The project function should not have been called, since the algorithm does not perform projection.")
        return

    # the file selector object
    fs = FileSelector.instance()

    # load the projector
    algorithm.load_projector(fs.projector_file)

    # the two lists are indexed in parallel: entry i of one belongs to entry i of the other
    feature_files = fs.feature_list(groups=groups)
    projected_files = fs.projected_list(groups=groups)

    # select a subset of indices to iterate
    # (identity check: ``!= None`` would compare element-wise for array-like index pairs)
    if indices is not None:
        index_range = range(indices[0], indices[1])
        logger.info("- Projection: splitting of index range %s", str(indices))
    else:
        index_range = range(len(feature_files))

    logger.info("- Projection: projecting %d features from directory '%s' to directory '%s'", len(index_range), fs.directories['extracted'], fs.directories['projected'])

    # project the features
    for i in index_range:
        feature_file = str(feature_files[i])
        projected_file = str(projected_files[i])

        # skip files that utils.check_file reports as already present
        if utils.check_file(projected_file, force, 1000):
            continue

        # load feature
        feature = extractor.read_feature(feature_file)
        # project feature
        projected = algorithm.project(feature)
        # write it
        bob.io.base.create_directories_safe(os.path.dirname(projected_file))
        algorithm.write_feature(projected, projected_file)
def train_enroller(algorithm, extractor, force=False):
    """Trains the model enroller using the extracted or projected features, depending on your setup of the algorithm.

    This function should only be called, when the ``algorithm`` actually requires enroller training.
    The enroller of the given ``algorithm`` is trained using extracted or projected features.
    It writes the enroller to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`.
    By default, if the target file already exist, it is not re-created.

    **Parameters:**

    algorithm : :py:class:`bob.bio.base.algorithm.Algorithm` or derived
      The algorithm, in which the enroller should be trained.
      The projector file is loaded through ``algorithm.load_projector`` before the enroller training is started.

    extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived
      The extractor, used for reading the training data, if unprojected features are used for enroller training.

    force : bool
      If given, the enroller file is regenerated, even if it already exists.
    """
    if not algorithm.requires_enroller_training:
        logger.warning("The train_enroller function should not have been called, since the algorithm does not need enroller training.")
        return

    # the file selector object
    fs = FileSelector.instance()

    if utils.check_file(fs.enroller_file, force, 1000):
        logger.info("- Enrollment: enroller '%s' already exists.", fs.enroller_file)
        return

    # projected features are read by the algorithm, extracted ones by the extractor
    use_projected = algorithm.use_projected_features_for_enrollment
    reader = algorithm if use_projected else extractor
    directory_type = 'projected' if use_projected else 'extracted'

    bob.io.base.create_directories_safe(os.path.dirname(fs.enroller_file))

    # first, load the projector
    algorithm.load_projector(fs.projector_file)

    # load training data, always arranged by client
    train_files = fs.training_list(directory_type, 'train_enroller', arrange_by_client=True)
    logger.info("- Enrollment: loading %d enroller training files", len(train_files))
    train_features = read_features(train_files, reader, True)

    # perform training; the algorithm writes the enroller file itself
    logger.info("- Enrollment: training enroller '%s' using %d identities: ", fs.enroller_file, len(train_features))
    algorithm.train_enroller(train_features, fs.enroller_file)
def _enroll_and_write(algorithm, reader, enroll_files, model_file):
    """Reads the given feature files, enrolls one model from them and writes it to ``model_file``."""
    # load all files into memory
    enroll_features = [reader.read_feature(str(enroll_file)) for enroll_file in enroll_files]
    model = algorithm.enroll(enroll_features)
    # save the model
    bob.io.base.create_directories_safe(os.path.dirname(model_file))
    algorithm.write_model(model, model_file)


def enroll(algorithm, extractor, compute_zt_norm, indices=None, groups=('dev', 'eval'), types=('N', 'T'), force=False):
    """Enroll the models for the given groups, eventually for both models and T-Norm-models.

    This function uses the extracted or projected features to compute the models, depending on your setup of the given ``algorithm``.

    The given ``algorithm`` is used to enroll all models required for the current experiment.
    It writes the models into the directories specified by the :py:class:`bob.bio.base.tools.FileSelector`.
    By default, if target files already exist, they are not re-created.

    The extractor is only used to load features in a coherent way.

    **Parameters:**

    algorithm : :py:class:`bob.bio.base.algorithm.Algorithm` or derived
      The algorithm, used for enrolling model and writing them to file.

    extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived
      The extractor, used for reading the extracted features, if the algorithm enrolls models from unprojected data.

    compute_zt_norm : bool
      If set to ``True`` and ``'T'`` is part of the ``types``, also T-norm models are extracted.

    indices : (int, int) or None
      If specified, only the models for the given index range ``range(begin, end)`` should be enrolled.
      The same range is applied to the model ids of every group and every type.
      This is usually given, when parallel threads are executed.

    groups : some of ``('dev', 'eval')``
      The list of groups, for which models should be enrolled.

    types : some of ``('N', 'T')``
      ``'N'`` enrolls the regular models; ``'T'`` enrolls the T-Norm models (only if ``compute_zt_norm`` is set).

    force : bool
      If given, files are regenerated, even if they already exist.
    """
    # the file selector object
    fs = FileSelector.instance()
    # read the projector file, if needed
    algorithm.load_projector(fs.projector_file)
    # read the model enrollment file
    algorithm.load_enroller(fs.enroller_file)

    # which tool to use to read the features, and from which directory type
    use_projected = algorithm.use_projected_features_for_enrollment
    reader = algorithm if use_projected else extractor
    directory_type = 'projected' if use_projected else 'extracted'

    # Create Models
    if 'N' in types:
        for group in groups:
            model_ids = fs.model_ids(group)

            if indices is not None:
                model_ids = model_ids[indices[0]:indices[1]]
                logger.info("- Enrollment: splitting of index range %s", str(indices))

            logger.info("- Enrollment: enrolling models of group '%s'", group)
            for model_id in model_ids:
                # Path to the model
                model_file = str(fs.model_file(model_id, group))

                # skip models that utils.check_file reports as already present
                if utils.check_file(model_file, force, 1000):
                    continue

                enroll_files = fs.enroll_files(model_id, group, directory_type)
                _enroll_and_write(algorithm, reader, enroll_files, model_file)

    # T-Norm-Models
    if 'T' in types and compute_zt_norm:
        for group in groups:
            t_model_ids = fs.t_model_ids(group)

            if indices is not None:
                t_model_ids = t_model_ids[indices[0]:indices[1]]
                logger.info("- Enrollment: splitting of index range %s", str(indices))

            logger.info("- Enrollment: enrolling T-models of group '%s'", group)
            for t_model_id in t_model_ids:
                # Path to the model
                t_model_file = str(fs.t_model_file(t_model_id, group))

                # skip models that utils.check_file reports as already present
                if utils.check_file(t_model_file, force, 1000):
                    continue

                t_enroll_files = fs.t_enroll_files(t_model_id, group, directory_type)
                _enroll_and_write(algorithm, reader, t_enroll_files, t_model_file)