Source code for bob.bio.base.extractor.Extractor

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Tue Oct  2 12:12:39 CEST 2012

import os

from .. import utils

class Extractor (object):
  """Common base class from which every feature extractor is derived.

  It lays out the minimal interface that a concrete feature extractor has to provide.

  Extractors that need a training step have to announce this through the constructor arguments below.

  **Parameters**

  requires_training : bool
    Pass ``True`` when the feature extractor has to be trained before it can be used.
    Such extractors should override both the :py:meth:`train` and the :py:meth:`load` method.

  split_training_data_by_client : bool
    Pass ``True`` when the extractor wants its training data grouped by client.
    This flag has no effect when ``requires_training`` is ``False``.

  min_extractor_file_size : int
    Lower bound, in bytes, on the size of a saved extractor file (only relevant
    for extractors that require training). A smaller file is assumed to be
    corrupt, and the extractor will be trained again.

  min_feature_file_size : int
    Lower bound, in bytes, on the size of an extracted feature file. A smaller
    file is assumed to be corrupt, and the features will be extracted again.

  kwargs : ``key=value`` pairs
    Any further keyword arguments; they are stored and reported by `__str__`.
  """

  def __init__(self,
               requires_training = False,
               split_training_data_by_client = False,
               min_extractor_file_size = 1000,
               min_feature_file_size = 1000,
               **kwargs):
    # Derived classes are expected to provide their own constructor, which accepts
    # every parameter needed for feature extraction and forwards them to this one.

    # training configuration
    self.requires_training = requires_training
    self.split_training_data_by_client = split_training_data_by_client

    # size thresholds (in bytes) below which saved files count as corrupt
    self.min_extractor_file_size = min_extractor_file_size
    self.min_feature_file_size = min_feature_file_size

    # extractor-specific parameters, kept only so that __str__() can report them
    self._kwargs = kwargs


  ############################################################
  ### interface that derived classes have to implement
  ############################################################

  def __call__(self, data):
    """__call__(data) -> feature

    Extracts the features from the given (preprocessed) data.
    This base class provides no implementation; derived classes must overwrite this function.

    **Parameters**

    data : object (usually :py:class:`numpy.ndarray`)
      The *preprocessed* data to extract the features from.

    **Returns:**

    feature : object (usually :py:class:`numpy.ndarray`)
      The feature extracted from ``data``.
    """
    message = "Please overwrite this function in your derived class"
    raise NotImplementedError(message)


  def __str__(self):
    """__str__() -> info

    Gives a textual description of this extractor, including the parameters of the derived class.

    **Returns:**

    info : str
      A string listing all parameters of this (and the derived) class.
    """
    parameters = self._kwargs
    return utils.pretty_print(self, parameters)


  ############################################################
  ### optional hooks, to be overwritten only when required
  ############################################################

  def write_feature(self, feature, feature_file):
    """Stores the given *extracted* feature in the given file.
    The default implementation simply relies on :py:func:`bob.bio.base.save`.
    Overwrite this function if your features need a different file format.

    **Parameters:**

    feature : object
      The extracted feature, i.e., the object returned by `__call__`.

    feature_file : str or :py:class:`bob.io.base.HDF5File`
      Either a file that is opened for writing, or the name of the file to write.
    """
    utils.save(feature, feature_file)


  def read_feature(self, feature_file):
    """Loads an *extracted* feature from the given file.
    The default implementation simply relies on :py:func:`bob.bio.base.load`.
    Overwrite this function if your features are stored in a different file format.

    **Parameters:**

    feature_file : str or :py:class:`bob.io.base.HDF5File`
      Either a file that is opened for reading, or the name of the file to read.

    **Returns:**

    feature : object (usually :py:class:`numpy.ndarray`)
      The feature that was read from the file.
    """
    feature = utils.load(feature_file)
    return feature


  def load(self, extractor_file):
    """Reads the parameters needed for feature extraction from the extractor file.
    Usually, this is only meaningful together with the :py:meth:`train` function.
    The base class implementation does not do anything.

    **Parameters:**

    extractor_file : str
      The name of the file from which the extractor is read.
    """
    return None


  def train(self, training_data, extractor_file):
    """Overwrite this function in order to train your feature extractor.
    When doing so, also enable training by passing ``requires_training = True``
    to the constructor of this base class.

    **Parameters:**

    training_data : [object] or [[object]]
      The *preprocessed* data that the extractor can be trained with.
      A single flat list is given if ``split_training_data_by_client = False`` was set in the constructor;
      otherwise a list of lists is given, where each inner list holds the data of one (training-)client.

    extractor_file : str
      The name of the file to write.
      The :py:meth:`load` function should be able to read this file.
    """
    message = "Please overwrite this function in your derived class, or unset the 'requires_training' option in the constructor."
    raise NotImplementedError(message)