#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Tue Oct 2 12:12:39 CEST 2012
from .. import utils
class Preprocessor(object):
    """Base class for all preprocessors.

    It defines the minimum requirements for all derived preprocessor classes.

    **Parameters:**

    writes_data : bool
      Select whether the preprocessor actually writes preprocessed data, or
      whether it is simply returning values.

    read_original_data : callable or ``None``
      This function is used to read the original data from file.
      It takes three inputs: A :py:class:`bob.bio.base.database.BioFile` (or
      one of its derivatives), the original directory (as ``str``) and the
      original extension (as ``str``).
      If ``None``, the default function
      :py:func:`bob.bio.base.read_original_data` is used.

    min_preprocessed_file_size : int
      The minimum file size of a saved preprocessed data file, in bytes.
      If the saved preprocessed data file is smaller than this, it is assumed
      to be a corrupt file and the data will be processed again.
      This class only stores the value; the check itself is not part of this
      class.

    kwargs : ``key=value`` pairs
      A list of keyword arguments to be written in the `__str__` function.
    """

    def __init__(self, writes_data=True, read_original_data=None,
                 min_preprocessed_file_size=1000, **kwargs):
        # Each derived class needs to have a constructor taking all the
        # parameters that are required for the preprocessing as arguments.
        self.writes_data = writes_data

        # Fall back to the package-wide reader when no custom one is given.
        # The lookup happens at call time so that ``utils`` is only needed
        # when the default is actually requested.
        if read_original_data is None:
            read_original_data = utils.read_original_data
        self.read_original_data = read_original_data

        self.min_preprocessed_file_size = min_preprocessed_file_size

        # Kept only so that ``__str__`` can report the configuration.
        self._kwargs = kwargs

    # The call function (i.e. the operator() in C++ terms)
    def __call__(self, data, annotations):
        """__call__(data, annotations) -> data

        This is the call function that you have to overwrite in the derived
        class.

        **Parameters:**

        data : object
          The original data that needs preprocessing, usually a
          :py:class:`numpy.ndarray`, but might be different.

        annotations : {} or None
          The annotations (if any) that belong to the given ``data``; as a
          dictionary.
          The type of the annotation depends on your kind of problem.

        **Returns:**

        data : object
          The *preprocessed* data, usually a :py:class:`numpy.ndarray`, but
          might be different.

        **Raises:**

        NotImplementedError
          Always, in this base class implementation.
        """
        raise NotImplementedError("Please overwrite this function in your derived class")

    def __str__(self):
        """__str__() -> info

        This function returns all parameters of this class (and its derived
        class).

        **Returns:**

        info : str
          A string containing the full information of all parameters of this
          (and the derived) class.
        """
        return utils.pretty_print(self, self._kwargs)

    ############################################################
    ### Special functions that might be overwritten on need
    ############################################################

    def write_data(self, data, data_file):
        """Writes the given *preprocessed* data to a file with the given name.

        In this base class implementation, we simply use
        :py:func:`bob.bio.base.save` for that.
        If you have a different format (e.g. not images), please overwrite
        this function.

        **Parameters:**

        data : object
          The preprocessed data, i.e., what is returned from `__call__`.

        data_file : str or :py:class:`bob.io.base.HDF5File`
          The file open for writing, or the name of the file to write.
        """
        utils.save(data, data_file)

    def read_data(self, data_file):
        """read_data(data_file) -> data

        Reads the *preprocessed* data from file.
        In this base class implementation, it uses
        :py:func:`bob.bio.base.load` to do that.
        If you have a different format, please overwrite this function.

        **Parameters:**

        data_file : str or :py:class:`bob.io.base.HDF5File`
          The file open for reading or the name of the file to read from.

        **Returns:**

        data : object (usually :py:class:`numpy.ndarray`)
          The preprocessed data read from file.
        """
        return utils.load(data_file)