Coverage for src/bob/learn/em/whitening.py: 100%

1#!/usr/bin/env python

2# @author: Tiago de Freitas Pereira

4import dask

6from scipy.linalg import pinv

7from sklearn.base import BaseEstimator, TransformerMixin

class Whitening(TransformerMixin, BaseEstimator):
    """
    Trains an estimator that performs Cholesky whitening.

    The whitening transformation is a decorrelation method that converts the
    covariance matrix of a set of samples into the identity matrix :math:`I`.
    This effectively linearly transforms random variables such that the
    resulting variables are uncorrelated and have unit variance.

    This transformation is invertible.
    The method is called the whitening transform because it transforms the
    input matrix :math:`X` closer towards white noise (let's call it
    :math:`\\tilde{X}`):

    .. math::
        Cov(\\tilde{X}) = I

    with:

    .. math:: \\tilde{X} = X W

    where :math:`W` is the projection matrix that allows us to linearly
    project the (mean-subtracted) data matrix :math:`X` to another (sub)
    space such that:

    .. math::
        [Cov(X)]^{-1} = W W^T

    :math:`W` is computed using Cholesky decomposition (lower-triangular
    factor):

    .. math::
        W = cholesky([Cov(X)]^{-1})

    Parameters
    ----------
    pinv : bool
        If ``True``, the covariance matrix is inverted with
        :py:func:`scipy.linalg.pinv` (pseudo-inverse) instead of the plain
        matrix inverse.
    **kwargs
        Forwarded unchanged to the parent class constructor.

    Attributes
    ----------
    weights
        Lower-triangular Cholesky factor of the inverse covariance matrix
        (set by :py:meth:`fit`).
    input_subtract
        Per-feature mean of the training data (set by :py:meth:`fit`).
    input_divide
        Scaling applied after mean subtraction; always ``1.0`` (set by
        :py:meth:`fit`).

    References:
        - 1. https://rtmath.net/help/html/e9c12dc0-e813-4ca9-aaa3-82340f1c5d24.htm
        - 2. http://en.wikipedia.org/wiki/Cholesky_decomposition

    """

    def __init__(self, pinv: bool = False, **kwargs):
        super().__init__(**kwargs)
        self.pinv = pinv

    def fit(self, X, y=None):
        """Estimate the mean and the whitening matrix from ``X``.

        Parameters
        ----------
        X
            Training data with samples along the first axis; either a
            ``dask.array.Array`` or something NumPy can operate on.
        y
            Ignored.

        Returns
        -------
        self
            The fitted estimator.
        """
        # ``import dask`` on its own does not guarantee that the
        # ``dask.array`` submodule has been loaded, so import it explicitly
        # before touching ``dask.array.Array``.
        import dask.array as dask_array

        # CHECKING THE TYPES
        if isinstance(X, dask_array.Array):
            numerical_module = dask_array

            from dask.array.linalg import cholesky, inv

        else:
            import numpy as numerical_module

            from scipy.linalg import cholesky, inv

        # 1. Computes the mean vector and the covariance matrix of the training set
        mu = numerical_module.mean(X, axis=0)
        cov = numerical_module.cov(numerical_module.transpose(X))

        # 2. Computes the inverse of the covariance matrix
        if self.pinv:
            # ``pinv`` is SciPy's implementation and hands back a NumPy
            # array; convert it to the active backend's array type so the
            # dask ``cholesky`` below receives a dask array (this is a no-op
            # on the NumPy path).
            inv_cov = numerical_module.asarray(pinv(cov))
        else:
            inv_cov = inv(cov)

        # 3. Computes the Cholesky decomposition of the inverse covariance matrix
        self.weights = cholesky(
            inv_cov, lower=True
        )  # Setting lower true to have the same implementation as in the previous code
        self.input_subtract = mu
        self.input_divide = 1.0

        return self

    def transform(self, X):
        """Whiten ``X`` using the statistics learned by :py:meth:`fit`.

        Subtracts ``input_subtract``, divides by ``input_divide`` and
        right-multiplies the result by ``weights``.
        """
        return ((X - self.input_subtract) / self.input_divide) @ self.weights