Coverage for src/bob/learn/em/whitening.py: 100%
22 statements
« prev ^ index » next coverage.py v7.0.5, created at 2023-06-16 14:34 +0200
« prev ^ index » next coverage.py v7.0.5, created at 2023-06-16 14:34 +0200
1#!/usr/bin/env python
2# @author: Tiago de Freitas Pereira
4import dask
6from scipy.linalg import pinv
7from sklearn.base import BaseEstimator, TransformerMixin
class Whitening(TransformerMixin, BaseEstimator):
    """
    Trains an estimator that performs Cholesky whitening.

    The whitening transformation is a decorrelation method that converts the covariance matrix of a set of samples into the identity matrix :math:`I`.
    This effectively linearly transforms random variables such that the resulting variables are uncorrelated and have unit variance.

    This transformation is invertible.
    The method is called the whitening transform because it transforms the input matrix :math:`X` closer towards white noise (let's call it :math:`\\tilde{X}`):

    .. math::
        Cov(\\tilde{X}) = I

    with:

    .. math:: \\tilde{X} = (X - \\mu) W

    where :math:`\\mu` is the per-feature mean of the training set and :math:`W` is the projection matrix that allows us to linearly project the data matrix :math:`X` to another (sub) space such that:

    .. math::
        [Cov(X)]^{-1} = W W^T

    :math:`W` is computed using Cholesky decomposition (lower-triangular factor):

    .. math::
        W = cholesky([Cov(X)]^{-1})

    Parameters
    ----------
    pinv
        If ``True``, the covariance matrix is inverted with the
        pseudo-inverse (``scipy.linalg.pinv``) instead of a regular matrix
        inverse.

    Attributes
    ----------
    weights
        Lower-triangular Cholesky factor of the inverse covariance matrix,
        set by :meth:`fit`.
    input_subtract
        Mean of the training samples along axis 0, set by :meth:`fit`.
    input_divide
        Scaling applied after mean removal; :meth:`fit` always sets it to
        ``1.0``.

    References:
        - 1. https://rtmath.net/help/html/e9c12dc0-e813-4ca9-aaa3-82340f1c5d24.htm
        - 2. http://en.wikipedia.org/wiki/Cholesky_decomposition

    """

    def __init__(self, pinv: bool = False, **kwargs):
        super().__init__(**kwargs)
        self.pinv = pinv

    def fit(self, X, y=None):
        """Estimate the whitening matrix and the mean from the samples in ``X``.

        ``X`` holds one sample per row (the mean is taken along axis 0 and
        the covariance is computed on the transpose of ``X``). ``y`` is
        ignored. Returns ``self``.
        """
        # A bare ``import dask`` does not load the ``dask.array`` submodule, so
        # make sure it is imported before ``dask.array.Array`` is looked up.
        import dask.array

        # CHECKING THE TYPES
        if isinstance(X, dask.array.Array):
            import dask.array as numerical_module

            from dask.array.linalg import cholesky, inv

            def pseudo_inverse(matrix):
                # scipy's pinv hands back a NumPy array, whereas dask's
                # cholesky needs a dask array: wrap the result in a single
                # (hence square) chunk.
                dense = pinv(matrix)
                return numerical_module.from_array(dense, chunks=dense.shape)

        else:
            import numpy as numerical_module

            from scipy.linalg import cholesky, inv

            pseudo_inverse = pinv

        # 1. Computes the mean vector and the covariance matrix of the training set
        mu = numerical_module.mean(X, axis=0)
        cov = numerical_module.cov(numerical_module.transpose(X))

        # 2. Computes the inverse of the covariance matrix
        inv_cov = pseudo_inverse(cov) if self.pinv else inv(cov)

        # 3. Computes the Cholesky decomposition of the inverse covariance matrix
        self.weights = cholesky(
            inv_cov, lower=True
        )  # Setting lower true to have the same implementation as in the previous code
        self.input_subtract = mu
        self.input_divide = 1.0

        return self

    def transform(self, X):
        """Center ``X`` with the fitted mean, rescale, and project it onto ``weights``."""
        return ((X - self.input_subtract) / self.input_divide) @ self.weights