Coverage for src/bob/learn/em/whitening.py: 100%

22 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-06-16 14:34 +0200

1#!/usr/bin/env python 

2# @author: Tiago de Freitas Pereira 

3 

4import dask 

5 

6from scipy.linalg import pinv 

7from sklearn.base import BaseEstimator, TransformerMixin 

8 

9 

class Whitening(TransformerMixin, BaseEstimator):
    """
    Estimator that learns and applies a Cholesky whitening transformation.

    Whitening is a decorrelation method: it linearly transforms a set of
    samples so that the covariance matrix of the transformed samples is the
    identity matrix :math:`I`. The resulting variables are therefore
    uncorrelated and all have unit variance.

    The method is called the whitening transform because it brings the input
    matrix :math:`X` closer to white noise (let's call it :math:`\\tilde{X}`):

    .. math::

        Cov(\\tilde{X}) = I

    with:

    .. math::

        \\tilde{X} = (X - \\mu) W

    where :math:`\\mu` is the mean of the training samples and :math:`W` is
    the projection matrix, chosen such that:

    .. math::

        [Cov(X)]^{-1} = W W^T

    :math:`W` is computed as the lower-triangular Cholesky factor of the
    inverse covariance matrix:

    .. math::

        W = cholesky([Cov(X)]^{-1})

    As long as the covariance matrix is non-singular, :math:`W` is invertible
    and so is the transformation.

    Parameters
    ----------
    pinv : bool
        If ``True``, the covariance matrix is inverted with
        ``scipy.linalg.pinv`` (pseudo-inverse) instead of the ``inv`` function
        of the selected back-end.
    **kwargs
        Forwarded unchanged to the parent class constructor.

    Attributes
    ----------
    weights
        Lower-triangular Cholesky factor of the inverse covariance matrix;
        set by :meth:`fit`.
    input_subtract
        Per-feature mean of the training samples; set by :meth:`fit`.
    input_divide
        Scaling applied after mean subtraction; :meth:`fit` always sets it to
        ``1.0``, so the division in :meth:`transform` is a no-op.

    References:
        - 1. https://rtmath.net/help/html/e9c12dc0-e813-4ca9-aaa3-82340f1c5d24.htm
        - 2. http://en.wikipedia.org/wiki/Cholesky_decomposition

    """

    def __init__(self, pinv: bool = False, **kwargs):
        super().__init__(**kwargs)
        self.pinv = pinv

    def fit(self, X, y=None):
        """
        Estimate the mean and the whitening matrix from the samples in ``X``.

        Parameters
        ----------
        X
            Training samples, one sample per row. Either a
            ``dask.array.Array`` (handled with dask routines) or anything
            NumPy/SciPy can process.
        y
            Ignored; present for compatibility with the scikit-learn API.

        Returns
        -------
        Whitening
            The fitted estimator itself.
        """
        # Import the submodule explicitly: importing a package does not, by
        # itself, import its submodules, so ``dask.array`` is only reachable
        # as an attribute of ``dask`` if something has already imported it.
        import dask.array as dask_array

        # Select the numerical back-end matching the type of the input.
        if isinstance(X, dask_array.Array):
            numerical_module = dask_array

            from dask.array.linalg import cholesky, inv

        else:
            import numpy as numerical_module

            from scipy.linalg import cholesky, inv

        # 1. Computes the mean vector and the covariance matrix of the
        #    training set (``cov`` expects one variable per row, hence the
        #    transpose).
        mu = numerical_module.mean(X, axis=0)
        cov = numerical_module.cov(numerical_module.transpose(X))

        # 2. Computes the inverse of the covariance matrix.
        # NOTE(review): ``pinv`` is always the SciPy implementation imported
        # at file level, even for dask inputs -- confirm that the dask
        # ``cholesky`` below accepts its result when ``self.pinv`` is True.
        inv_cov = pinv(cov) if self.pinv else inv(cov)

        # 3. Computes the Cholesky decomposition of the inverse covariance
        #    matrix.
        self.weights = cholesky(
            inv_cov, lower=True
        )  # Setting lower true to have the same implementation as in the previous code
        self.input_subtract = mu
        self.input_divide = 1.0

        return self

    def transform(self, X):
        """
        Whiten ``X`` using the statistics learned by :meth:`fit`.

        Subtracts ``input_subtract``, divides by ``input_divide`` and
        projects the result onto ``weights``.
        """
        return ((X - self.input_subtract) / self.input_divide) @ self.weights