Coverage for src/bob/bio/base/transformers/preprocessing.py: 54%

13 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-12 22:34 +0200

1import numpy as np 

2 

3from sklearn.preprocessing import OrdinalEncoder 

4 

5 

class ReferenceIdEncoder(OrdinalEncoder):
    """Ordinal encoder that converts ``subject_id`` strings to integers.

    It prepares the labels used to train supervised transformers such as
    the ISV algorithm.

    Compared to the base class, :meth:`fit` and :meth:`transform` accept a
    flat sequence of identifiers (it is reshaped internally into a single
    column) and :meth:`transform` hands back a flat array.
    """

    # Only the default values of the init arguments differ from the base
    # class: integer output, and ``handle_unknown="use_encoded_value"``
    # paired with ``unknown_value=-1``.
    def __init__(
        self,
        *,
        categories="auto",
        dtype=int,
        handle_unknown="use_encoded_value",
        unknown_value=-1,
        **kwargs,
    ):
        overridden_defaults = dict(
            categories=categories,
            dtype=dtype,
            handle_unknown=handle_unknown,
            unknown_value=unknown_value,
        )
        super().__init__(**overridden_defaults, **kwargs)

    @staticmethod
    def _as_column(values):
        """Return ``values`` as a 2D array of shape (N, 1)."""
        return np.reshape(np.asarray(values), (-1, 1))

    def fit(self, X, y=None):
        """Fit the encoder on ``X``.

        ``X`` is a SampleBatch or a list of subject_id strings; the base
        class expects a 2D array, so it is turned into one column first.
        ``y`` is accepted but not forwarded to the base class.
        """
        return super().fit(self._as_column(X))

    def transform(self, X):
        """Encode ``X`` and return the result as a flat (1D) array."""
        encoded = super().transform(self._as_column(X))
        # The base class returns one column; flatten it for the caller.
        return encoded.flatten()

    def _more_tags(self):
        """Return the extra estimator tags naming the bob input/output.

        NOTE(review): presumably read by the bob pipeline wrappers to pick
        the sample attribute to consume and to produce -- confirm.
        """
        return dict(bob_input="subject_id", bob_output="subject_id_int")