Coverage for src/bob/bio/base/transformers/preprocessing.py: 54%
13 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 22:34 +0200
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 22:34 +0200
1import numpy as np
3from sklearn.preprocessing import OrdinalEncoder
class ReferenceIdEncoder(OrdinalEncoder):
    """Encode ``subject_id`` strings as integers.

    The resulting integer labels are meant for training supervised
    transformers such as the ISV algorithm.

    Compared with the base ``OrdinalEncoder``, the input of :meth:`fit` and
    :meth:`transform` is reshaped to a single column before being handed to
    the base class, and :meth:`transform` returns a flat array.
    """

    # Only the default values of the init args differ from the base class.
    def __init__(
        self,
        *,
        categories="auto",
        dtype=int,
        handle_unknown="use_encoded_value",
        unknown_value=-1,
        **kwargs,
    ):
        """Forward every argument unchanged to ``OrdinalEncoder.__init__``."""
        super().__init__(
            categories=categories,
            dtype=dtype,
            handle_unknown=handle_unknown,
            unknown_value=unknown_value,
            **kwargs,
        )

    @staticmethod
    def _as_column(ids):
        """Return ``ids`` as a 2d array of shape (N, 1)."""
        return np.asarray(ids).reshape((-1, 1))

    def fit(self, X, y=None):
        """Fit the encoder on ``X``; ``y`` is accepted but not used.

        ``X`` is a SampleBatch or a list of subject_id strings.
        """
        column = self._as_column(X)
        return super().fit(column)

    def transform(self, X):
        """Encode ``X`` and return the codes as a flat array."""
        column = self._as_column(X)
        encoded = super().transform(column)
        # The base class yields one column; callers get a flat array instead.
        return encoded.flatten()

    def _more_tags(self):
        """Return the tags naming the attribute read and the one produced."""
        return dict(bob_input="subject_id", bob_output="subject_id_int")