Coverage for src/bob/bio/base/database/file.py: 61%

33 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-12 22:34 +0200

1import bob.io.base 

2 

3from bob.bio.base.utils.annotations import read_annotation_file 

4from bob.pipelines.sample import _ReprMixin 

5 

6from .legacy import File as LegacyFile 

7 

8 

class BioFile(LegacyFile, _ReprMixin):
    """Base description of a database file used in verification experiments.

    On top of what the legacy ``File`` base class stores, an instance keeps
    the id of the client it belongs to and the directories/extensions used
    to locate the original data and its annotation file.

    Attributes
    ----------
    client_id : str or int
        The id of the client this file belongs to.
        Its type depends on your implementation.
        If you use an SQL database, this should be an SQL type like Integer
        or String.
    path : object
        See the :py:class:`bob.bio.base.database.legacy.File` constructor.
    file_id : object
        See the :py:class:`bob.bio.base.database.legacy.File` constructor.
    original_directory : str or None
        The path to the original directory of the file.
    original_extension : str or None
        The extension of the original files. This attribute is deprecated;
        please try to include the extension in the ``path`` attribute.
    annotation_directory : str or None
        The path to the directory of the annotations.
    annotation_extension : str or None
        The extension of annotation files. Default is ``.json``.
    annotation_type : str or None
        The type of the annotation file, see
        :py:func:`bob.bio.base.utils.annotations.read_annotation_file`.
        Default is ``json``.
    """

    def __init__(
        self,
        client_id,
        path,
        file_id=None,
        original_directory=None,
        original_extension=None,
        annotation_directory=None,
        annotation_extension=None,
        annotation_type=None,
        **kwargs,
    ):
        super().__init__(path, file_id, **kwargs)

        # The id of the client to which this file belongs.
        self.client_id = client_id

        # Where (and with which extension) the original data is found.
        self.original_directory = original_directory
        self.original_extension = original_extension

        # Where the annotations are found; any falsy extension or type
        # falls back to the JSON defaults.
        self.annotation_directory = annotation_directory
        self.annotation_extension = (
            annotation_extension if annotation_extension else ".json"
        )
        self.annotation_type = annotation_type if annotation_type else "json"

    def load(self, original_directory=None, original_extension=None):
        """Load the data of this file from disk.

        Override this method if your data needs to be loaded differently.

        Parameters
        ----------
        original_directory : :obj:`str` (optional)
            The path to the root of the dataset structure.
            If ``None``, ``self.original_directory`` is used instead.

        original_extension : :obj:`str` (optional)
            The filename extension of the files in the dataset.
            If ``None``, ``self.original_extension`` is used instead.

        Returns
        -------
        object
            The loaded data (normally :py:class:`numpy.ndarray`).
        """
        directory = (
            self.original_directory
            if original_directory is None
            else original_directory
        )
        extension = (
            self.original_extension
            if original_extension is None
            else original_extension
        )
        # ``make_path`` always receives strings: anything falsy becomes "".
        data_path = self.make_path(directory or "", extension or "")
        return bob.io.base.load(data_path)

    @property
    def annotations(self):
        """Annotations read from this file's annotation file.

        The location is built with ``make_path`` from
        ``annotation_directory`` and ``annotation_extension``, and the file
        is parsed by ``read_annotation_file`` according to
        ``annotation_type``.
        """
        directory = self.annotation_directory or ""
        extension = self.annotation_extension or ""
        annotation_path = self.make_path(directory, extension)
        return read_annotation_file(
            annotation_path, annotation_type=self.annotation_type
        )

100 

101 

class BioFileSet(BioFile):
    """Minimum interface of a set of database files that needs to be exported.

    Use this class whenever the database provides several files that belong
    to the same probe. Each file set has an id and a list of associated
    files, which are of type :py:class:`bob.bio.base.database.BioFile` and
    belong to the same client. The file set id can be anything hashable, but
    needs to be unique all over the database.

    Parameters
    ----------
    file_set_id : str or int
        A unique ID that identifies the file set.

    files : [:py:class:`bob.bio.base.database.BioFile`]
        A non-empty list of BioFile objects that should be stored inside
        this file set. All files of that list need to have the same
        client ID.

    path : object (optional)
        The path handed to the :py:class:`BioFile` constructor. If ``None``,
        the ``path`` attributes of all ``files`` are joined with ``"+"``.

    **kwargs
        Forwarded unchanged to the :py:class:`BioFile` constructor.

    Raises
    ------
    AssertionError
        If ``files`` is empty, or if the files do not all share the client
        id of the first file.
    """

    def __init__(self, file_set_id, files, path=None, **kwargs):
        # The checks below raise explicitly instead of using ``assert`` so
        # that they are still enforced when Python runs with ``-O`` (which
        # strips assert statements). ``AssertionError`` is kept so callers
        # observe the same exception type as before.
        if len(files) == 0:
            raise AssertionError("Cannot create an empty BioFileSet")

        # Without an explicit path, derive one from the member files.
        if path is None:
            path = "+".join(f.path for f in files)

        # The set takes the client id of its first file and uses the file
        # set id as its file id.
        super().__init__(files[0].client_id, path, file_set_id, **kwargs)

        # All files must come from the same client as the first one.
        if not all(f.client_id == self.client_id for f in files):
            raise AssertionError(
                "All files of a BioFileSet must have the same client ID"
            )

        # The list of files contained in this set
        self.files = files

    def __lt__(self, other):
        """Order file sets by comparing their ``id`` attributes."""
        # NOTE(review): ``id`` is presumably set by the base-class
        # constructor from ``file_set_id`` -- confirm in legacy.File.
        return self.id < other.id

139 """Defines an order between file sets by using the order of the file set ids.""" 

140 # compare two BioFile set objects by comparing their IDs 

141 return self.id < other.id