Coverage for src/bob/bio/base/database/filelist/models.py: 91%

91 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-12 22:34 +0200

1#!/usr/bin/env python 

2# vim: set fileencoding=utf-8 : 

3# @author: Manuel Guenther <Manuel.Guenther@idiap.ch> 

4# @date: Wed Oct 24 10:47:43 CEST 2012 

5# 

6# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland 

7# 

8# This program is free software: you can redistribute it and/or modify 

9# it under the terms of the GNU General Public License as published by 

10# the Free Software Foundation, version 3 of the License. 

11# 

12# This program is distributed in the hope that it will be useful, 

13# but WITHOUT ANY WARRANTY; without even the implied warranty of 

14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

15# GNU General Public License for more details. 

16# 

17# You should have received a copy of the GNU General Public License 

18# along with this program. If not, see <http://www.gnu.org/licenses/>. 

19 

20""" 

21This file defines a simple interface that is comparable with other bob.db databases. 

22""" 

23 

24import fileinput 

25import os 

26import re 

27 

28 

class FileListFile(object):
    """
    A single entry of a file list.

    If the ``model_id`` is not specified, ``model_id`` and ``client_id`` are identical.
    If the ``claimed_id`` is not specified, it is expected to be the ``client_id``.

    Parameters
    ----------

    file_name : str
        The name of the file as given in the file list.
        It is stored both as ``path`` and as ``id`` of this object.

    client_id : various type
        The id of the client this file belongs to.
        The type of it is dependent on your implementation.

    model_id : various type or None
        The id of the model associated with this file.
        When ``None``, the ``client_id`` is used instead.
        It is stored in the non-public attribute ``_model_id``.

    claimed_id : various type or None
        The id that is claimed for this file.
        When ``None``, the ``client_id`` is used instead.
    """

    def __init__(self, file_name, client_id, model_id=None, claimed_id=None):
        super(FileListFile, self).__init__()
        self.client_id = client_id
        # the file name serves both as the path and as the unique file id
        self.path = file_name
        self.id = file_name

        # Note: in case of probe files, model ids are considered to be the ids of the model for the given probe file.
        # Hence, there might be several probe files with the same file id, but different model ids.
        # Therefore, please DO NOT USE the model_id outside of this class (or the according database queries).
        # when the model id is not specified, we use the client id instead
        self._model_id = client_id if model_id is None else model_id
        # when the claimed id is not specified, we use the client id instead
        self.claimed_id = client_id if claimed_id is None else claimed_id

    def __repr__(self):
        """Return a debugging representation listing all stored ids."""
        return "%s(file_name=%r, client_id=%r, model_id=%r, claimed_id=%r)" % (
            type(self).__name__,
            self.path,
            self.client_id,
            self._model_id,
            self.claimed_id,
        )

70 

71 

72############################################################################# 

73# internal access functions for the file lists; do not export! 

74############################################################################# 

75 

76 

class ListReader(object):
    """
    Reads file lists from disk and optionally caches the parsed results.

    Parameters
    ----------

    store_lists : bool
        When true, lists and model dictionaries that were read once are kept
        in memory (keyed by group and type) and returned on later requests
        instead of re-reading the list file.
    """

    # One token of a list file line: word characters, '/', and the ASCII
    # characters from '(' through '.' (that is: ( ) * + , - .).
    _TOKEN_PATTERN = re.compile(r"[\w/(-.)]+")

    def __init__(self, store_lists):
        # cache of read lists: group -> list (world groups) or group -> {type -> list}
        self.m_read_lists = {}
        # cache of model dictionaries: group -> {type -> {model_id -> client_id}}
        self.m_model_dicts = {}
        self.m_store_lists = store_lists

    def _read_multi_column_list(self, list_file):
        """
        Parse ``list_file`` into a list of rows, each row being a list of tokens.

        Lines starting with ``#`` and lines without any token are skipped.
        Every row must have 2, 3 or 4 tokens, and all rows must have the same
        number of tokens as the first one.

        Raises
        ------
        RuntimeError
            If the file does not exist, cannot be read, or a line violates
            the constraints above.
        """
        if not os.path.isfile(list_file):
            raise RuntimeError("File %s does not exist." % (list_file,))

        rows = []
        try:
            # A private FileInput used as a context manager is always closed,
            # even when a line fails to parse.  The module-level
            # fileinput.input() keeps global state that stayed open after an
            # error and made every later call fail with "input() already active".
            with fileinput.FileInput(files=list_file) as lines:
                for line in lines:
                    if line.strip().startswith("#"):
                        continue
                    parsed_line = self._TOKEN_PATTERN.findall(line)
                    if not parsed_line:
                        continue
                    # perform some sanity checks
                    if len(parsed_line) not in (2, 3, 4):
                        raise IOError(
                            "The read line '%s' from file '%s' could not be parsed successfully!"
                            % (line.rstrip(), list_file)
                        )
                    if rows and len(rows[0]) != len(parsed_line):
                        raise IOError(
                            "The parsed line '%s' from file '%s' has a different number of elements than the first parsed line '%s'!"
                            % (parsed_line, list_file, rows[0])
                        )
                    # append the read line
                    rows.append(parsed_line)
        except IOError as e:
            raise RuntimeError(
                "Error reading the file '%s' : '%s'." % (list_file, e)
            ) from e

        # return the read list as a list of rows
        return rows

    def _read_column_list(self, list_file, column_count):
        """
        Read ``list_file`` and turn each row into a ``FileListFile``.

        ``column_count`` selects the expected layout:

        * 2: ``filename client_id``
        * 3: ``filename model_id [client_id]``; the model id is used as
          client id when the third column is missing
        * 4: ``filename model_id claimed_id [client_id]``; the model id is
          used as client id when the fourth column is missing

        Raises
        ------
        ValueError
            If a row is processed with a ``column_count`` other than 2, 3 or 4.
        """
        # read the list
        rows = self._read_multi_column_list(list_file)
        # extract the files from the columns
        file_list = []
        for row in rows:
            if column_count == 2:
                assert len(row) == 2
                # we expect: filename client_id
                file_list.append(
                    FileListFile(file_name=row[0], client_id=row[1])
                )
            elif column_count == 3:
                assert len(row) in (2, 3)
                # we expect: filename, model_id, client_id
                file_list.append(
                    FileListFile(
                        file_name=row[0],
                        client_id=row[2] if len(row) > 2 else row[1],
                        model_id=row[1],
                    )
                )
            elif column_count == 4:
                assert len(row) in (3, 4)
                # we expect: filename, model_id, claimed_id, client_id
                file_list.append(
                    FileListFile(
                        file_name=row[0],
                        client_id=row[3] if len(row) > 3 else row[1],
                        model_id=row[1],
                        claimed_id=row[2],
                    )
                )
            else:
                raise ValueError(
                    "The given column count %d cannot be interpreted. This is a BUG, please report to the author."
                    % column_count
                )

        return file_list

    def _create_model_dictionary(self, files):
        """
        Build a dictionary mapping each model id to its client id.

        Raises
        ------
        ValueError
            If one model id is associated with two different client ids.
        """
        # remember model ids
        retval = {}
        for file in files:
            if file._model_id not in retval:
                retval[file._model_id] = file.client_id
            elif retval[file._model_id] != file.client_id:
                raise ValueError(
                    "The read model id '%s' is associated to two different client ids '%s' and '%s'!"
                    % (
                        file._model_id,
                        file.client_id,
                        retval[file._model_id],
                    )
                )
        return retval

    def read_list(self, list_file, group, type=None):
        """Reads the list of Files from the given list file (if not done yet) and returns it.

        For the groups ``world``, ``optional_world_1`` and ``optional_world_2``
        the list is read as a two-column list and ``type`` is ignored.  For
        any other group, ``type`` selects the column layout and must be one of
        ``for_models``, ``for_scores``, ``for_probes``, ``for_tnorm`` or
        ``for_znorm``; otherwise a ``ValueError`` is raised.

        Results are cached per group (and type) only when the reader was
        created with ``store_lists`` set to true.
        """
        if group in ("world", "optional_world_1", "optional_world_2"):
            if group in self.m_read_lists:
                # just return the previously read list
                return self.m_read_lists[group]
            # read the world list into memory
            file_list = self._read_column_list(list_file, 2)
            if self.m_store_lists:
                self.m_read_lists[group] = file_list
            return file_list

        lists_of_group = self.m_read_lists.setdefault(group, {})
        if type in lists_of_group:
            return lists_of_group[type]

        if type in ("for_models", "for_tnorm"):
            file_list = self._read_column_list(list_file, 3)
        elif type == "for_scores":
            file_list = self._read_column_list(list_file, 4)
        elif type in ("for_probes", "for_znorm"):
            file_list = self._read_column_list(list_file, 2)
        else:
            raise ValueError(
                "The given type must be one of %s, but not '%s'"
                % (
                    (
                        "for_models",
                        "for_scores",
                        "for_probes",
                        "for_tnorm",
                        "for_znorm",
                    ),
                    type,
                )
            )
        if self.m_store_lists:
            lists_of_group[type] = file_list
        return file_list

    def read_models(self, list_file, group, type=None):
        """Generates a dictionary from model_ids to client_ids for the given list file, if not done yet, and returns it"""
        assert group in (
            "dev",
            "eval",
            "world",
            "optional_world_1",
            "optional_world_2",
        )
        assert type in ("for_models", "for_tnorm")
        dicts_of_group = self.m_model_dicts.setdefault(group, {})
        if type in dicts_of_group:
            return dicts_of_group[type]

        model_dict = self._create_model_dictionary(
            self.read_list(list_file, group, type)
        )
        if self.m_store_lists:
            dicts_of_group[type] = model_dict
        return model_dict

234 return self.m_model_dicts[group][type]