Coverage for src/bob/bio/base/database/filelist/models.py: 91%
91 statements
« prev ^ index » next coverage.py v7.6.5, created at 2024-11-14 21:41 +0100
« prev ^ index » next coverage.py v7.6.5, created at 2024-11-14 21:41 +0100
1#!/usr/bin/env python
2# vim: set fileencoding=utf-8 :
3# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
4# @date: Wed Oct 24 10:47:43 CEST 2012
5#
6# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
7#
8# This program is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, version 3 of the License.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program. If not, see <http://www.gnu.org/licenses/>.
20"""
21This file defines a simple interface that is comparable with other bob.db databases.
22"""
24import fileinput
25import os
26import re
class FileListFile(object):
    """
    A single entry of a file list: a file name plus the identities attached to it.

    The given ``file_name`` is stored both as the ``path`` and as the ``id`` of the file.
    If the ``model_id`` is not specified, the ``client_id`` is used as model id.
    If the ``claimed_id`` is not specified, the ``client_id`` is used as claimed id.

    Parameters
    ----------

    file_name : str
      The path of this file, relative to the basic directory.
      Please do not specify any file extensions.

    client_id : various type
      The id of the client this file belongs to.
      The type of it is dependent on your implementation.

    model_id : various type or ``None``
      The id of the model associated with this file.
      Defaults to the ``client_id`` when ``None``.

    claimed_id : various type or ``None``
      The id that is claimed for this file.
      Defaults to the ``client_id`` when ``None``.
    """

    def __init__(self, file_name, client_id, model_id=None, claimed_id=None):
        super(FileListFile, self).__init__()

        # the file name doubles as the unique id and as the relative path
        self.id = file_name
        self.path = file_name
        self.client_id = client_id

        # Note: for probe files, the model id is the id of the model that the
        # probe file is tested against.  Several probe files may therefore
        # share a file id while differing in their model ids, so please
        # DO NOT USE the model id outside of this class (or the according
        # database queries).
        if model_id is None:
            # no model id given: fall back to the client id
            model_id = client_id
        self._model_id = model_id

        if claimed_id is None:
            # no claimed id given: fall back to the client id
            claimed_id = client_id
        self.claimed_id = claimed_id
72#############################################################################
73# internal access functions for the file lists; do not export!
74#############################################################################
class ListReader(object):
    """
    Reads file lists from disk and optionally keeps the results in memory.

    Parameters
    ----------

    store_lists : bool
      When true, lists and model dictionaries that were read once are cached
      and returned again on later calls.  Note that the caches are keyed by
      ``group`` (and ``type``) only, not by the name of the list file.
    """

    # Tokens of a list line.  Inside the character class, ``(-.`` is a range
    # that covers the characters ``( ) * + , - .``; it is kept unchanged.
    _TOKEN_PATTERN = re.compile(r"[\w/(-.)]+")

    # groups whose lists are plain two-column lists, cached per group only
    _WORLD_GROUPS = ("world", "optional_world_1", "optional_world_2")

    # list types accepted by ``read_list``, in the order used by the error message
    _KNOWN_TYPES = (
        "for_models",
        "for_scores",
        "for_probes",
        "for_tnorm",
        "for_znorm",
    )

    # number of columns that ``_read_column_list`` is asked to interpret per type
    _COLUMNS_PER_TYPE = {
        "for_models": 3,
        "for_tnorm": 3,
        "for_scores": 4,
        "for_probes": 2,
        "for_znorm": 2,
    }

    def __init__(self, store_lists):
        self.m_read_lists = {}
        self.m_model_dicts = {}
        self.m_store_lists = store_lists

    def _read_multi_column_list(self, list_file):
        """
        Parses the given list file into a list of rows.

        Lines starting with ``#`` (after stripping whitespace) and lines
        without any token are skipped.  Each remaining line must consist of
        2, 3 or 4 tokens, and all rows must have the same number of tokens.

        Parameters
        ----------

        list_file : str
          The path of the list file to read.

        Returns
        -------

        rows : [[str]]
          One list of tokens per parsed line.

        Raises
        ------

        RuntimeError
          If the file does not exist, cannot be read, or contains a line
          that fails the sanity checks.
        """
        if not os.path.isfile(list_file):
            raise RuntimeError("File %s does not exist." % (list_file,))

        rows = []
        try:
            # A context manager guarantees that the file is closed even when
            # a sanity check fails.  The module-global ``fileinput`` state
            # would otherwise stay open and make the next call fail with
            # "input() already active".
            with open(list_file) as lines:
                for line in lines:
                    if line.strip().startswith("#"):
                        continue
                    tokens = self._TOKEN_PATTERN.findall(line)
                    if not tokens:
                        continue
                    # perform some sanity checks
                    if len(tokens) not in (2, 3, 4):
                        raise IOError(
                            "The read line '%s' from file '%s' could not be parsed successfully!"
                            % (line.rstrip(), list_file)
                        )
                    if rows and len(rows[0]) != len(tokens):
                        raise IOError(
                            "The parsed line '%s' from file '%s' has a different number of elements than the first parsed line '%s'!"
                            % (tokens, list_file, rows[0])
                        )
                    rows.append(tokens)
        except IOError as e:
            raise RuntimeError(
                "Error reading the file '%s' : '%s'." % (list_file, e)
            ) from e

        return rows

    def _read_column_list(self, list_file, column_count):
        """
        Reads the given list file and turns each row into a ``FileListFile``.

        Parameters
        ----------

        list_file : str
          The path of the list file to read.

        column_count : int
          How the columns are interpreted:

          * 2: ``filename client_id``
          * 3: ``filename model_id [client_id]``; without the third column,
            the model id is used as client id
          * 4: ``filename model_id claimed_id [client_id]``; without the
            fourth column, the model id is used as client id

        Returns
        -------

        file_list : [FileListFile]
          One file object per row of the list file.

        Raises
        ------

        ValueError
          If a row is processed with a ``column_count`` other than 2, 3 or 4.

        AssertionError
          If a row has a number of columns that does not fit ``column_count``.
        """
        file_list = []
        for row in self._read_multi_column_list(list_file):
            if column_count == 2:
                assert len(row) == 2
                # we expect: filename client_id
                entry = FileListFile(file_name=row[0], client_id=row[1])
            elif column_count == 3:
                assert len(row) in (2, 3)
                # we expect: filename, model_id, client_id
                entry = FileListFile(
                    file_name=row[0],
                    client_id=row[2] if len(row) > 2 else row[1],
                    model_id=row[1],
                )
            elif column_count == 4:
                assert len(row) in (3, 4)
                # we expect: filename, model_id, claimed_id, client_id
                entry = FileListFile(
                    file_name=row[0],
                    client_id=row[3] if len(row) > 3 else row[1],
                    model_id=row[1],
                    claimed_id=row[2],
                )
            else:
                raise ValueError(
                    "The given column count %d cannot be interpreted. This is a BUG, please report to the author."
                    % column_count
                )
            file_list.append(entry)

        return file_list

    def _create_model_dictionary(self, files):
        """
        Builds the mapping from model ids to client ids for the given files.

        Parameters
        ----------

        files : iterable
          Objects providing the attributes ``_model_id`` and ``client_id``.

        Returns
        -------

        model_to_client : dict
          Maps each model id to its client id.

        Raises
        ------

        ValueError
          If one model id is associated with two different client ids.
        """
        model_to_client = {}
        for file in files:
            # the first client id seen for a model id is the reference
            known_client = model_to_client.setdefault(
                file._model_id, file.client_id
            )
            if known_client != file.client_id:
                raise ValueError(
                    "The read model id '%s' is associated to two different client ids '%s' and '%s'!"
                    % (
                        file._model_id,
                        file.client_id,
                        known_client,
                    )
                )
        return model_to_client

    def read_list(self, list_file, group, type=None):
        """
        Reads the list of Files from the given list file (if not done yet) and returns it.

        Parameters
        ----------

        list_file : str
          The path of the list file; only read when no cached list exists.

        group : str
          The group of the list.  For ``"world"``, ``"optional_world_1"`` and
          ``"optional_world_2"`` a two-column list is read and ``type`` is
          ignored.

        type : str or ``None``
          For all other groups, one of ``"for_models"``, ``"for_scores"``,
          ``"for_probes"``, ``"for_tnorm"`` or ``"for_znorm"``.

        Returns
        -------

        file_list : [FileListFile]
          The files of the list; taken from the cache when available.

        Raises
        ------

        ValueError
          If ``type`` is not one of the known types for a non-world group.
        """
        if group in self._WORLD_GROUPS:
            if group in self.m_read_lists:
                # just return the previously read list
                return self.m_read_lists[group]
            # read the world list, and remember it if requested
            file_list = self._read_column_list(list_file, 2)
            if self.m_store_lists:
                self.m_read_lists[group] = file_list
            return file_list

        lists_of_group = self.m_read_lists.setdefault(group, {})
        if type in lists_of_group:
            return lists_of_group[type]

        column_count = self._COLUMNS_PER_TYPE.get(type)
        if column_count is None:
            raise ValueError(
                "The given type must be one of %s, but not '%s'"
                % (self._KNOWN_TYPES, type)
            )
        file_list = self._read_column_list(list_file, column_count)
        if self.m_store_lists:
            lists_of_group[type] = file_list
        return file_list

    def read_models(self, list_file, group, type=None):
        """
        Generates a dictionary from model_ids to client_ids for the given list file, if not done yet, and returns it.

        Parameters
        ----------

        list_file : str
          The path of the list file; only read when no cached result exists.

        group : str
          One of ``"dev"``, ``"eval"``, ``"world"``, ``"optional_world_1"``
          or ``"optional_world_2"``.

        type : str
          Either ``"for_models"`` or ``"for_tnorm"``.

        Returns
        -------

        model_dict : dict
          Maps model ids to client ids.

        Raises
        ------

        AssertionError
          If ``group`` or ``type`` is not one of the accepted values.

        ValueError
          If a model id of the list belongs to two different client ids.
        """
        assert group in (
            "dev",
            "eval",
            "world",
            "optional_world_1",
            "optional_world_2",
        )
        assert type in ("for_models", "for_tnorm")

        dicts_of_group = self.m_model_dicts.setdefault(group, {})
        if type in dicts_of_group:
            return dicts_of_group[type]

        model_dict = self._create_model_dictionary(
            self.read_list(list_file, group, type)
        )
        if self.m_store_lists:
            dicts_of_group[type] = model_dict
        return model_dict