Coverage for src/bob/bio/base/database/legacy.py: 50%
107 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 22:15 +0200
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 22:15 +0200
1#!/usr/bin/env python
2# vim: set fileencoding=utf-8 :
3# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
6"""
7Set of legacy functionality for the bob.bio.base.database.Database interface.
8"""
10import logging
11import os
12import warnings
14import bob.io.base
16logger = logging.getLogger(__name__)
def convert_names_to_highlevel(names, low_level_names, high_level_names):
    """
    Translates group names from the low level API to the high level API.

    This is useful for example when you want to return ``db.groups()`` for
    the :py:mod:`bob.bio.base`. Your instance of the database should
    already have ``low_level_names`` and ``high_level_names`` initialized.

    Parameters
    ----------
    names : str or iterable of :obj:`str` or None
        The low level name(s) to translate.

    low_level_names : iterable of :obj:`str`
        The low level names, paired by position with ``high_level_names``.

    high_level_names : iterable of :obj:`str`
        The high level names, paired by position with ``low_level_names``.

    Returns
    -------
    str or list of :obj:`str` or None
        ``None`` when ``names`` is ``None``. For a single string, the
        matching high level name (``None`` if there is no match). For any
        other iterable, the list of translated names in the same order.

    Raises
    ------
    KeyError
        If ``names`` is a non-string iterable with an untranslatable entry.
    """
    if names is None:
        return None

    # build the low -> high lookup table pair by pair
    low_to_high = {}
    for low, high in zip(low_level_names, high_level_names):
        low_to_high[low] = high

    if not isinstance(names, str):
        return [low_to_high[name] for name in names]
    # a single unknown name silently translates to None
    return low_to_high.get(names)
def convert_names_to_lowlevel(names, low_level_names, high_level_names):
    """Same as :py:meth:`convert_names_to_highlevel` but on reverse

    High level name(s) given in ``names`` are translated into the low level
    names found at the same position in ``low_level_names``.

    Returns
    -------
    str or list of :obj:`str` or None
        ``None`` when ``names`` is ``None``. For a single string, the
        matching low level name (``None`` if there is no match). For any
        other iterable, the list of translated names in the same order.

    Raises
    ------
    KeyError
        If ``names`` is a non-string iterable with an untranslatable entry.
    """
    if names is None:
        return None

    high_to_low = {
        high: low for high, low in zip(high_level_names, low_level_names)
    }

    if isinstance(names, str):
        # a single unknown name silently translates to None
        translated = high_to_low.get(names)
    else:
        translated = [high_to_low[name] for name in names]
    return translated
def file_names(files, directory, extension):
    """file_names(files, directory, extension) -> paths

    Builds the full path for each of the given File objects.

    Parameters
    ----------
    files : list of ``bob.db.base.File``
        The file objects whose paths are requested. Each one must offer a
        ``make_path(directory, extension)`` method.

    directory : str
        The base directory, where the files can be found.

    extension : str
        The file name extension to add to all files.

    Returns
    -------
    paths : list of :obj:`str`
        One path per entry of ``files``, in the same order.
    """
    paths = []
    for file_object in files:
        # duplicates are kept on purpose: one output path per input file
        paths.append(file_object.make_path(directory, extension))
    return paths
def sort_files(files):
    """Returns a sorted, duplicate-free version of the given list of File's
    (or other structures that define an 'id' data member).

    The entries are ordered with their own comparison operators; afterwards
    every entry whose ``id`` equals the ``id`` of its direct predecessor in
    the sorted sequence is dropped.

    Parameters
    ----------
    files : list of ``bob.db.base.File``
        The list of files to be uniquified and sorted.

    Returns
    -------
    sorted : list of ``bob.db.base.File``
        The sorted list of files, with consecutive duplicate ids removed.
    """
    ordered = sorted(files)
    if not ordered:
        return []

    # the first entry is always kept; the others only if their id changes
    unique = ordered[:1]
    for previous, current in zip(ordered, ordered[1:]):
        if previous.id != current.id:
            unique.append(current)
    return unique
def check_parameters_for_validity(
    parameters, parameter_description, valid_parameters, default_parameters=None
):
    """Validates one or several parameters against a set of allowed values.

    A single (non list/tuple/set) value is wrapped into a one-element tuple.
    If ``parameters`` is ``None`` or empty, ``default_parameters`` is used
    instead (or ``valid_parameters`` when no default was given). The
    replacement goes through the same checks as user-provided values.

    Parameters
    ----------
    parameters : str or list of :obj:`str` or None
        The parameters to be checked. Might be a string, a list/tuple of
        strings, or None.

    parameter_description : str
        A short description of the parameter. This will be used to raise an
        exception in case the parameter is not valid.

    valid_parameters : list of :obj:`str`
        A list/tuple of valid values for the parameters.

    default_parameters : list of :obj:`str` or None
        The list/tuple of default parameters that will be returned in case
        parameters is None or empty. If omitted, all valid_parameters are used.

    Returns
    -------
    tuple
        A list or tuple (or set, if one was passed) containing the valid
        parameters.

    Raises
    ------
    ValueError
        If some of the parameters are not valid.
    """
    if not parameters:
        # nothing specified ('None' or empty): fall back to the defaults
        if default_parameters is None:
            parameters = valid_parameters
        else:
            parameters = default_parameters

    if not isinstance(parameters, (list, tuple, set)):
        # a lone value is wrapped so that callers always get a collection
        parameters = (parameters,)

    for candidate in parameters:
        if candidate in valid_parameters:
            continue
        raise ValueError(
            "Invalid %s '%s'. Valid values are %s, or lists/tuples of those"
            % (parameter_description, candidate, valid_parameters)
        )

    return parameters
def _is_unspecified(value):
    """Tells whether ``value`` is ``None`` or an empty list/tuple/set."""
    return value is None or (
        isinstance(value, (list, tuple, set)) and len(value) == 0
    )


def check_parameter_for_validity(
    parameter, parameter_description, valid_parameters, default_parameter=None
):
    """Checks the given parameter for validity

    Ensures a given parameter is in the set of valid parameters. If the
    parameter is ``None`` or an empty list/tuple/set, the value in
    ``default_parameter`` will be used, in case it is specified, otherwise a
    :py:exc:`ValueError` will be raised. A list/tuple/set holding exactly
    one element is unwrapped to that element before the check.

    Parameters
    ----------
    parameter : :obj:`str` or :obj:`None`
        The single parameter to be checked. Might be a string, a
        one-element list/tuple/set, or None.

    parameter_description : str
        A short description of the parameter. This will be used to raise an
        exception in case the parameter is not valid.

    valid_parameters : list of :obj:`str`
        A list/tuple of valid values for the parameters.

    default_parameter : :obj:`str`, optional
        The default parameter that will be used in case parameter is None or
        empty. It may itself be a one-element list/tuple/set. If omitted (or
        empty) and parameter is not specified, a ValueError is raised.

    Returns
    -------
    str
        The validated parameter.

    Raises
    ------
    ValueError
        If the specified parameter is invalid, if more than one value was
        given, or if no value and no default were given.
    """
    if _is_unspecified(parameter):
        # parameter not specified -> use the default, if there is one
        if _is_unspecified(default_parameter):
            raise ValueError(
                "The %s has to be one of %s, it might not be 'None'."
                % (parameter_description, valid_parameters)
            )
        parameter = default_parameter

    if isinstance(parameter, (list, tuple, set)):
        # the parameter is wrapped in a container ...
        if len(parameter) > 1:
            raise ValueError(
                "The %s has to be one of %s, it might not be more than one "
                "(%s was given)."
                % (parameter_description, valid_parameters, parameter)
            )
        # ... -> take its only element; next(iter(...)) also works for sets,
        # which cannot be indexed with [0]
        parameter = next(iter(parameter))

    # perform the check
    if parameter not in valid_parameters:
        raise ValueError(
            "The given %s '%s' is not allowed. Please choose one of %s."
            % (parameter_description, parameter, valid_parameters)
        )

    # tests passed -> return the parameter
    return parameter
class File(object):
    """Abstract class that define basic properties of File objects.

    Your file instance should have at least the self.id and self.path
    properties.
    """

    def __init__(self, path, file_id=None, **kwargs):
        """**Constructor Documentation**

        Initialize the File object with the minimum required data.

        Parameters
        ----------
        path : str
            The path to this file, relative to the basic directory.
            If you use an SQL database, this should be the SQL type String.
            Please do not specify any file extensions.

        file_id : object
            The id of the file (various type). Its type depends on your
            implementation. If you use an SQL database, this should be an SQL type
            like Integer or String. If you are using an automatically determined
            file id, you don't need to specify this parameter. Any value other
            than ``None`` (including ``0`` or an empty string) is stored as the id.

        **kwargs
            Forwarded unchanged to the next ``__init__`` in the MRO.

        Raises
        ------
        NotImplementedError
            If self.id is not set and not specified during initialization through
            `file_id`.
        """
        # A relative path, which includes file name but excludes file extension
        self.path = path

        # Only ``None`` means "not specified": a plain truthiness test would
        # wrongly reject legitimate falsy ids such as 0.
        if file_id is not None:
            # A unique identifier of the file.
            self.id = file_id
        elif not hasattr(self, "id"):
            # no id given: the derived class has to provide one
            raise NotImplementedError(
                "Please either specify the file id as parameter, or create an "
                "'id' member variable in the derived class that is automatically "
                "determined (e.g. by SQLite)"
            )

        super(File, self).__init__(**kwargs)

    def __lt__(self, other):
        """This function defines the order on the File objects. File objects are
        always ordered by their ID, in ascending order."""
        return self.id < other.id

    def __repr__(self):
        """This function describes how to convert a File object into a string."""
        return "<File('%s': '%s')>" % (str(self.id), str(self.path))

    def make_path(self, directory=None, extension=None):
        """Wraps the current path so that a complete path is formed

        Parameters
        ----------
        directory : :obj:`str`, optional
            An optional directory name that will be prefixed to the returned
            result.
        extension : :obj:`str`, optional
            An optional extension that will be suffixed to the returned filename.
            The extension normally includes the leading ``.`` character as in
            ``.jpg`` or ``.hdf5``.

        Returns
        -------
        str
            Returns a string containing the newly generated file path.
        """
        # ``None`` (or any other falsy value) for directory/extension is
        # replaced by an empty string before the path is assembled
        return str(os.path.join(directory or "", self.path + (extension or "")))

    def save(
        self, data, directory=None, extension=".hdf5", create_directories=True
    ):
        """Saves the input data at the specified location and using the given
        extension. Override it if you need to save differently.

        Parameters
        ----------
        data : object
            The data blob to be saved (normally a :py:class:`numpy.ndarray`).
        directory : :obj:`str`, optional
            If not empty or None, this directory is prefixed to the final
            file destination
        extension : :obj:`str`, optional
            The extension of the filename - this will control the type of
            output and the codec for saving the input blob.
        create_directories : :obj:`bool`, optional
            Whether to create the required directories to save the data.

        """
        # get the path
        path = self.make_path(directory or "", extension or "")
        # use the bob API to save the data
        bob.io.base.save(data, path, create_directories=create_directories)

    def load(self, directory=None, extension=".hdf5"):
        """Loads the data at the specified location and using the given extension.
        Override it if you need to load differently.

        Parameters
        ----------
        directory : :obj:`str`, optional
            If not empty or None, this directory is prefixed to the final
            file destination
        extension : :obj:`str`, optional
            If not empty or None, this extension is suffixed to the final
            file destination

        Returns
        -------
        object
            The loaded data (normally :py:class:`numpy.ndarray`).

        """
        # get the path
        path = self.make_path(directory or "", extension or "")
        return bob.io.base.load(path)
class FileDatabase(object):
    """Low-level File-based Database API to be used within Bob.

    Not all Databases in Bob need to inherit from this class. Use this class
    only if in your database one sample correlates to one actual file.

    Attributes
    ----------
    original_directory : str
        The directory where the raw files are located.
    original_extension : str
        The extension of raw data files, e.g. ``.png``.
    """

    def __init__(self, original_directory, original_extension, **kwargs):
        """Stores the location and extension of the raw data files.

        Any additional keyword arguments are forwarded to the next
        ``__init__`` in the MRO.
        """
        super(FileDatabase, self).__init__(**kwargs)
        self.original_directory = original_directory
        self.original_extension = original_extension

    def original_file_names(self, files):
        """Returns the full path of the original data of the given File objects.

        A warning is logged (no exception is raised) when the original
        directory or extension is ``None``.

        Parameters
        ----------
        files : list of ``bob.db.base.File``
            The list of file object to retrieve the original data file names for.

        Returns
        -------
        list of :obj:`str`
            The paths extracted for the files, in the same order.
        """
        if self.original_directory is None:
            logger.warning(
                "self.original_directory was not provided (must not be None)!"
            )
        if self.original_extension is None:
            logger.warning(
                "self.original_extension was not provided (must not be None)!"
            )
        return file_names(
            files, self.original_directory, self.original_extension
        )

    def original_file_name(self, file):
        """This function returns the original file name for the given File
        object.

        The existence of the file on disk is verified unless the instance
        carries a false ``check_existence`` attribute. This class does not
        set that attribute itself, so when it is absent the check is
        performed.

        Parameters
        ----------
        file
            ``bob.db.base.File`` or a derivative
            The File objects for which the file name should be retrieved

        Returns
        -------
        str
            The original file name for the given ``bob.db.base.File``
            object.

        Raises
        ------
        ValueError
            if the file is not found.
        """
        # check if directory is set
        if not self.original_directory or not self.original_extension:
            logger.warning(
                "The original_directory and/or the original_extension were not"
                " specified in the constructor."
            )
        # extract file name
        file_name = file.make_path(
            self.original_directory, self.original_extension
        )

        # ``check_existence`` is optional: reading it directly would raise
        # AttributeError on instances whose class never defined it
        must_exist = getattr(self, "check_existence", True)
        if not must_exist or os.path.exists(file_name):
            return file_name

        raise ValueError(
            "The file '%s' was not found. Please check the "
            "original directory '%s' and extension '%s'?"
            % (file_name, self.original_directory, self.original_extension)
        )

    # Deprecated Methods below

    def check_parameters_for_validity(
        self,
        parameters,
        parameter_description,
        valid_parameters,
        default_parameters=None,
    ):
        """Deprecated: warns, then calls the module-level function of the
        same name."""
        warnings.warn(
            "check_parameters_for_validity is deprecated. Please use "
            "the equivalent function in this file",
            DeprecationWarning,
            stacklevel=2,
        )
        return check_parameters_for_validity(
            parameters,
            parameter_description,
            valid_parameters,
            default_parameters,
        )

    def check_parameter_for_validity(
        self,
        parameter,
        parameter_description,
        valid_parameters,
        default_parameter=None,
    ):
        """Deprecated: warns, then calls the module-level function of the
        same name."""
        warnings.warn(
            "check_parameter_for_validity is deprecated. Please use the "
            "equivalent function in this file",
            DeprecationWarning,
            stacklevel=2,
        )
        return check_parameter_for_validity(
            parameter,
            parameter_description,
            valid_parameters,
            default_parameter,
        )

    def convert_names_to_highlevel(
        self, names, low_level_names, high_level_names
    ):
        """Deprecated: warns, then calls the module-level function of the
        same name."""
        warnings.warn(
            "convert_names_to_highlevel is deprecated. Please use the "
            "equivalent function in this file",
            DeprecationWarning,
            stacklevel=2,
        )
        return convert_names_to_highlevel(
            names, low_level_names, high_level_names
        )

    def convert_names_to_lowlevel(
        self, names, low_level_names, high_level_names
    ):
        """Deprecated: warns, then calls the module-level function of the
        same name."""
        warnings.warn(
            "convert_names_to_lowlevel is deprecated. Please use the "
            "equivalent function in this file",
            DeprecationWarning,
            stacklevel=2,
        )
        return convert_names_to_lowlevel(
            names, low_level_names, high_level_names
        )

    def file_names(self, files, directory, extension):
        """Deprecated: warns, then calls the module-level function of the
        same name."""
        warnings.warn(
            "file_names is deprecated. Please use the "
            "equivalent function in this file",
            DeprecationWarning,
            stacklevel=2,
        )
        return file_names(files, directory, extension)

    def sort(self, files):
        """Deprecated: warns, then calls the module-level ``sort_files``."""
        warnings.warn(
            "sort is deprecated. Please use " "sort_files in bob.db.base.utils",
            DeprecationWarning,
            stacklevel=2,
        )
        return sort_files(files)
class Database(FileDatabase):
    """Deprecated variant of ``FileDatabase`` whose constructor arguments
    are optional. New databases should use the
    :py:class:`bob.db.base.FileDatabase` class if required"""

    def __init__(
        self, original_directory=None, original_extension=None, **kwargs
    ):
        """Emits a ``DeprecationWarning`` and initializes the base class with
        the given directory, extension and extra keyword arguments."""
        deprecation_notice = (
            "The bob.db.base.Database class is deprecated. "
            "Please use bob.db.base.FileDatabase instead."
        )
        # stacklevel=2 attributes the warning to the code creating the object
        warnings.warn(deprecation_notice, DeprecationWarning, stacklevel=2)
        super().__init__(original_directory, original_extension, **kwargs)