Source code for beat.backend.python.data_loaders

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################


"""
============
data_loaders
============

This module implements all the data communication related classes
"""

import six

from .data import mixDataIndices

# ----------------------------------------------------------


class DataView(object):
    """Provides access to a subset of data from a group of inputs synchronized
    together

    Data views are created from a data loader (see :py:class:`DataLoader`),
    which are provided to the algorithms of types 'sequential' and 'autonomous'
    (see :py:class:`DataLoaderList`).

    Example:

      .. code-block:: python

         view = data_loader.view('input1', 0)

         for i in range(view.count()):
             (data, start_index, end_index) = view[i]


    Parameters:

      data_loader (:py:class:`DataLoader`): Data loader the view is created
        from; its per-input ``infos`` (data source and data indices) are
        read to build the view

      data_indices (:py:class:`list`): Data indices to consider as a list of
        ``(start, end)`` tuples. An empty list produces an empty view.


    Attributes:

      data_index_start (int): Lower data index across all inputs
        (see the section *Inputs synchronization* of the User's Guide),
        ``-1`` for an empty view

      data_index_end (int): Bigger data index across all inputs
        (see the section *Inputs synchronization* of the User's Guide),
        ``-1`` for an empty view

    """

    def __init__(self, data_loader, data_indices):
        self.infos = {}
        self.data_indices = data_indices
        self.nb_data_units = len(data_indices)

        if self.nb_data_units > 0:
            self.data_index_start = data_indices[0][0]
            self.data_index_end = data_indices[-1][1]
        else:
            # Nothing to expose: use the same "unset" markers as the caches
            self.data_index_start = -1
            self.data_index_end = -1

        for input_name, infos in data_loader.infos.items():
            self.infos[input_name] = dict(
                data_source=infos["data_source"],
                data_indices=self._restrict_indices(infos["data_indices"]),
                # Per-input cache of the last data unit fetched by __getitem__
                data=None,
                start_index=-1,
                end_index=-1,
            )

    def _restrict_indices(self, input_indices):
        """Cuts the data indices of one input down to the range of the view

        Parameters:

          input_indices (:py:class:`list`): ``(start, end)`` tuples of one
            input, as stored by the data loader


        Returns:

          (:py:class:`list`): ``(start, end)`` tuples covering
          ``[data_index_start, data_index_end]``, split wherever a data unit
          of the input ends inside that range

        """

        if self.nb_data_units == 0:
            return []

        # Set membership replaces a scan of every tuple for every data index
        end_indices = set(indices[1] for indices in input_indices)

        restricted = []
        current_start = self.data_index_start

        for i in range(self.data_index_start, self.data_index_end + 1):
            if i in end_indices:
                restricted.append((current_start, i))
                current_start = i + 1

        # The last data unit of the input may extend past the end of the view
        if not restricted or restricted[-1][1] != self.data_index_end:
            restricted.append((current_start, self.data_index_end))

        return restricted

    def count(self, input_name=None):
        """Returns the number of available data indexes for the given input
        name. If none given the number of available data units.

        Parameters:

          input_name (str): Name of the input for which the count is
            requested


        Returns:

          (int): Number of data indexes for the input given or the number of
          data units. ``None`` is returned when ``input_name`` is not an
          input of this view.

        """

        if input_name is None:
            return self.nb_data_units

        try:
            return len(self.infos[input_name]["data_indices"])
        except (KeyError, TypeError):
            return None

    def __getitem__(self, index):
        """Returns the data of all the inputs for one data unit of the view

        Parameters:

          index (int): Position of the data unit in the view


        Returns:

          (tuple): ``(data, start_index, end_index)`` where ``data`` maps each
          input name to its data, or ``(None, None, None)`` if ``index`` is
          negative or out of range

        """

        if index < 0:
            return (None, None, None)

        try:
            indices = self.data_indices[index]
        except (IndexError, TypeError):
            return (None, None, None)

        result = {}

        for input_name, infos in self.infos.items():
            # Only hit the data source when the cached data unit of this
            # input does not cover the requested start index
            if (indices[0] < infos["start_index"]) or (
                infos["end_index"] < indices[0]
            ):
                (infos["data"], infos["start_index"], infos["end_index"]) = infos[
                    "data_source"
                ].getAtDataIndex(indices[0])

            result[input_name] = infos["data"]

        return (result, indices[0], indices[1])
# ----------------------------------------------------------
class DataLoader(object):
    """Provides access to data from a group of inputs synchronized together

    Data loaders are provided to the algorithms of types 'sequential' and
    'autonomous' (see :py:class:`DataLoaderList`).

    Example:

      .. code-block:: python

         # Iterate through all the data
         for i in range(data_loader.count()):
             (data, start_index, end_index) = data_loader[i]
             print(data['input1'].data)

         # Restrict to a subset of the data
         view = data_loader.view('input1', 0)
         for i in range(view.count()):
             (data, start_index, end_index) = view[i]


    Parameters:

      channel (str): Name of the data channel of the group of inputs


    Attributes:

      data_index_start (int): Lower data index across all inputs
        (see the section *Inputs synchronization* of the User's Guide)

      data_index_end (int): Bigger data index across all inputs
        (see the section *Inputs synchronization* of the User's Guide)

      channel (str): Name of the data channel of the group

    """

    def __init__(self, channel):
        self.channel = str(channel)
        self.infos = {}
        self.mixed_data_indices = None
        self.nb_data_units = 0
        self.data_index_start = -1  # Lower index across all inputs
        self.data_index_end = -1  # Bigger index across all inputs

    def reset(self):
        """Reset all the data sources"""

        for infos in self.infos.values():
            data_source = infos.get("data_source")
            # Explicit None test: a data source that evaluates false (for
            # instance one reporting a length of 0) must still be reset
            if data_source is not None:
                data_source.reset()

    def add(self, input_name, data_source):
        """Registers the data source of an input and updates the data indices

        The data indices of all the registered inputs are mixed again with
        ``mixDataIndices`` and the number of data units and the index bounds
        are recomputed from the result.

        Parameters:

          input_name (str): Name of the input

          data_source: Object providing the data of the input; its
            ``data_indices()`` method is called here

        """

        self.infos[input_name] = dict(
            data_source=data_source,
            data_indices=data_source.data_indices(),
            # Cache of the last data unit fetched by __getitem__
            data=None,
            start_index=-1,
            end_index=-1,
        )

        self.mixed_data_indices = mixDataIndices(
            [x["data_indices"] for x in self.infos.values()]
        )

        self.nb_data_units = len(self.mixed_data_indices)

        if self.nb_data_units > 0:
            self.data_index_start = self.mixed_data_indices[0][0]
            self.data_index_end = self.mixed_data_indices[-1][1]
        else:
            # No data unit: fall back to the initial "unset" bounds
            self.data_index_start = -1
            self.data_index_end = -1

    def input_names(self):
        """Returns the name of all inputs associated to this data loader"""

        return self.infos.keys()

    def count(self, input_name=None):
        """Returns the number of available data indexes for the given input
        name. If none given the number of available data units.

        Parameters:

          input_name (str): Name of the input for which the count is
            requested


        Returns:

          (int): Number of data indexes for the input given or the number of
          data units. ``0`` is returned when ``input_name`` is not an input
          of this data loader.

        """

        if input_name is None:
            return self.nb_data_units

        try:
            return len(self.infos[input_name]["data_indices"])
        except (KeyError, TypeError):
            return 0

    def view(self, input_name, index):
        """Returns the view associated with this data loader

        Parameters:

          input_name (str): Name of the input to get data from

          index (int): Position of the data indexes to retrieve


        Returns:

          (:py:class:`DataView`) either a DataView matching the query or None

        """

        if index < 0:
            return None

        try:
            indices = self.infos[input_name]["data_indices"][index]
        except (KeyError, IndexError, TypeError):
            return None

        # Keep the data units fully contained in the selected range
        limited_data_indices = [
            x
            for x in self.mixed_data_indices
            if (indices[0] <= x[0]) and (x[1] <= indices[1])
        ]

        # Nothing matches the query: honour the documented contract
        if not limited_data_indices:
            return None

        return DataView(self, limited_data_indices)

    def __getitem__(self, index):
        """Returns the data of all the inputs for one data unit

        Parameters:

          index (int): Position of the data unit


        Returns:

          (tuple): ``(data, start_index, end_index)`` where ``data`` maps each
          input name to its data, or ``(None, None, None)`` if ``index`` is
          negative, out of range or no input was added yet

        """

        if index < 0:
            return (None, None, None)

        try:
            # TypeError covers mixed_data_indices still being None
            indices = self.mixed_data_indices[index]
        except (IndexError, TypeError):
            return (None, None, None)

        result = {}

        for input_name, infos in self.infos.items():
            # Only hit the data source when the cached data unit of this
            # input does not cover the requested start index
            if (indices[0] < infos["start_index"]) or (
                infos["end_index"] < indices[0]
            ):
                (infos["data"], infos["start_index"], infos["end_index"]) = infos[
                    "data_source"
                ].getAtDataIndex(indices[0])

            result[input_name] = infos["data"]

        return (result, indices[0], indices[1])

    def __getstate__(self):
        """Returns the state to pickle, without the cached data

        The cached data is dropped as its content is not picklable. The
        per-input dictionaries are rebuilt rather than modified in place so
        that the cache of this (live) data loader is left untouched.
        """

        state = self.__dict__.copy()

        state["infos"] = dict(
            (input_name, dict(infos, data=None, start_index=-1, end_index=-1))
            for input_name, infos in self.infos.items()
        )

        return state
# ----------------------------------------------------------
class DataLoaderList(object):
    """Represents a list of data loaders

    Inputs are organized by groups. The inputs inside a group are all
    synchronized together (see the section *Inputs synchronization* of the
    User's Guide). A data loader provides access to data from a group of
    inputs.

    A list implementing this interface is provided to the algorithms of types
    'sequential' and 'autonomous'.

    One group of inputs is always considered as the **main** one, and is used
    to drive the algorithm. The usage of the other groups is left to the
    algorithm.

    See :py:class:`DataLoader`


    Example:

      .. code-block:: python

         data_loaders = DataLoaderList()
         ...

         # Retrieve a data loader by name
         data_loader = data_loaders['labels']

         # Retrieve a data loader by index
         for index in range(0, len(data_loaders)):
             data_loader = data_loaders[index]

         # Iteration over all data loaders
         for data_loader in data_loaders:
             ...

         # Retrieve the data loader an input belongs to, by input name
         data_loader = data_loaders.loaderOf('label')


    Attributes:

      main_loader (DataLoader): Main data loader

    """

    def __init__(self):
        self._loaders = []
        self.main_loader = None

    def add(self, data_loader):
        """Add a data loader to the list

        The first data loader added becomes the main one.

        :param DataLoader data_loader: The data loader to add
        """

        if self.main_loader is None:
            self.main_loader = data_loader

        self._loaders.append(data_loader)

    def __getitem__(self, name_or_index):
        """Returns a data loader by channel name or by position

        Parameters:

          name_or_index (str or int): Channel name of the data loader, or its
            position in the list


        Returns:

          The first data loader whose channel matches the name, the data
          loader at the given position, or ``None`` when there is no match,
          the position is out of range or the key is of another type

        """

        if isinstance(name_or_index, six.string_types):
            return next(
                (x for x in self._loaders if x.channel == name_or_index), None
            )

        if isinstance(name_or_index, int):
            try:
                return self._loaders[name_or_index]
            except IndexError:
                return None

        return None

    def __iter__(self):
        """Iterates over the data loaders, in the order they were added"""

        return iter(self._loaders)

    def __len__(self):
        """Returns the number of data loaders in the list"""

        return len(self._loaders)

    def loaderOf(self, input_name):
        """Returns the data loader matching the input name

        ``None`` is returned when no data loader has an input of that name.
        """

        try:
            return next(
                (k for k in self._loaders if input_name in k.input_names()), None
            )
        except TypeError:
            # A name that cannot be looked up (e.g. unhashable) matches nothing
            return None

    def secondaries(self):
        """Returns a list of all data loaders except the main one

        The returned :py:class:`DataLoaderList` has no main loader.
        """

        secondaries_list = DataLoaderList()

        for data_loader in self._loaders:
            if data_loader is not self.main_loader:
                secondaries_list.add(data_loader)

        # add() promoted the first secondary loader to main: undo that
        secondaries_list.main_loader = None

        return secondaries_list