Source code for beat.core.dataformat

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################


"""
==========
dataformat
==========

Validation and parsing for dataformats

Forward importing from :py:mod:`beat.backend.python.dataformat`:
:py:class:`beat.backend.python.dataformat.Storage`
"""
import copy

import six

from beat.backend.python.dataformat import DataFormat as BackendDataFormat
from beat.backend.python.dataformat import Storage  # noqa

from . import prototypes
from . import schema
from . import utils


class DataFormat(BackendDataFormat):
    """Data formats define the chunks of data that circulate between blocks.

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (:py:class:`object`, Optional): The piece of data representing the
        data format. It must validate against the schema defined for data
        formats. If a string is passed, it is supposed to be a valid path to a
        data format in the designated prefix area. If ``None`` is passed, loads
        our default prototype for data formats.

      parent (:py:class:`tuple`, Optional): The parent DataFormat for this
        format. If set to ``None``, this means this dataformat is the first one
        on the hierarchy tree. If set to a tuple, the contents are
        ``(format-instance, field-name)``, which indicates the originating
        object that is this object's parent and the name of the field on that
        object that points to this one.

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up data format loading times as
        dataformats that are already loaded may be re-used. If you use this
        parameter, you must guarantee that the cache is refreshed as
        appropriate in case the underlying dataformats change.


    Attributes:

      name (str): The full, valid name of this dataformat

      description (str): The short description string, loaded from the JSON
        file if one was set.

      documentation (str): The full-length docstring for this object.

      storage (object): A simple object that provides information about file
        paths for this dataformat

      errors (list): A list of strings containing errors found while loading
        this dataformat.

      data (dict): The original data for this dataformat, as loaded by our JSON
        decoder.

      resolved (dict): A dictionary similar to :py:attr:`data`, but with
        references fully resolved.

      referenced (dict): A dictionary pointing to all loaded dataformats.

      parent (beat.core.dataformat.DataFormat): The pointer to the
        dataformat to which the current format is part of. It is useful for
        internal error reporting.
    """

    def __init__(self, prefix, data, parent=None, dataformat_cache=None):
        super(DataFormat, self).__init__(prefix, data, parent, dataformat_cache)

    def _load(self, data, dataformat_cache):
        """Loads the dataformat

        Validates ``data`` against the dataformat schema, then resolves every
        reference to another dataformat (including ``#extends``) and stores
        the final model in ``self.resolved``. Problems are accumulated in
        ``self.errors`` rather than raised.

        Parameters:

          data (object): ``None`` (load the default prototype), a ``dict``
            (an in-memory declaration) or anything else, which is taken as the
            name of a dataformat to be located through ``Storage``.

          dataformat_cache (dict): Maps dataformat names to already loaded
            dataformats. An entry set to ``None`` marks a dataformat that is
            currently being loaded and is used to detect recursion.
        """

        self._name = None
        self.storage = None
        self.referenced = {}
        self.resolved = None
        self.errors = []
        self.data = None

        # tolerate a missing cache so that the lookups below never index None
        if dataformat_cache is None:
            dataformat_cache = {}

        if data is None:  # loads prototype and validates it

            self.data, self.errors = prototypes.load("dataformat")
            assert not self.errors, "\n * %s" % "\n *".join(self.errors)  # nosec

        else:

            if not isinstance(data, dict):  # user has passed a file pointer

                # make sure to log this into the cache (avoids recursion)
                dataformat_cache[data] = None

                self._name = data
                self.storage = Storage(self.prefix, data)
                data = self.storage.json.path
                if not self.storage.exists():
                    self.errors.append(
                        "Dataformat declaration file not found: %s" % data
                    )
                    return

            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate("dataformat", data)

        self.resolved = copy.deepcopy(self.data)

        # remove reserved fields
        def is_reserved(x):
            """Returns if the field name is a reserved name"""
            return (x.startswith("__") and x.endswith("__")) or x in (
                "#description",
                "#schema_version",
            )

        # NOTE(review): ``self.resolved`` is presumably ``None`` when the
        # declaration could not be loaded at all - guard so that we reach the
        # error reporting below instead of failing on the iteration.
        if self.resolved is not None:
            for key in list(self.resolved):
                if is_reserved(key):
                    del self.resolved[key]

        if self.errors:  # don't proceed with the rest of validation
            self.errors = utils.uniq(self.errors)
            return

        def maybe_load_format(name, obj, dataformat_cache):
            """Tries to load a given dataformat from its relative path

            ``name`` is the field that holds ``obj``; it is recorded in the
            ``parent`` tuple of any dataformat created here. Values that do
            not designate a dataformat are returned untouched.
            """

            if isinstance(obj, six.string_types) and obj.find("/") != -1:  # load it

                if obj in dataformat_cache:  # reuse

                    if dataformat_cache[obj] is None:  # recursion detected
                        self.errors.append(
                            "recursion for dataformat `%s' detected" % obj
                        )
                        return self

                    self.referenced[obj] = dataformat_cache[obj]

                else:  # load it
                    self.referenced[obj] = DataFormat(
                        self.prefix, obj, (self, name), dataformat_cache
                    )

                if not self.referenced[obj].valid:
                    self.errors.append("referred dataformat `%s' is invalid" % obj)

                return self.referenced[obj]

            elif isinstance(obj, dict):  # can cache it, must load from scratch
                return DataFormat(self.prefix, obj, (self, name), dataformat_cache)

            elif isinstance(obj, list):
                # only the last element of the list is submitted for loading;
                # the elements before it are copied as they are. Recurse with
                # our own ``name`` instead of relying on a variable leaked
                # from the caller's loop.
                retval = copy.deepcopy(obj)
                retval[-1] = maybe_load_format(name, obj[-1], dataformat_cache)
                return retval

            return obj

        # now checks that every referred dataformat also validates, and
        # accumulates errors
        for field, value in self.data.items():
            if field in ("#description", "#schema_version"):
                continue  # skip the description and schema version meta attributes
            self.resolved[field] = maybe_load_format(field, value, dataformat_cache)
            if isinstance(self.resolved[field], DataFormat):
                if not self.resolved[field].valid:
                    self.errors.append("referred dataformat `%s' is invalid" % value)

        # at this point, there should be no more external references in
        # ``self.resolved``. We treat the "#extends" property, which requires a
        # special handling, given its nature.
        if "#extends" in self.resolved:

            ext = self.data["#extends"]
            self.referenced[ext] = maybe_load_format(self.name, ext, dataformat_cache)
            basetype = self.resolved["#extends"]

            # before updating, checks there is no name clash
            if basetype.valid:

                for attrname in self.resolved:
                    if attrname == "#extends":
                        continue
                    if attrname in basetype.resolved:
                        self.errors.append(
                            "the attribute `%s' in `%s' clashes with an "
                            "attribute with the same name on the extended class "
                            "`%s'" % (attrname, self.name, basetype.name)
                        )

                # merge on a *copy* of the base fields: the base dataformat may
                # be shared through the cache and must not inherit our fields
                tmp = self.resolved
                self.resolved = copy.copy(basetype.resolved)
                self.resolved.update(tmp)
                del self.resolved["#extends"]  # avoids infinite recursion

            else:
                self.errors.append("referred dataformat `%s' is invalid" % ext)

        # all references are resolved at this point and the final model is built
        # you can lookup the original data in ``self.data`` and the final model
        # in ``self.resolved``.
        if self.errors:
            self.errors = utils.uniq(self.errors)