# Source code for beat.core.dataformat
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###################################################################################
# #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# Redistribution and use in source and binary forms, with or without #
# modification, are permitted provided that the following conditions are met: #
# #
# 1. Redistributions of source code must retain the above copyright notice, this #
# list of conditions and the following disclaimer. #
# #
# 2. Redistributions in binary form must reproduce the above copyright notice, #
# this list of conditions and the following disclaimer in the documentation #
# and/or other materials provided with the distribution. #
# #
# 3. Neither the name of the copyright holder nor the names of its contributors #
# may be used to endorse or promote products derived from this software without #
# specific prior written permission. #
# #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
# #
###################################################################################
"""
==========
dataformat
==========
Validation and parsing for dataformats
Forward importing from :py:mod:`beat.backend.python.dataformat`:
:py:class:`beat.backend.python.dataformat.Storage`
"""
import copy
import six
from beat.backend.python.dataformat import DataFormat as BackendDataFormat
from beat.backend.python.dataformat import Storage # noqa
from . import prototypes
from . import schema
from . import utils
class DataFormat(BackendDataFormat):
    """Data formats define the chunks of data that circulate between blocks.

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (:py:class:`object`, Optional): The piece of data representing the
        data format. It must validate against the schema defined for data
        formats. If a string is passed, it is supposed to be a valid path to a
        data format in the designated prefix area. If ``None`` is passed, loads
        our default prototype for data formats.

      parent (:py:class:`tuple`, Optional): The parent DataFormat for this
        format. If set to ``None``, this means this dataformat is the first one
        on the hierarchy tree. If set to a tuple, the contents are
        ``(format-instance, field-name)``, which indicates the originating
        object that is this object's parent and the name of the field on that
        object that points to this one.

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up data format loading times as
        dataformats that are already loaded may be re-used. If you use this
        parameter, you must guarantee that the cache is refreshed as
        appropriate in case the underlying dataformats change.


    Attributes:

      name (str): The full, valid name of this dataformat

      description (str): The short description string, loaded from the JSON
        file if one was set.

      documentation (str): The full-length docstring for this object.

      storage (object): A simple object that provides information about file
        paths for this dataformat

      errors (list): A list of strings containing errors found while loading
        this dataformat.

      data (dict): The original data for this dataformat, as loaded by our JSON
        decoder.

      resolved (dict): A dictionary similar to :py:attr:`data`, but with
        references fully resolved.

      referenced (dict): A dictionary pointing to all loaded dataformats.

      parent (beat.core.dataformat.DataFormat): The pointer to the
        dataformat to which the current format is part of. It is useful for
        internal error reporting.

    """

    def __init__(self, prefix, data, parent=None, dataformat_cache=None):
        super(DataFormat, self).__init__(prefix, data, parent, dataformat_cache)

    def _load(self, data, dataformat_cache):
        """Loads the dataformat

        Resets ``_name``, ``storage``, ``referenced``, ``resolved``, ``errors``
        and ``data`` on this object and then fills them from ``data``. Nothing
        is returned: problems found on the way are accumulated (without
        duplicates) in ``self.errors``.

        Parameters:

          data (:py:class:`object`): ``None`` loads the ``"dataformat"``
            prototype; a :py:class:`dict` is validated as-is; anything else is
            taken as the name of a dataformat, which is located through
            :py:class:`Storage` and validated from its JSON declaration.

          dataformat_cache (:py:class:`dict`): Mapping of dataformat names to
            already loaded dataformats. It is written to when ``data`` is a
            name, so it must be a mutable mapping in that case. An entry set
            to ``None`` marks a dataformat whose loading is still in progress
            and is used to detect recursive declarations.

        """

        self._name = None
        self.storage = None
        self.referenced = {}
        self.resolved = None
        self.errors = []
        self.data = None

        if data is None:  # loads prototype and validates it

            self.data, self.errors = prototypes.load("dataformat")
            assert not self.errors, "\n * %s" % "\n *".join(self.errors)  # nosec

        else:

            if not isinstance(data, dict):  # user has passed a file pointer

                # make sure to log this into the cache (avoids recursion)
                dataformat_cache[data] = None

                self._name = data
                self.storage = Storage(self.prefix, data)
                data = self.storage.json.path
                if not self.storage.exists():
                    self.errors.append(
                        "Dataformat declaration file not found: %s" % data
                    )
                    return

            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate("dataformat", data)

        self.resolved = copy.deepcopy(self.data)

        # remove reserved fields
        def is_reserved(x):
            """Returns if the field name is a reserved name"""
            return (x.startswith("__") and x.endswith("__")) or x in (
                "#description",
                "#schema_version",
            )

        # iterate over a snapshot of the keys, since entries are deleted
        for key in list(self.resolved):
            if is_reserved(key):
                del self.resolved[key]

        if self.errors:
            # don't proceed with the rest of validation
            self.errors = utils.uniq(self.errors)
            return

        def maybe_load_format(name, obj, dataformat_cache):
            """Tries to load a given dataformat from its relative path

            Strings containing a ``/`` are treated as references to other
            dataformats, dictionaries as inline dataformat declarations and
            lists have only their last element resolved. Any other object is
            returned untouched.
            """

            if isinstance(obj, six.string_types) and obj.find("/") != -1:  # load it

                if obj in dataformat_cache:  # reuse

                    if dataformat_cache[obj] is None:  # recursion detected
                        self.errors.append(
                            "recursion for dataformat `%s' detected" % obj
                        )
                        return self

                    self.referenced[obj] = dataformat_cache[obj]

                else:  # load it
                    self.referenced[obj] = DataFormat(
                        self.prefix, obj, (self, name), dataformat_cache
                    )

                if not self.referenced[obj].valid:
                    self.errors.append("referred dataformat `%s' is invalid" % obj)

                return self.referenced[obj]

            elif isinstance(obj, dict):  # can cache it, must load from scratch
                return DataFormat(self.prefix, obj, (self, name), dataformat_cache)

            elif isinstance(obj, list):
                retval = copy.deepcopy(obj)
                # use this call's own ``name`` rather than a variable captured
                # from the enclosing scope
                retval[-1] = maybe_load_format(name, obj[-1], dataformat_cache)
                return retval

            return obj

        # now checks that every referred dataformat also validates, and accumulates
        # errors
        for field, value in self.data.items():
            if field in ("#description", "#schema_version"):
                continue  # skip the description and schema version meta attributes
            self.resolved[field] = maybe_load_format(field, value, dataformat_cache)
            if isinstance(self.resolved[field], DataFormat):
                if not self.resolved[field].valid:
                    self.errors.append("referred dataformat `%s' is invalid" % value)

        # at this point, there should be no more external references in
        # ``self.resolved``. We treat the "#extends" property, which requires a
        # special handling, given its nature.
        if "#extends" in self.resolved:

            ext = self.data["#extends"]
            self.referenced[ext] = maybe_load_format(self.name, ext, dataformat_cache)
            basetype = self.resolved["#extends"]

            # before updating, checks there is no name clash
            if basetype.valid:

                for attrname in self.resolved:
                    if attrname == "#extends":
                        continue
                    if attrname in basetype.resolved:
                        self.errors.append(
                            "the attribute `%s' in `%s' clashes with an "
                            "attribute with the same name on the extended class "
                            "`%s'" % (attrname, self.name, basetype.name)
                        )

                # merge into a shallow copy: the extended dataformat may be
                # shared through the cache, so its own ``resolved`` dictionary
                # must not receive the fields declared on this dataformat.
                # Inherited fields come first, followed by our own.
                merged = copy.copy(basetype.resolved)
                merged.update(self.resolved)
                del merged["#extends"]  # avoids infinite recursion
                self.resolved = merged

            else:
                self.errors.append("referred dataformat `%s' is invalid" % ext)

        # all references are resolved at this point and the final model is built
        # you can lookup the original data in ``self.data`` and the final model
        # in ``self.resolved``.
        if self.errors:
            self.errors = utils.uniq(self.errors)