# Source code for beat.backend.python.hash

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################


"""
====
hash
====

Various functions for hashing platform contributions and others
"""

import collections
import copy
import hashlib
import os

import simplejson
import six

# ----------------------------------------------------------


def _sha256(s):
    """A python2/3 shortcut returning the hexadecimal SHA-256 digest of ``s``.

    Text input is converted to UTF-8 bytes before hashing: it is first passed
    through :py:func:`six.u` and encoded; if that fails for any reason, the
    string is encoded directly. Any other input is handed to
    :py:func:`hashlib.sha256` untouched.

    Parameters:
        s (str or bytes): Data to hash

    Returns:
        str: Hexadecimal digest of the input
    """
    if not isinstance(s, six.string_types):
        # not text: hashlib gets the object as provided
        return hashlib.sha256(s).hexdigest()

    try:
        payload = six.u(s).encode("utf-8")
    except Exception:
        # fall back to a plain encode when the six.u() route fails
        payload = s.encode("utf-8")

    return hashlib.sha256(payload).hexdigest()


# ----------------------------------------------------------


def _stringify(dictionary):
    names = sorted(dictionary.keys())

    converted_dictionary = "{"
    for name in names:
        converted_dictionary += '"%s":%s,' % (name, str(dictionary[name]))

    if len(converted_dictionary) > 1:
        converted_dictionary = converted_dictionary[:-1]

    converted_dictionary += "}"

    return converted_dictionary


# ----------------------------------------------------------


def _compact(text):
    return text.replace(" ", "").replace("\n", "")


# ----------------------------------------------------------


def toPath(hash, suffix=".data"):
    """Returns the path on disk which corresponds to the hash given.

    The hash is split into three two-character directory levels, the
    remaining characters (plus the suffix) form the file name.

    Parameters:
        hash (str): Hash from which the path is generated
        suffix (str): Suffix of the file for which the path is generated

    Returns:
        str: Path to file based on hash
    """
    return os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:] + suffix)
# ----------------------------------------------------------
def toUserPath(username):
    """Returns the path to the user specific folder

    The first two directory levels are taken from the SHA-256 digest of the
    user name, the last level is the user name itself.

    Parameters:
        username (str): User name to get the path from

    Returns:
        str: Path on file system for the user
    """
    # local name chosen so it does not shadow the module-level ``hash``
    digest = _sha256(username)
    return os.path.join(digest[0:2], digest[2:4], username)
# ----------------------------------------------------------
def hash(dictionary_or_string):
    """Generates a hash for the given parameter

    Dictionaries are first converted to a key-sorted string, anything else is
    hashed directly.

    Parameters:
        dictionary_or_string (str or dict): Input to hash

    Returns:
        str: Hash from input
    """
    if isinstance(dictionary_or_string, dict):
        return _sha256(_stringify(dictionary_or_string))

    return _sha256(dictionary_or_string)
# ----------------------------------------------------------
def hashJSON(contents, description):
    """Hashes the pre-loaded JSON object with SHA-256

    Excludes description changes: when ``description`` is present in
    ``contents``, that entry is left out of the hashed data. The object given
    by the caller is never modified.

    Parameters:
        contents: Pre-loaded JSON object to hash
        description (str): Key of the entry to leave out of the hash

    Returns:
        str: hash
    """
    if description in contents:
        contents = copy.deepcopy(contents)  # temporary copy
        del contents[description]

    # sorted keys make the digest independent from the key ordering
    serialized = simplejson.dumps(contents, sort_keys=True)
    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
# ----------------------------------------------------------
def hashJSONFile(path, description):
    """Hashes the JSON file contents with SHA-256

    Excludes description changes (see :py:func:`hashJSON`). If the file
    cannot be decoded as JSON, its raw contents are hashed instead.

    Parameters:
        path (str): Path to the file to hash
        description (str): Key of the entry to leave out of the hash

    Returns:
        str: hash
    """
    try:
        with open(path, "rb") as f:
            # preserve order
            return hashJSON(
                simplejson.loads(
                    f.read().decode("utf-8"),
                    object_pairs_hook=collections.OrderedDict,
                ),
                description,
            )
    except simplejson.JSONDecodeError:
        # falls back to normal file content hashing
        return hashFileContents(path)
# ----------------------------------------------------------
def hashFileContents(path):
    """Hashes the file contents with SHA-256

    The file is opened in binary mode and read chunk by chunk, so it is never
    loaded in memory as a whole.

    Parameters:
        path (str): Path to the file to hash

    Returns:
        str: hash
    """
    with open(path, "rb") as f:
        sha256 = hashlib.sha256()
        # iter() with a sentinel stops at the first empty read (end of file)
        for chunk in iter(lambda: f.read(sha256.block_size * 1000), b""):
            sha256.update(chunk)

        return sha256.hexdigest()
# ----------------------------------------------------------
def hashDataset(database_name, protocol_name, set_name):
    """Hashes a Dataset

    The three names are inserted in a JSON-like template which is then
    hashed. Spaces and newlines are removed from the template before the
    names are substituted, so the names themselves are used as given.

    Parameters:
        database_name (str): Database name
        protocol_name (str): Database protocol name
        set_name (str): Name of the set in the database

    Returns:
        str: hash
    """
    template = _compact(
        """{ "database": "%s", "protocol": "%s", "set": "%s" }"""
    )
    return hash(template % (database_name, protocol_name, set_name))