Source code for beat.core.execution.subprocess

# vim: set fileencoding=utf-8 :

###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################


"""
==========
subprocess
==========

Execution utilities
"""
import logging
import os
import shutil
import subprocess as sp  # nosec
import sys
import tempfile

from beat.backend.python.execution import MessageHandler

from .. import stats
from .. import utils
from .remote import RemoteExecutor

logger = logging.getLogger(__name__)


def _which(program):
    """Pythonic version of the `which` command-line application"""

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, fname = os.path.split(program)
    if fpath and is_exe(program):
        return program
    else:
        for path in os.environ["PATH"].split(os.pathsep):
            exe_file = os.path.join(path, fname)
            if is_exe(exe_file):
                return exe_file

    return None
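
# A minimal usage sketch for ``_which`` (illustrative only; the "execute"
# script used here is just an example name and may not exist on your PATH):
#
#   exe = _which("execute")
#   if exe is None:
#       raise RuntimeError("cannot find `execute` on the PATH")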


class SubprocessExecutor(RemoteExecutor):
    """SubprocessExecutor runs the code of an execution block using a
    subprocess


    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (dict, str): The piece of data representing the block to be
        executed. It must validate against the schema defined for execution
        blocks. If a string is passed, it is supposed to be a fully qualified
        absolute path to a JSON file containing the block execution
        information.

      cache (:py:class:`str`, Optional): If your cache is not located under
        ``<prefix>/cache``, then specify a full path here. It will be used
        instead.

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional
        and, if passed, may greatly speed-up database loading times as
        dataformats that are already loaded may be re-used. If you use this
        parameter, you must guarantee that the cache is refreshed as
        appropriate in case the underlying dataformats change.

      database_cache (:py:class:`dict`, Optional): A dictionary mapping
        database names to loaded databases. This parameter is optional and,
        if passed, may greatly speed-up database loading times as databases
        that are already loaded may be re-used. If you use this parameter,
        you must guarantee that the cache is refreshed as appropriate in
        case the underlying databases change.

      algorithm_cache (:py:class:`dict`, Optional): A dictionary mapping
        algorithm names to loaded algorithms. This parameter is optional
        and, if passed, may greatly speed-up algorithm loading times as
        algorithms that are already loaded may be re-used. If you use this
        parameter, you must guarantee that the cache is refreshed as
        appropriate in case the underlying algorithms change.

      library_cache (:py:class:`dict`, Optional): A dictionary mapping
        library names to loaded libraries. This parameter is optional and,
        if passed, may greatly speed-up library loading times as libraries
        that are already loaded may be re-used. If you use this parameter,
        you must guarantee that the cache is refreshed as appropriate in
        case the underlying libraries change.

      custom_root_folders (dict): A dictionary mapping database names to
        their location on disk.

      ip_address (str): IP address of the machine to connect to for the
        database execution and message handlers.

      python_path (str): Path to the python executable of the environment
        to use for experiment execution.


    Attributes:

      cache (str): The path to the cache currently being used.

      errors (list): A list containing errors found while loading this
        execution block.

      data (dict): The original data for this executor, as loaded by our
        JSON decoder.

      algorithm (beat.core.algorithm.Algorithm): An object representing the
        algorithm to be run.

      databases (dict): A dictionary in which keys are strings with database
        names and values are :py:class:`.database.Database`, representing
        the databases required for running this block. The dictionary may be
        empty in case all inputs are taken from the file cache.

      views (dict): A dictionary in which the keys are tuples pointing to
        the ``(<database-name>, <protocol>, <set>)`` and the value is a
        setup view for that particular combination of details. The
        dictionary may be empty in case all inputs are taken from the file
        cache.

      input_list (beat.backend.python.inputs.InputList): A list of inputs
        that will be served to the algorithm.

      output_list (beat.backend.python.outputs.OutputList): A list of
        outputs that the algorithm will produce.
      data_sources (list): A list with all data-sources created by our
        execution loader.

      data_sinks (list): A list with all data-sinks created by our execution
        loader. These are useful for clean-up actions in case of problems.

    """

    def __init__(
        self,
        prefix,
        data,
        cache=None,
        dataformat_cache=None,
        database_cache=None,
        algorithm_cache=None,
        library_cache=None,
        custom_root_folders=None,
        ip_address="127.0.0.1",
        python_path=None,
    ):

        super(SubprocessExecutor, self).__init__(
            prefix,
            data,
            ip_address,
            cache=cache,
            dataformat_cache=dataformat_cache,
            database_cache=database_cache,
            algorithm_cache=algorithm_cache,
            library_cache=library_cache,
            custom_root_folders=custom_root_folders,
        )

        if python_path is None:
            base_path = os.path.dirname(sys.argv[0])

            # We need three apps to run this executor: execute, loop_execute
            # and databases_provider
            self.EXECUTE_BIN = _which(os.path.join(base_path, "execute"))
            self.LOOP_EXECUTE_BIN = _which(os.path.join(base_path, "loop_execute"))
            self.DBPROVIDER_BIN = _which(
                os.path.join(base_path, "databases_provider")
            )
        else:
            base_path = os.path.dirname(python_path)
            self.EXECUTE_BIN = os.path.join(base_path, "execute")
            self.LOOP_EXECUTE_BIN = os.path.join(base_path, "loop_execute")
            self.DBPROVIDER_BIN = os.path.join(base_path, "databases_provider")

            if any(
                not os.path.exists(executable)
                for executable in [
                    self.EXECUTE_BIN,
                    self.LOOP_EXECUTE_BIN,
                    self.DBPROVIDER_BIN,
                ]
            ):
                raise RuntimeError("Invalid environment")

    def __create_db_process(self, configuration_name=None):
        database_port = utils.find_free_port()

        # Configuration and needed files
        databases_configuration_path = utils.temporary_directory()
        self.dump_databases_provider_configuration(databases_configuration_path)

        # Creation of the subprocess
        # Note: we only support one databases image loaded at the same time
        cmd = [
            self.DBPROVIDER_BIN,
            self.ip_address + (":%d" % database_port),
            databases_configuration_path,
            self.cache,
        ]

        if logger.getEffectiveLevel() <= logging.DEBUG:
            cmd.insert(1, "--debug")

        if configuration_name is not None:
            cmd.append(configuration_name)

        databases_process_stdout = tempfile.NamedTemporaryFile(delete=False)
        databases_process_stderr = tempfile.NamedTemporaryFile(delete=False)

        logger.debug("Starting database process with: %s" % " ".join(cmd))
        databases_process = sp.Popen(
            cmd, stdout=databases_process_stdout, stderr=databases_process_stderr
        )

        return dict(
            configuration_path=databases_configuration_path,
            process=databases_process,
            port=database_port,
            stdout=databases_process_stdout,
            stderr=databases_process_stderr,
        )
    def process(
        self, virtual_memory_in_megabytes=0, max_cpu_percent=0, timeout_in_minutes=0
    ):
        """Executes the user algorithm code using an external program.

        The execution interface follows the backend API as described in our
        documentation.

        We use green subprocesses in this implementation. Each co-process is
        linked to us via 2 uni-directional pipes which work as ``datain`` and
        ``dataout`` end-points. The parent process (i.e. the current one)
        establishes the connection to the child and can then pass/receive
        commands, data and logs.

        Usage of the data pipes (``datain``, ``dataout``) is **synchronous**:
        you send a command and block for an answer. The co-process is
        normally controlled by the current process, except for data requests,
        which are user-code driven. The nature of our problem does not
        require an *asynchronous* implementation which, in turn, would
        require a much more complex set of dependencies (on asyncio or
        Twisted, for example).


        Parameters:

          virtual_memory_in_megabytes (:py:class:`int`, Optional): The amount
            of virtual memory (in Megabytes) available for the job. If set to
            zero, no limit will be applied.

          max_cpu_percent (:py:class:`int`, Optional): The maximum amount of
            CPU usage allowed in a system. This number must be an integer
            between 0 and ``100 * number_of_cores`` in your system. For
            instance, if your system has 2 cores, this number can go between
            0 and 200. If it is <= 0, then we don't track CPU usage.

          timeout_in_minutes (int): The number of minutes to wait for the
            user process to execute. After this amount of time, the user
            process is killed with ``signal.SIGKILL``. If set to zero, no
            timeout will be applied.


        Returns:

          dict: A JSON-formattable dictionary containing the summary of this
            block execution.

        """

        if not self.valid:
            raise RuntimeError(
                "execution information is bogus:\n * %s" % "\n * ".join(self.errors)
            )

        missing_scripts = []
        if (len(self.databases) > 0) and self.DBPROVIDER_BIN is None:
            missing_scripts.append("databases_provider")
        if self.EXECUTE_BIN is None:
            missing_scripts.append("execute")
        if self.LOOP_EXECUTE_BIN is None:
            missing_scripts.append("loop_execute")

        if missing_scripts:
            raise RuntimeError(
                "Scripts not found on PATH (%s): %s"
                % (os.environ.get("PATH", ""), ", ".join(missing_scripts))
            )

        # Creates the message handler
        algorithm_process = None

        def _kill():
            algorithm_process.terminate()

        self.message_handler = MessageHandler(self.ip_address, kill_callback=_kill)

        # ----- (If necessary) Instantiate the subprocess that provides the
        # databases
        database_infos = {}

        if len(self.databases) > 0:
            database_infos["db"] = self.__create_db_process()

        # Configuration and needed files
        configuration_path = utils.temporary_directory()
        self.dump_runner_configuration(configuration_path)

        # ----- (If necessary) Instantiate the loop algorithm subprocess
        loop_algorithm_port = None
        loop_algorithm_process = None

        if self.loop_algorithm is not None:
            if len(self.databases) > 0:
                database_infos["loop_db"] = self.__create_db_process("loop")

            loop_algorithm_port = utils.find_free_port()

            cmd = [
                self.LOOP_EXECUTE_BIN,
                self.ip_address + (":%d" % loop_algorithm_port),
                configuration_path,
                self.cache,
            ]

            if len(self.databases) > 0:
                cmd.append(
                    "tcp://"
                    + self.ip_address
                    + (":%d" % database_infos["loop_db"]["port"])
                )

            if logger.getEffectiveLevel() <= logging.DEBUG:
                cmd.insert(1, "--debug")

            # Creation of the subprocess
            loop_algorithm_process_stdout = tempfile.NamedTemporaryFile(delete=False)
            loop_algorithm_process_stderr = tempfile.NamedTemporaryFile(delete=False)

            logger.debug("Starting loop process with: %s" % " ".join(cmd))
            loop_algorithm_process = sp.Popen(
                cmd,
                stdout=loop_algorithm_process_stdout,
                stderr=loop_algorithm_process_stderr,
            )

        # ----- Instantiate the algorithm subprocess

        # Command to execute
        cmd = [
            self.EXECUTE_BIN,
            "--cache=%s" % self.cache,
            self.message_handler.address,
            configuration_path,
        ]

        if len(self.databases) > 0:
            cmd.append(
                "tcp://" + self.ip_address + (":%d" % database_infos["db"]["port"])
            )

        if self.loop_algorithm is not None:
            cmd.append(
                "--loop=tcp://" + self.ip_address + (":%d" % loop_algorithm_port)
            )

        if logger.getEffectiveLevel() <= logging.DEBUG:
            cmd.insert(1, "--debug")

        # Creation of the subprocess
        algorithm_process_stdout = tempfile.NamedTemporaryFile(delete=False)
        algorithm_process_stderr = tempfile.NamedTemporaryFile(delete=False)

        logger.debug("Starting algorithm process with: %s" % " ".join(cmd))
        algorithm_process = sp.Popen(
            cmd, stdout=algorithm_process_stdout, stderr=algorithm_process_stderr
        )

        # Process the messages until the subprocess is done
        self.message_handler.start()

        try:
            algorithm_process.communicate()
            status = algorithm_process.returncode
        except KeyboardInterrupt:
            # Developer pushed CTRL-C
            logger.info("stopping user process on CTRL-C console request")
            algorithm_process.kill()
            status = algorithm_process.wait()
        except Exception:
            algorithm_process.kill()
            status = algorithm_process.wait()
        finally:
            self.message_handler.stop.set()

        # Collects final information and returns to caller
        with open(algorithm_process_stdout.name, "r") as f:
            stdout = f.read()

        stderr = ""
        if status != 0:
            with open(algorithm_process_stderr.name, "r") as f:
                stderr = f.read()

        retval = dict(
            status=status,
            stdout=stdout,
            stderr=stderr,
            timed_out=False,
            statistics={
                "data": self.message_handler.statistics,
                "cpu": {
                    "user": 0.0,
                    "system": 0.0,
                    "total": 0.0,
                    "percent": 0.0,
                    "processors": 1,
                },
                "memory": {"rss": 0, "limit": 0, "percent": 0.0},
            },
            system_error=self.message_handler.system_error,
            user_error=self.message_handler.user_error,
        )

        stats.update(retval["statistics"]["data"], self.io_statistics)

        for name, db_info in database_infos.items():
            logger.debug("Stopping %s process" % name)

            databases_process = db_info["process"]
            databases_process.terminate()
            databases_process.communicate()
            status = databases_process.returncode

            with open(db_info["stdout"].name, "r") as f:
                retval["stdout"] += "\n" + f.read()

            if status != 0:
                with open(db_info["stderr"].name, "r") as f:
                    retval["stderr"] += "\n" + f.read()

        if loop_algorithm_process is not None:
            logger.debug("Stopping loop process")

            loop_algorithm_process.terminate()
            loop_algorithm_process.communicate()
            status = loop_algorithm_process.returncode

            with open(loop_algorithm_process_stdout.name, "r") as f:
                retval["stdout"] += "\n" + f.read()

            if status != 0:
                with open(loop_algorithm_process_stderr.name, "r") as f:
                    retval["stderr"] += "\n" + f.read()

        self.message_handler.destroy()
        self.message_handler = None

        if not self.debug:
            for _, db_info in database_infos.items():
                shutil.rmtree(db_info["configuration_path"])
            shutil.rmtree(configuration_path)

        return retval
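

# Example usage (a minimal sketch; the prefix and block-configuration paths
# below are hypothetical, and a valid execution block produced by a BEAT
# experiment is assumed to exist):
#
#   executor = SubprocessExecutor("/path/to/prefix", "/path/to/block.json")
#   result = executor.process(timeout_in_minutes=10)
#   if result["status"] != 0:
#       print(result["stderr"])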