Source code for beat.web.search.models

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.web module of the BEAT platform.              #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

import copy
import datetime
import operator

import simplejson as json
from django.contrib.auth.models import User
from django.db import models
from django.urls import reverse

from beat.core.hash import hash

from ..algorithms.models import Algorithm
from ..common.models import Contribution
from ..common.texts import Messages
from ..experiments.models import Experiment
from ..experiments.models import Result

# Naive date/time for the 1st of January 1970, 00:00:00 UTC.  Built explicitly
# rather than through ``datetime.utcfromtimestamp(0)``, which is deprecated
# since Python 3.12; the resulting value is identical and stays naive (no
# ``tzinfo``), as the original one was.
EPOCH = datetime.datetime(1970, 1, 1)


class Search(Contribution):
    """A stored search, described by its ``filters`` and ``settings`` texts

    A search may own a :py:class:`Leaderboard` (reverse side of a one-to-one
    relation named ``leaderboard``), which is refreshed whenever the search
    itself is saved.
    """

    class Meta:

        verbose_name_plural = "searches"

    filters = models.TextField(default="", blank=True)
    settings = models.TextField(default="", blank=True)
    description = models.TextField(
        default="", blank=True, help_text=Messages["description"]
    )

    def _url_args(self):
        """Returns the ``(username, name)`` pair used to reverse all URLs"""

        return (self.author.username, self.name)

    def fullname(self):
        """Returns the name of this search in the ``<username>/<name>`` form"""

        return "%s/%s" % (self.author.username, self.name)

    def save(self, *args, **kwargs):
        """Saves the search, refreshing its hash and its leaderboard (if any)"""

        # the hash always reflects the current filters and settings
        self.hash = hash("".join((self.filters, self.settings)))

        super(Search, self).save(*args, **kwargs)

        # a missing leaderboard shows up as an absent attribute; when there
        # is one, saving it forces its tables to be recalculated
        board = getattr(self, "leaderboard", None)
        if board is not None:
            board.save()

    def get_absolute_url(self):
        """Returns the URL of the page displaying this search"""

        return reverse("search:view", args=self._url_args())

    def get_notify_url(self):
        """Returns the URL of the notification view of this search"""

        return reverse("search:notify", args=self._url_args())

    def get_api_update_url(self):
        """Returns the endpoint to update this object"""

        return reverse("api_search:save", args=self._url_args())

    def get_api_share_url(self):
        """Returns the endpoint to share this object"""

        return reverse("api_search:share", args=self._url_args())

    def has_leaderboard(self):
        """Tells if a leaderboard is attached to this search"""

        return hasattr(self, "leaderboard")


class LeaderboardManager(models.Manager):
    """Manager looking leaderboards up through the identity of their search"""

    def get_by_natural_key(self, username, name, version):
        """Returns the leaderboard attached to the search matching the key

        The three values are matched against the author's username, the name
        and the version of the related search, respectively.
        """

        lookup = {
            "search__author__username": username,
            "search__name": name,
            "search__version": version,
        }
        return self.get(**lookup)


class Leaderboard(models.Model):
    """Keeps track of experiments"""

    search = models.OneToOneField(
        Search, related_name="leaderboard", on_delete=models.CASCADE
    )

    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(auto_now=True)
    # last time the ranked tables were found to differ (set by ``save()``)
    changed = models.DateTimeField(default=datetime.datetime.now)
    notify = models.ManyToManyField(
        User,
        limit_choices_to={"is_active": True},
        blank=True,
        help_text="If set, and the leader board changes, an e-mail notification will be sent to people on this list, every time it changes.",
    )

    experiments = models.ManyToManyField(
        Experiment,
        through="Rank",
        related_name="leaderboards",
        blank=True,
        help_text="Experiments currently set on the leaderboard",
    )

    objects = LeaderboardManager()

    def __str__(self):
        return str(self.search)

    def natural_key(self):
        """Returns the ``(username, name, version)`` triple of the search"""

        return (self.search.author.username, self.search.name, self.search.version)

    natural_key.dependencies = ["search.search"]

    def get_absolute_url(self):
        """Returns the URL of the page displaying the related search"""

        return reverse(
            "search:view", args=(self.search.author.username, self.search.name,),
        )

    def current_experiments(self, time_delta=None):
        """Returns a list of experiments, sorted by criteria in settings

        If you specify a ``time_delta``, then we won't consider experiments
        which are newer than ``now - time_delta``. ``time_delta`` should be set
        as a ``datetime.timedelta`` object.

        The return value is a list of ``(analyzer, header, table)`` tuples, one
        per common analyzer found by the search. ``header`` lists result names
        and every row of ``table`` starts with an experiment, followed by its
        results. Tables targeted by an ordering entry of the search settings
        are sorted and reduced to the columns used for sorting; other tables
        are returned untouched.
        """

        # imported here rather than at module level
        # NOTE(review): presumably to avoid a circular import - confirm
        from .views import search_experiments

        # NOTE(review): both ``filters`` and ``settings`` default to "" on
        # ``Search``, which ``json.loads`` rejects - callers are assumed to
        # always store valid JSON in them; confirm
        filters = json.loads(self.search.filters)
        results = search_experiments(self.search.author, filters, time_delta)

        # creates multiple tables (per common analyzer), with experiments
        # and existing table-able results (simple numbers)
        sorted_experiments = []
        for analyzer, blocks in results["common_analyzers"]:
            table = []
            header = []
            for block in blocks:

                analyzer_output = Result.objects.filter(
                    cache__in=block.outputs.all(), type__in=Result.SIMPLE_TYPE_NAMES
                ).order_by("name")

                if not header:  # first row, set order
                    header = [k.name for k in analyzer_output]

                table.append([block.experiment] + list(analyzer_output))

            sorted_experiments.append((analyzer, header, table))

        # loads the sorting, applies it to each relevant table
        ordering = json.loads(self.search.settings)

        for entry in ordering:
            if not entry["analyzers"]:
                continue

            # if an analyzer is set, apply ordering to each relevant table
            analyzer = entry["analyzers"][0]

            for index, (algorithm, header, table) in enumerate(sorted_experiments):

                if analyzer != algorithm.fullname():
                    continue  # this ordering does not concern this table

                if not table:
                    # nothing to sort: the code below reads the first row
                    # and would fail on an empty table
                    continue

                getters = []  # getters for sorting
                tmp_table = copy.deepcopy(table)

                for row in tmp_table:
                    # 1. replace result by value on tmp_table
                    for k, element in enumerate(row):
                        if isinstance(element, Result):
                            row[k] = element.value()
                    # 2. append experiment end date (in seconds) to the end,
                    #    negated so that an ascending sort puts latest first
                    row.append(-1 * (row[0].end_date - EPOCH).total_seconds())

                # replaces experiments with indexes into the original table
                for i, tmp_row in enumerate(tmp_table):
                    tmp_row[0] = i

                # get index of columns to use for sorting
                for col in entry["columns"]:
                    # column names end in ``.<result name>``; +1 skips the
                    # experiment sitting in the first position of every row
                    g = header.index(col["name"].rsplit(".", 1)[1]) + 1
                    if not col["ascending"]:
                        # descending order is obtained by negating values
                        for row in tmp_table:
                            row[g] *= -1
                    getters.append(g)

                # if there are matching rows, sort by date (latest first)
                getters.append(len(tmp_table[0]) - 1)

                # sort table tmp_table and apply results back
                tmp_table.sort(key=operator.itemgetter(*getters))

                # apply indexes to the original result table
                table = [table[k[0]] for k in tmp_table]

                # remove unwanted columns
                del getters[-1]  # remove date element from list
                getters.insert(0, 0)  # keep experiment pointers
                for j, row in enumerate(table):
                    table[j] = [v for k, v in enumerate(row) if k in getters]

                # remove unwanted headers
                del getters[0]  # not useful for header manipulation
                header = [v for k, v in enumerate(header) if k + 1 in getters]

                sorted_experiments[index] = (algorithm, header, table)

        return sorted_experiments

    def table(self):
        """Returns the leader board tables for all algorithms

        The return value is a list of ``(algorithm, table)`` tuples. The first
        row of a non-empty ``table`` is the header (``"experiment"`` followed
        by the result names); every other row holds an experiment followed by
        its result values, following the stored rank order.
        """

        retval = []

        for algo in Algorithm.objects.filter(rank__leaderboard=self).distinct():

            table = []

            for rank in (
                Rank.objects.filter(leaderboard=self, algorithm=algo)
                .distinct()
                .order_by("order")
            ):
                results = rank.result.order_by("name")
                if not table:  # add header
                    table.append(["experiment"] + [k.name for k in results])
                table.append([rank.experiment] + [k.value() for k in results])

            retval.append((algo, table))

        return retval

    def update_experiments(self):
        """Updates internal experiment table, returns ``True`` if changed

        All existing ranks are dropped and re-created from
        :py:meth:`current_experiments`; the tables from before and after the
        operation are then compared.
        """

        prev_table = self.table()

        self.experiments.clear()

        # at this point, all tables are sorted, set results
        for algorithm, header, table in self.current_experiments():
            for k, row in enumerate(table):
                r = Rank(
                    leaderboard=self, experiment=row[0], algorithm=algorithm, order=k
                )
                r.save()
                # everything after the experiment is a result of that rank
                r.result.add(*row[1:])

        return prev_table != self.table()

    def save(self, *args, **kwargs):
        """Overload of Django's built-in

        The leaderboard is saved first (ranks need it to exist on the
        database), then its experiments are recalculated. If that changes the
        tables, ``changed`` is set to the current time and the leaderboard is
        saved once more.
        """

        super(Leaderboard, self).save(*args, **kwargs)

        if self.update_experiments():  # changed
            self.changed = datetime.datetime.now()

            # The row was written by the call above: repeating a forced
            # INSERT (as requested by ``objects.create()``, for example)
            # would try to create the very same row a second time.
            if args:
                args = (False,) + tuple(args[1:])
            kwargs.pop("force_insert", None)

            super(Leaderboard, self).save(*args, **kwargs)


class Rank(models.Model):
    """Keeps experiments ordered on the experiments relationship for searches"""

    # the three relations tying one experiment to its place on a leaderboard
    leaderboard = models.ForeignKey(Leaderboard, on_delete=models.CASCADE)
    experiment = models.ForeignKey(Experiment, on_delete=models.CASCADE)
    algorithm = models.ForeignKey(Algorithm, on_delete=models.CASCADE)

    # position of the experiment within the table of its algorithm
    order = models.PositiveIntegerField()

    # results attached to this position
    result = models.ManyToManyField(Result)

    def __str__(self):
        search_name = self.leaderboard.search.fullname()
        experiment_name = self.experiment.fullname()
        return "%s: [%d] %s" % (search_name, self.order, experiment_name)