# Source code for beat.web.search.views

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.web module of the BEAT platform.              #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

import datetime

import simplejson as json

from django.http import Http404
from django.http import HttpResponseBadRequest
from django.shortcuts import render, redirect
from django.shortcuts import get_object_or_404
from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Count
from django.contrib import messages
from django.contrib.auth.decorators import login_required

from .models import Search
from .utils import apply_filter

from ..team.models import Team
from ..ui.templatetags.markup import restructuredtext
from ..experiments.models import Experiment, Block
from ..algorithms.models import Algorithm
from ..databases.models import DatabaseProtocol, DatabaseSet
from ..toolchains.models import Toolchain


def search_experiments(user, filters, time_delta=None):
    """Retrieves all experiment results and parses for commonalities.

    Parameters:

      user (User): The user making the request and for which we will filter
        results for

      filters (list): A set of filters defined by the user on how to subselect
        experiments. Each entry is handed, in order, to ``apply_filter``. If
        no filter is given, no search is performed and the (empty) default
        results are returned.

      time_delta (datetime.timedelta): An optional time delta to restrict the
        search to experiments whose end date is at least that far in the past.

    Returns:

      dict: Containing the following fields:

        'experiments': [Experiment, ...]
        'common_toolchains': the toolchains used by the experiments, set only
          if exactly one toolchain is shared by all of them (else ``[]``)
        'common_protocols': [Protocol, ...],
        'common_analyzers': [(Algorithm, [Block, ...]), ...],


    """

    results = {
        'experiments': [],
        'common_toolchains': [],
        'common_protocols': [],
        'common_analyzers': [],
    }

    # Nothing to search for: hand back the empty result set
    if not filters:
        return results

    # Start from the finished experiments returned for this user
    experiments = Experiment.objects.for_user(user, True).filter(status=Experiment.DONE)

    # Apply time delta restriction
    if time_delta is not None:
        past = datetime.datetime.now() - time_delta
        experiments = experiments.filter(end_date__lte=past)

    for filter_entry in filters:
        experiments = apply_filter(experiments, filter_entry)

    # Make it a list as it is faster to process (don't have to re-run the
    # query). Testing the list for emptiness also spares a separate COUNT.
    experiments = list(experiments.distinct())
    if not experiments:
        return results

    total_experiments = len(experiments)

    # All used toolchains
    toolchains = Toolchain.objects.filter(experiments__in=experiments).distinct()
    if toolchains.count() == 1:
        results['common_toolchains'] = toolchains

    # Dictionary analyzer (algorithm) -> blocks in each experiment
    # The issue: for each experiment, the analyzer block name and associated
    # analyzer algorithm may be different. We're seeking to merge results that
    # make sense together. Here are the use cases:
    #
    # 1. There is only one analyzer which is used for all experiments:
    #    Subcase 1: Each experiment has 1 analysis block
    #    Subcase 2: Each experiment has multiple analysis blocks using
    #    (necessarily) the same algorithm. In this case it is not easy to
    #    compare the outcomes as we don't know the mapping between the
    #    experiment block names from experiment to experiment so we cannot know
    #    which results should be compared to similar results in the other
    #    experiment. We then try an organization by block name (loosely relies
    #    on experiments having the same toolchain or similar enough)
    #
    # 2. There are multiple analyzers per experiment:
    #    Total chaos ;-) We can still get away with the same analysis as in
    #    item 1, Subcase 2.
    #
    # All of the cases above can be correctly treated with the following
    # algorithm:
    #
    #   For each unique block name, organize under the same list the blocks
    #   from the experiments which have the same block name. Then, try to remap
    #   algorithm -> blocks. If algorithms for the same block name don't match
    #   and there are multiple algorithms being used, then we give up -
    #   experiments in these conditions cannot be compared. If the names of the
    #   blocks are different, but they all use the same algorithm and there is
    #   only 1 analyzer block per experiment, re-group all blocks under the
    #   same list.
    #
    # Example (difficult) use-case:
    #
    # Experiment 1: "analysis"  -> "user/analyzer/1"
    #               "analysis2" -> "user/analyzer/2"
    # Experiment 2: "analysis"  -> "user/analyzer/3"
    #               "analysis2" -> "user/analyzer/1"
    # Experiment 3: "analysis2" -> "user/analyzer/1"
    #               "analysis"  -> "user/analyzer/2"
    #
    # Experiments 1 and 3 are compatible and can be compared (note analysis
    # block names don't match). Experiment 2 analysis2 is compatible with the
    # other 2 experiments.

    # This works if the toolchain is the same or very similar (in terms of
    # block names). Any number of matching blocks is captured.
    blocks = Block.objects.filter(analyzer=True, experiment__in=experiments)
    ordered_blocks = blocks.order_by('name', 'algorithm__author__username', 'algorithm__name', 'algorithm__version')

    # Phase 1: group analyzer blocks by block name
    blocks_by_name = {}
    for block in ordered_blocks:
        blocks_by_name.setdefault(block.name, []).append(block)

    unused_blocks = []
    for name, elements in blocks_by_name.items():
        algorithms = {k.algorithm for k in elements}
        if len(algorithms) == 1 and len(elements) == total_experiments:
            # full match, algorithm is a common analyzer across all experiments
            # n.b.: because block names are unique within experiments the above
            # equality works fine.
            results['common_analyzers'].append((algorithms.pop(), elements))
        else:
            # blocks with the same name use different algorithms, or the name
            # is not present in every experiment: retry them by algorithm
            unused_blocks += elements

    # Phase 2: grouping by algorithm is a delicate process as we need to ensure
    # uniqueness: every experiment must have exactly the same number of blocks
    # with the same set of algorithms. We only probe unused blocks that we
    # could not match with experiment block names from the phase above.
    blocks_by_algorithm = {}
    for block in unused_blocks:
        blocks_by_algorithm.setdefault(block.algorithm, []).append(block)

    for algorithm, algo_blocks in blocks_by_algorithm.items():
        uniq_experiments = {k.experiment for k in algo_blocks}
        # exactly one block per experiment, and every experiment is covered
        if len(algo_blocks) == total_experiments and \
                len(uniq_experiments) == total_experiments:
            results['common_analyzers'].append((algorithm, algo_blocks))

    # Protocols
    protocols = DatabaseProtocol.objects.filter(sets__in=DatabaseSet.objects.filter(experiments__in=experiments)).distinct()

    # Counted once: the value is compared against every experiment below
    total_protocols = protocols.count()

    # Either there is a single protocol overall, or every experiment uses as
    # many distinct protocols as the whole selection does. The generator stops
    # querying at the first experiment that does not match.
    if total_protocols == 1 or all(
            DatabaseProtocol.objects.filter(sets__in=DatabaseSet.objects.filter(experiments__in=[k])).distinct().count() == total_protocols
            for k in experiments):
        results['common_protocols'] = protocols

    results['experiments'] = experiments
    return results


#------------------------------------------------


def filters_from_query(query):
    """Breaks down the input query into filters

    The query is split on whitespace into keywords. A keyword of the form
    ``command:value[,value...]`` is turned into a dedicated filter when the
    command is one of ``db``/``database``, ``tc``/``toolchain``,
    ``algo``/``algorithm`` or ``analyzer``. Keywords using the ``type``
    command are ignored. Values attached to any other command, as well as
    keywords without a colon, are collected into a single trailing
    ``any-field`` filter. Commands without any (non-empty) value are skipped,
    so that no filter is ever created with an empty list of values.

    Parameters:

      query (str): The free-text query typed by the user


    Returns:

      list: A list of dictionaries, each with the keys ``context``,
        ``operator`` (``contains`` for a single value, ``contains-any-of``
        otherwise), ``value`` (a string for a single value, a list otherwise)
        and ``name`` (always ``None``). The list is empty for a blank query.

    """

    # command (as typed by the user) -> filter context
    contexts = {
        'db': 'database-name',
        'database': 'database-name',
        'tc': 'toolchain',
        'toolchain': 'toolchain',
        'algo': 'algorithm',
        'algorithm': 'algorithm',
        'analyzer': 'analyzer',
    }

    filters = []
    any_fields = []

    # str.split() without arguments already drops blank keywords
    for keyword in query.split():

        command, separator, values = keyword.partition(':')

        if not separator:  # plain keyword, no command
            any_fields.append(keyword)
            continue

        if command == 'type':  # recognized, but not used for filtering
            continue

        entries = [x.strip() for x in values.split(',')]
        entries = [x for x in entries if x]
        if not entries:  # e.g. "db:" - nothing to match against
            continue

        context = contexts.get(command)
        if context is None:  # unknown command: search its values anywhere
            any_fields.extend(entries)
            continue

        single = (len(entries) == 1)
        filters.append({
            'context': context,
            'operator': 'contains' if single else 'contains-any-of',
            'value': entries[0] if single else entries,
            'name': None,
        })

    if any_fields:
        filters.append({
            'context': 'any-field',
            'operator': 'contains-any-of',
            'value': any_fields,
            'name': None,
        })

    return filters


#------------------------------------------------


def search(request):
    """Casual search request

    The filters to apply are taken, by increasing priority, from the ``query``
    GET parameter, the ``query`` POST parameter and the ``filters`` POST
    parameter (a JSON-encoded list). Display settings are taken verbatim from
    the ``settings`` POST parameter, if present.

    Parameters:

      request: The HTTP request being served


    Returns:

      The rendered ``search/view.html`` page, or an HTTP 400 response if the
      ``filters`` parameter is not a JSON-encoded list.

    """

    # by default there are no filters, in which case ``search_experiments``
    # returns an empty result set
    filters = '[]'
    display_settings = []  # by default, no special settings

    # A sequence of filter overrides that must be respected: the POST query
    # overrules the GET one...
    for parameters in (request.GET, request.POST):
        if 'query' in parameters:
            query = parameters['query'].strip()
            filters = json.dumps(filters_from_query(query))

    # ... and explicit filters overrule any query
    if 'filters' in request.POST:
        filters = request.POST['filters'].strip()

    if 'settings' in request.POST:
        display_settings = request.POST['settings'].strip()

    # ``filters`` may come straight from the client: refuse anything that is
    # not a JSON list instead of failing with a server error
    try:
        parsed_filters = json.loads(filters)
    except ValueError:
        return HttpResponseBadRequest('The "filters" parameter is not valid JSON')

    if not isinstance(parsed_filters, list):
        return HttpResponseBadRequest('The "filters" parameter must be a JSON list')

    context = {
        'search': None,
        'owner': False,
        'filters': filters,
        'settings': display_settings,
        'results': search_experiments(request.user, parsed_filters),
        'URL_PREFIX': settings.URL_PREFIX,
    }

    return render(request, 'search/view.html', context)


#------------------------------------------------


def view(request, author_name, query_name):
    """Stored search request

    Parameters:

      request: The HTTP request being served

      author_name (str): The username of the author of the stored search

      query_name (str): The name of the stored search


    Returns:

      The rendered ``search/view.html`` page, or an HTTP 400 response if a
      ``filters`` POST parameter is given which is not a JSON-encoded list.


    Raises:

      Http404: If the search does not exist or is not accessible to the
        requesting user

    """

    # get the query from the DB
    obj = get_object_or_404(Search,
                            author__username=author_name,
                            name=query_name)

    # makes sure that the current user has access to it
    has_access, _ = obj.accessibility_for(request.user)
    if not has_access:
        raise Http404()

    if 'filters' in request.POST:
        # user requests a filter update on a saved search, overrides defaults.
        # This value comes straight from the client: refuse anything that is
        # not a JSON list instead of failing with a server error
        filters = request.POST['filters'].strip()
        try:
            parsed_filters = json.loads(filters)
        except ValueError:
            return HttpResponseBadRequest('The "filters" parameter is not valid JSON')
        if not isinstance(parsed_filters, list):
            return HttpResponseBadRequest('The "filters" parameter must be a JSON list')
    else:
        # use the filters stored with the object
        filters = obj.filters
        parsed_filters = json.loads(filters)

    if 'settings' in request.POST:
        display_settings = request.POST['settings'].strip()
    else:
        # use the settings stored with the object
        display_settings = obj.settings

    # Users the object can be shared with
    users = User.objects.exclude(username__in=settings.ACCOUNTS_TO_EXCLUDE_FROM_TEAMS).order_by('username')

    context = {
        'search': obj,
        'owner': (request.user == obj.author),
        'filters': filters,
        'settings': display_settings,
        'results': search_experiments(request.user, parsed_filters),
        'URL_PREFIX': settings.URL_PREFIX,
        'users': users,
        'teams': Team.objects.for_user(request.user, True),
    }

    return render(request, 'search/view.html', context)


#----------------------------------------------------------


def ls(request, author_name):
    '''List all accessible searches to the request user

    Parameters:

      request: The HTTP request being served

      author_name (str): The username of the author whose searches should be
        listed. If empty, the listing of public searches is served instead
        (see ``public_ls``).


    Returns:

      The rendered ``search/list.html`` page


    Raises:

      Http404: If no user is registered under ``author_name``

    '''

    if not author_name:
        return public_ls(request)

    # check that the user exists on the system
    author = get_object_or_404(User, username=author_name)

    # orders searches so that the latest information is displayed first
    searches = Search.objects.from_author_and_public(request.user, author_name)
    searches = searches.order_by('-creation_date')

    context = {
        'objects': searches,
        'author': author,
        'owner': (request.user == author),
    }

    return render(request, 'search/list.html', context)


#----------------------------------------------------------


def public_ls(request):
    '''List all public searches

    Parameters:

      request: The HTTP request being served


    Returns:

      The rendered ``search/list.html`` page, in which the requesting user is
      passed as ``author`` and is never flagged as ``owner``

    '''

    # orders searches so that the latest information is displayed first
    searches = Search.objects.public().order_by('-creation_date')

    context = {
        'objects': searches,
        'author': request.user,
        'owner': False,
    }

    return render(request, 'search/list.html', context)


#----------------------------------------------------------


@login_required
def notify(request, author_name, query_name):
    """Toggles notification for user

    If the requesting user is currently in the notification list of the
    leaderboard attached to the search, they are removed from it; otherwise
    they are added to it. A success message is queued in both cases.

    Parameters:

      request: The HTTP request being served

      author_name (str): The username of the author of the stored search

      query_name (str): The name of the stored search


    Returns:

      A redirection to the referring page if the request carries an
      ``HTTP_REFERER`` header, or to the search object otherwise. If the search
      has no leaderboard, a redirection to the search object is returned
      without changing anything.


    Raises:

      Http404: If the search does not exist or is not accessible to the
        requesting user

    """

    # get the query from the DB
    obj = get_object_or_404(Search,
                            author__username=author_name,
                            name=query_name)

    # makes sure that the current user has access to it
    has_access, _ = obj.accessibility_for(request.user)
    if not has_access:
        raise Http404()

    if not obj.has_leaderboard():
        # user page is outdated, reload it
        return redirect(obj)

    user = request.user

    if obj.leaderboard.notify.filter(id=user.id).exists():
        obj.leaderboard.notify.remove(user)
        template = "Successfuly unsubscribed %s %s (%s) from leaderboard %s"
    else:
        obj.leaderboard.notify.add(user)
        template = "Successfuly subscribed %s %s (%s) to leaderboard %s"

    messages.success(request, template % (user.first_name, user.last_name, user.username, obj))

    # go back to where the user came from, when known
    if 'HTTP_REFERER' in request.META:
        return redirect(request.META['HTTP_REFERER'])

    return redirect(obj)