Source code for beat.web.search.api

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.web module of the BEAT platform.              #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Q
from django.shortcuts import get_object_or_404
from django.utils import six

from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework import permissions
from rest_framework import generics
from rest_framework import status

from .utils import apply_filter
from .utils import FilterGenerator
from .utils import OR

from ..algorithms.models import Algorithm
from ..databases.models import Database
from ..dataformats.models import DataFormat
from ..experiments.models import Experiment
from ..toolchains.models import Toolchain
from ..common.models import Shareable
from ..common.mixins import IsAuthorOrReadOnlyMixin
from ..common.api import ShareView
from ..common.utils import ensure_html

from .models import Search

from ..common.responses import BadRequestResponse
from ..common.mixins import CommonContextMixin, SerializerFieldsMixin

from ..ui.templatetags.gravatar import gravatar_hash

from .serializers import SearchResultSerializer, SearchSerializer, SearchWriteSerializer

import simplejson as json


#------------------------------------------------


class SearchView(APIView):
    """
    Search endpoint

    This view allows to run a search through the various
    elements composing the beat platform either using a
    query which will run a global search or by using filters
    that will restrict the search to the given domains.

    The body of the POST request carries either:

    ``query``
        A non-empty string made of space-separated tokens. Plain tokens
        are keywords. ``command:value`` tokens restrict the search:
        ``db``/``database``, ``tc``/``toolchain``, ``algo``/``algorithm``
        and ``analyzer`` take a comma-separated list of names, while
        ``type`` takes one of 'results', 'toolchains', 'algorithms',
        'analyzers', 'dataformats', 'databases', 'users'. Tokens using any
        other command, or an unknown type, are ignored.

    ``filters``
        A non-empty list of experiment filters (only read when ``query``
        is absent). In that case only experiment results are searched and
        the optional ``settings`` entry is echoed back in the response.

    NOTE(review): earlier documentation also listed an 'order-by' option;
    nothing in this view reads it -- confirm whether it is still supported.
    """
    permission_classes = [permissions.AllowAny]


    FILTER_IEXACT      = 0
    FILTER_ICONTAINS   = 1
    FILTER_ISTARTSWITH = 2
    FILTER_IENDSWITH   = 3

    # Values accepted by the ``type:`` query command
    _SEARCHABLE_TYPES = ('results', 'toolchains', 'algorithms', 'analyzers',
                         'dataformats', 'databases', 'users')

    # Query commands (with their aliases) and the scope each one restricts
    _SCOPE_COMMANDS = {
        'db':        'database',
        'database':  'database',
        'tc':        'toolchain',
        'toolchain': 'toolchain',
        'algo':      'algorithm',
        'algorithm': 'algorithm',
        'analyzer':  'analyzer',
    }


    @staticmethod
    def _and_all(clauses):
        """AND together an iterable of ``Q`` objects (``None`` if it is empty)."""
        combined = None
        for clause in clauses:
            combined = clause if combined is None else combined & clause
        return combined


    @staticmethod
    def build_name_and_description_query(keywords):
        """
        Build the query matching objects that contain every keyword.

        Each keyword must appear (case-insensitively) in either the name or
        the short description of the object.

        :param keywords: iterable of keyword strings
        :return: the combined ``Q`` object, or ``None`` when there is no
                 keyword at all
        """
        return SearchView._and_all(
            Q(name__icontains=keyword) | Q(short_description__icontains=keyword)
            for keyword in keywords
        )


    @classmethod
    def _parse_query(cls, query):
        """
        Split a query string into scopes, result type and keywords.

        :return: a ``(scopes, scope_type, keywords)`` tuple where ``scopes``
                 maps 'database', 'toolchain', 'algorithm' and 'analyzer' to
                 a list of names (or ``None`` when not restricted)
        """
        scopes = {
            'database':  None,
            'toolchain': None,
            'algorithm': None,
            'analyzer':  None,
        }
        scope_type = None
        keywords = []

        for token in query.split(' '):
            token = token.strip()

            # Consecutive spaces yield empty tokens; an empty keyword would
            # match every object, so it is skipped
            if not token:
                continue

            command, separator, value = token.partition(':')

            if not separator:
                keywords.append(token)
            elif command in cls._SCOPE_COMMANDS:
                scopes[cls._SCOPE_COMMANDS[command]] = value.split(',')
            elif (command == 'type') and (value in cls._SEARCHABLE_TYPES):
                scope_type = value
            # Any other 'command:value' token is deliberately ignored

        return scopes, scope_type, keywords


    @staticmethod
    def _build_result_filters(scopes, keywords):
        """Translate parsed scopes and keywords into experiment filters."""
        filters = []

        # The order of the entries is part of the response, keep it stable
        ordered_scopes = (
            ('toolchain',     scopes['toolchain']),
            ('algorithm',     scopes['algorithm']),
            ('analyzer',      scopes['analyzer']),
            ('database-name', scopes['database']),
        )

        for context, names in ordered_scopes:
            if not names:
                continue

            several = len(names) > 1
            filters.append({
                'context': context,
                'name': None,
                'operator': 'contains-any-of' if several else 'contains',
                'value': names if several else names[0],
            })

        if keywords:
            filters.append({
                'context': 'any-field',
                'name': None,
                'operator': 'contains-any-of',
                'value': keywords,
            })

        return filters


    def post(self, request):
        """
        Run the search described by the request body.

        :return: a ``BadRequestResponse`` when the query or the filters are
                 missing or malformed, otherwise a ``Response`` holding the
                 matching users, toolchains, algorithms, analyzers, data
                 formats, databases and experiment results together with
                 the filters, settings and result type that were used
        """
        data = request.data

        filters = None
        display_settings = None
        scope_type = None
        keywords = []
        scopes = {
            'database':  None,
            'toolchain': None,
            'algorithm': None,
            'analyzer':  None,
        }

        if 'query' in data:
            query_string = data['query']
            if not isinstance(query_string, six.string_types) or not query_string:
                return BadRequestResponse('Invalid query data')

            scopes, scope_type, keywords = self._parse_query(query_string)

            if scope_type in (None, 'results'):
                filters = self._build_result_filters(scopes, keywords)
        else:
            # A body without 'filters' must be rejected, not raise KeyError
            filters = data.get('filters')
            if not isinstance(filters, list) or not filters:
                return BadRequestResponse('Invalid filter data')

            display_settings = data.get('settings')
            scope_type = 'results'

        scope_database  = scopes['database']
        scope_toolchain = scopes['toolchain']
        scope_algorithm = scopes['algorithm']
        scope_analyzer  = scopes['analyzer']

        def wanted(kind, *conflicting_scopes):
            # A kind of object is searched when the type is not restricted
            # to another kind and no incompatible scope was requested
            return (scope_type in (None, kind)) and \
                   all(scope is None for scope in conflicting_scopes)

        result = {
            'users':       [],
            'toolchains':  [],
            'algorithms':  [],
            'analyzers':   [],
            'dataformats': [],
            'databases':   [],
            'results':     [],
            'filters':     filters,
            'settings':    display_settings,
            'query': {
                'type': scope_type,
            },
        }

        # Search for users matching the query
        if keywords and wanted('users', scope_database, scope_toolchain,
                               scope_algorithm, scope_analyzer):
            user_query = self._and_all(Q(username__icontains=keyword)
                                       for keyword in keywords)
            users = User.objects.filter(user_query) \
                                .exclude(username__in=settings.ACCOUNTS_TO_EXCLUDE_FROM_SEARCH) \
                                .order_by('username')

            # A real list is needed here: a lazy map object is not serializable
            result['users'] = [
                {
                    'username': user.username,
                    'gravatar_hash': gravatar_hash(user.email),
                    'join_date': user.date_joined.strftime('%b %d, %Y'),
                }
                for user in users
            ]

        name_query = None
        if keywords:
            name_query = self.build_name_and_description_query(keywords)

        # Search for toolchains matching the query
        if wanted('toolchains', scope_database, scope_algorithm, scope_analyzer):
            result['toolchains'] = self._retrieve_contributions(
                Toolchain.objects.for_user(request.user, True),
                scope_toolchain, name_query
            )

        # Search for algorithms matching the query
        if wanted('algorithms', scope_database, scope_toolchain, scope_analyzer):
            result['algorithms'] = self._retrieve_contributions(
                Algorithm.objects.for_user(request.user, True).filter(result_dataformat__isnull=True),
                scope_algorithm, name_query
            )

        # Search for analyzers matching the query
        if wanted('analyzers', scope_database, scope_toolchain, scope_algorithm):
            result['analyzers'] = self._retrieve_contributions(
                Algorithm.objects.for_user(request.user, True).filter(result_dataformat__isnull=False),
                scope_analyzer, name_query
            )

        # Search for data formats matching the query
        if wanted('dataformats', scope_database, scope_toolchain,
                  scope_algorithm, scope_analyzer):
            dataformats = DataFormat.objects.for_user(request.user, True)
            if name_query:
                dataformats = dataformats.filter(name_query)

            result['dataformats'] = SearchResultSerializer(dataformats, many=True).data

        # Search for databases matching the query
        if wanted('databases', scope_toolchain, scope_algorithm, scope_analyzer):
            result['databases'] = self._retrieve_databases(
                Database.objects.for_user(request.user, True),
                scope_database, name_query
            )

        # Search for experiments matching the query
        if wanted('results'):
            result['results'] = self._retrieve_experiment_results(request.user, filters)

        # Sort the results by name
        for kind in ('toolchains', 'algorithms', 'analyzers', 'dataformats', 'databases'):
            result[kind].sort(key=lambda entry: entry['name'])

        return Response(result)


    def _retrieve_contributions(self, queryset, scope, query):
        """
        Serialize the contributions of ``queryset`` matching the search.

        :param queryset: the contributions visible to the user
        :param scope: list of contribution names to restrict to, or ``None``
        :param query: ``Q`` object built from the keywords, or ``None``
        """
        generator = FilterGenerator()

        scope_filters = [generator.process_contribution_name(contribution_name)
                         for contribution_name in (scope or [])]

        if scope_filters:
            queryset = queryset.filter(OR(scope_filters))

        if query:
            queryset = queryset.filter(query)

        return SearchResultSerializer(queryset, many=True).data


    def _retrieve_databases(self, queryset, scope, query):
        """
        Serialize the databases of ``queryset`` matching the search.

        :param queryset: the databases visible to the user
        :param scope: list of dataset names to restrict to, or ``None``
        :param query: ``Q`` object built from the keywords, or ``None``
        """
        generator = FilterGenerator()

        scope_filters = [generator.process_dataset_name(dataset_name)
                         for dataset_name in (scope or [])]

        if scope_filters:
            queryset = queryset.filter(OR(scope_filters))

        if query:
            queryset = queryset.filter(query)

        queryset = queryset.distinct()

        return SearchResultSerializer(queryset, many=True, name_field='name').data


    def _retrieve_experiment_results(self, user, filters):
        """
        Collect the finished experiments matching ``filters``.

        :param user: the user running the search
        :param filters: list of filter entries handed to ``apply_filter``
        :return: a dictionary with the matching 'experiments', the result
                 'dataformats' of their analyzers, and the analyzers and
                 protocols shared by all of them ('common_analyzers',
                 'common_protocols'). The analyzer results themselves are
                 only filled in when at least one analyzer is common to
                 every experiment.
        """
        results = {
            'experiments':      [],
            'dataformats':      {},
            'common_analyzers': [],
            'common_protocols': [],
        }

        if not filters:
            return results


        # Use the experiment filters
        experiments = Experiment.objects.for_user(user, True).filter(status=Experiment.DONE)

        for filter_entry in filters:
            experiments = apply_filter(experiments, filter_entry)

        experiments = experiments.distinct()


        # Retrieve informations about each experiment and determine if there is at least
        # one common analyzer
        common_protocols = None
        common_analyzers = None

        # (entry, block) pairs remembered so that the results can be attached
        # later without running the (unordered) experiment query a second time
        analyzer_blocks = []

        for experiment in experiments.iterator():
            protocols = list(set(dataset.protocol.fullname()
                                 for dataset in experiment.referenced_datasets.iterator()))

            experiment_entry = {
                'name':               experiment.fullname(),
                'toolchain':          experiment.toolchain.fullname(),
                'description':        experiment.short_description,
                'public':             (experiment.sharing == Shareable.PUBLIC),
                'attestation_number': None,
                'attestation_locked': False,
                'end_date':           experiment.end_date,
                'protocols':          protocols,
                'analyzers':          [],
            }

            if experiment.has_attestation():
                experiment_entry['attestation_number'] = experiment.attestation.number
                experiment_entry['attestation_locked'] = experiment.attestation.locked

            experiment_analyzers = []
            for analyzer_block in experiment.blocks.filter(analyzer=True).iterator():
                analyzer_name = analyzer_block.algorithm.fullname()
                analyzer_entry = {
                    'name':    analyzer_name,
                    'block':   analyzer_block.name,
                    'results': {},
                }

                experiment_entry['analyzers'].append(analyzer_entry)
                experiment_analyzers.append(analyzer_name)
                analyzer_blocks.append((analyzer_entry, analyzer_block))

                if analyzer_name not in results['dataformats']:
                    results['dataformats'][analyzer_name] = json.loads(analyzer_block.algorithm.result_dataformat)

            if common_analyzers is None:
                common_analyzers = experiment_analyzers
            elif common_analyzers:
                common_analyzers = [name for name in common_analyzers
                                    if name in experiment_analyzers]

            if common_protocols is None:
                common_protocols = protocols
            elif common_protocols:
                common_protocols = [name for name in common_protocols
                                    if name in protocols]

            results['experiments'].append(experiment_entry)

        # No experiment matched the filters
        if not results['experiments']:
            return results

        results['common_analyzers'] = common_analyzers
        results['common_protocols'] = common_protocols


        # No common analyzer found, don't retrieve any result
        if not common_analyzers:
            results['dataformats'] = {}
            return results


        # Retrieve the results of each experiment
        for analyzer_entry, analyzer_block in analyzer_blocks:
            for analyzer_result in analyzer_block.results.iterator():
                analyzer_entry['results'][analyzer_result.name] = {
                    'type':    analyzer_result.type,
                    'primary': analyzer_result.primary,
                    'value':   analyzer_result.value()
                }

        return results


#------------------------------------------------


class SearchSaveView(CommonContextMixin, SerializerFieldsMixin, generics.CreateAPIView, generics.UpdateAPIView):
    """
    Endpoint used to store a new search or to modify a stored one

    A stored search can be run again later without rebuilding its
    query/filters by hand, which is handy once they become complex.

    Running the same stored search twice is not guaranteed to return
    the same results.
    """

    model = Search
    permission_classes = [permissions.IsAuthenticated]
    serializer_class = SearchWriteSerializer

    def build_results(self, request, search):
        """
        Build the part of the response shared by ``post`` and ``put``.

        The dictionary only contains 'html_description' (the description of
        the search passed through ``ensure_html``, or an empty string when
        there is no description), and only if that field was requested.
        """
        payload = {}

        # Retrieve the description in HTML format
        if 'html_description' in self.get_serializer_fields(request):
            description = search.description
            payload['html_description'] = \
                ensure_html(description) if len(description) > 0 else ''

        return payload

    def post(self, request):
        """Validate and save a new search, answering with HTTP 201."""
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        search = serializer.save()

        payload = self.build_results(request, search)
        payload.update({
            'fullname': search.fullname(),
            'url': search.get_absolute_url(),
        })
        return Response(payload, status=status.HTTP_201_CREATED)

    def put(self, request, author_name, name):
        """Partially update the search ``author_name``/``name`` (404 if unknown)."""
        search = get_object_or_404(Search, author__username=author_name, name=name)

        serializer = self.get_serializer(instance=search, data=request.data, partial=True)
        serializer.is_valid(raise_exception=True)
        serializer.save()

        return Response(self.build_results(request, search))


#------------------------------------------------


class ListSearchView(CommonContextMixin, generics.ListAPIView):
    """
    Lists the searches of a given author that the requesting user may see
    """
    permission_classes = [permissions.AllowAny]
    serializer_class = SearchSerializer

    def get_queryset(self):
        """Return the accessible searches written by ``author_name`` (URL argument)."""
        accessible = Search.objects.for_user(self.request.user, True)
        return accessible.select_related().filter(
            author__username=self.kwargs['author_name']
        )


#----------------------------------------------------------


class RetrieveDestroySearchAPIView(CommonContextMixin, SerializerFieldsMixin, IsAuthorOrReadOnlyMixin, generics.RetrieveDestroyAPIView):
    """
    Retrieve or delete the given search
    """
    model = Search
    serializer_class = SearchSerializer


    def get_object(self):
        """
        Look up the search designated by the URL arguments.

        The lookup is limited to the searches returned by ``for_user`` for
        the requesting user; a 404 is raised when none matches.
        """
        visible_searches = self.model.objects.for_user(self.request.user, True)
        return get_object_or_404(
            visible_searches,
            author__username=self.kwargs.get('author_name'),
            name=self.kwargs.get('object_name'),
        )

    def get(self, request, *args, **kwargs):
        """Serialize the search, restricted to the fields asked for in the request."""
        search = self.get_object()

        # Sharing fields are only allowed for the author of the search
        is_author = (request.user == search.author)
        fields_to_return = self.get_serializer_fields(request, allow_sharing=is_author)

        return Response(self.get_serializer(search, fields=fields_to_return).data)

#------------------------------------------------


class ShareSearchView(ShareView):
    """
    Endpoint sharing the given search with other users/teams
    """
    model = Search
    permission_classes = [permissions.AllowAny]

    def get_queryset(self):
        """Set the 'version' URL argument to 1, then defer to ``ShareView``."""
        self.kwargs.update(version=1)
        return super(ShareSearchView, self).get_queryset()