Source code for beat.web.search.api

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.web module of the BEAT platform.              #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Q
from django.shortcuts import get_object_or_404
from django.utils import six

from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework import permissions
from rest_framework import generics
from rest_framework import status

from .utils import apply_filter
from .utils import FilterGenerator
from .utils import OR

from ..algorithms.models import Algorithm
from ..databases.models import Database
from ..dataformats.models import DataFormat
from ..experiments.models import Experiment
from ..toolchains.models import Toolchain
from ..common.models import Shareable
from ..common.mixins import IsAuthorOrReadOnlyMixin
from ..common.api import ShareView
from ..common.utils import ensure_html

from .models import Search

from ..common.responses import BadRequestResponse
from ..common.mixins import CommonContextMixin, SerializerFieldsMixin

from ..ui.templatetags.gravatar import gravatar_hash

from .serializers import SearchResultSerializer, SearchSerializer, SearchWriteSerializer

import simplejson as json


#------------------------------------------------


class SearchView(APIView):
    """
    Search endpoint

    This view allows to run a search through the various elements composing
    the beat platform either using a query which will run a global search or by
    using filters that will restrict the search to the given domains.

    Available filters are: 'results', 'toolchains', 'algorithms', 'analyzers',
    'dataformats', 'databases', 'users'

    Available options are: 'order-by'

    The request body must be a mapping holding either:

    * ``query``: a non-empty string made of whitespace separated keywords
      and ``command:value[,value...]`` tokens. Recognised commands are
      ``db``/``database``, ``tc``/``toolchain``, ``algo``/``algorithm``,
      ``analyzer`` and ``type``.
    * ``filters``: a non-empty list of experiment filters, optionally
      accompanied by ``settings`` which is echoed back in the response.
    """

    permission_classes = [permissions.AllowAny]

    FILTER_IEXACT = 0
    FILTER_ICONTAINS = 1
    FILTER_ISTARTSWITH = 2
    FILTER_IENDSWITH = 3

    # Values accepted by the ``type:`` command of a textual query
    _SEARCHABLE_TYPES = ('results', 'toolchains', 'algorithms', 'analyzers',
                         'dataformats', 'databases', 'users')

    # Query command -> name of the scope it restricts
    _SCOPE_COMMANDS = {
        'db': 'database',
        'database': 'database',
        'tc': 'toolchain',
        'toolchain': 'toolchain',
        'algo': 'algorithm',
        'algorithm': 'algorithm',
        'analyzer': 'analyzer',
    }

    # (scope name, experiment filter context), in the order the filters are
    # emitted
    _SCOPE_CONTEXTS = (
        ('toolchain', 'toolchain'),
        ('algorithm', 'algorithm'),
        ('analyzer', 'analyzer'),
        ('database', 'database-name'),
    )

    _SCOPE_NAMES = ('database', 'toolchain', 'algorithm', 'analyzer')

    @staticmethod
    def build_name_and_description_query(keywords):
        """Build a ``Q`` object matching objects related to every keyword

        An object matches when each keyword is contained (case-insensitive)
        in its ``name`` or in its ``short_description``.

        Parameters:
            keywords: iterable of strings

        Returns:
            The combined ``Q`` object, or ``None`` when ``keywords`` is empty.
        """
        combined = None

        for keyword in keywords:
            clause = Q(name__icontains=keyword) | \
                     Q(short_description__icontains=keyword)
            combined = clause if combined is None else (combined & clause)

        return combined

    @classmethod
    def _parse_query(cls, query):
        """Split a textual query into scopes, a result type and keywords

        Returns:
            A ``(scopes, scope_type, keywords)`` tuple where ``scopes`` maps
            each of 'database', 'toolchain', 'algorithm' and 'analyzer' to the
            list of names given for it (``None`` when the scope is absent),
            ``scope_type`` is the value of a valid ``type:`` command (or
            ``None``) and ``keywords`` lists the tokens without a command.
        """
        scopes = dict.fromkeys(cls._SCOPE_NAMES)
        scope_type = None
        keywords = []

        # split() without argument drops the empty tokens produced by
        # repeated, leading or trailing whitespace: an empty keyword would
        # be "contained" in every name and match everything.
        for token in query.split():
            command, separator, argument = token.partition(':')

            if not separator:
                keywords.append(token)
                continue

            scope = cls._SCOPE_COMMANDS.get(command)
            if scope is not None:
                scopes[scope] = argument.split(',')
            elif (command == 'type') and (argument in cls._SEARCHABLE_TYPES):
                scope_type = argument
            # Tokens using an unknown command are ignored

        return scopes, scope_type, keywords

    @classmethod
    def _build_filters(cls, scopes, keywords):
        """Translate parsed scopes and keywords into experiment filters"""
        filters = []

        for scope, context in cls._SCOPE_CONTEXTS:
            names = scopes[scope]
            if not names:
                continue

            if len(names) > 1:
                operator, value = 'contains-any-of', names
            else:
                operator, value = 'contains', names[0]

            filters.append({
                'context': context,
                'name': None,
                'operator': operator,
                'value': value,
            })

        if keywords:
            filters.append({
                'context': 'any-field',
                'name': None,
                'operator': 'contains-any-of',
                'value': keywords,
            })

        return filters

    def post(self, request):
        """Run a search and return the matching objects per category

        Responds with a "bad request" when the body is not a mapping, when
        ``query`` is not a non-empty string or, in the absence of ``query``,
        when ``filters`` is missing or is not a non-empty list.
        """
        data = request.data

        # A JSON body may be a list or a scalar, which cannot hold our keys
        if not isinstance(data, dict):
            return BadRequestResponse('Invalid query data')

        query = None
        filters = None
        display_settings = None

        if 'query' in data:
            query = data['query']
            if not isinstance(query, six.string_types) or (len(query) == 0):
                return BadRequestResponse('Invalid query data')
        else:
            # get() so that a missing key is reported as a bad request
            # instead of failing with a KeyError
            filters = data.get('filters')
            if not isinstance(filters, list) or (len(filters) == 0):
                return BadRequestResponse('Invalid filter data')

            # NOTE(review): the original nesting was lost in this copy of the
            # file; the settings are assumed to be read only together with
            # explicit filters - confirm against the repository
            if 'settings' in data:
                display_settings = data['settings']

        # Process the query
        if filters is None:
            scopes, scope_type, keywords = self._parse_query(query)

            if (scope_type is None) or (scope_type == 'results'):
                filters = self._build_filters(scopes, keywords)
        else:
            scopes = dict.fromkeys(self._SCOPE_NAMES)
            scope_type = 'results'
            keywords = []

        result = {
            'users': [],
            'toolchains': [],
            'algorithms': [],
            'analyzers': [],
            'dataformats': [],
            'databases': [],
            'results': [],
            'filters': filters,
            'settings': display_settings,
            'query': {
                'type': scope_type,
            },
        }

        def searching(type_name, *conflicting_scopes):
            # A category is searched when the query does not target another
            # type and none of the scopes unrelated to it is in use
            if (scope_type is not None) and (scope_type != type_name):
                return False
            return all(scopes[name] is None for name in conflicting_scopes)

        user = request.user

        # Search for users matching the query
        if searching('users', 'database', 'toolchain', 'algorithm', 'analyzer') \
                and keywords:
            users = User.objects.exclude(
                username__in=settings.ACCOUNTS_TO_EXCLUDE_FROM_SEARCH)

            # Every keyword must be part of the username
            for keyword in keywords:
                users = users.filter(username__icontains=keyword)

            # A real list (not a lazy map object) so it can be serialized
            result['users'] = [
                {
                    'username': u.username,
                    'gravatar_hash': gravatar_hash(u.email),
                    'join_date': u.date_joined.strftime('%b %d, %Y'),
                }
                for u in users.order_by('username')
            ]

        name_query = None
        if keywords:
            name_query = self.build_name_and_description_query(keywords)

        # Search for toolchains matching the query
        if searching('toolchains', 'database', 'algorithm', 'analyzer'):
            result['toolchains'] = self._retrieve_contributions(
                Toolchain.objects.for_user(user, True),
                scopes['toolchain'],
                name_query
            )

        # Search for algorithms matching the query
        if searching('algorithms', 'database', 'toolchain', 'analyzer'):
            result['algorithms'] = self._retrieve_contributions(
                Algorithm.objects.for_user(user, True).filter(
                    result_dataformat__isnull=True),
                scopes['algorithm'],
                name_query
            )

        # Search for analyzers matching the query
        if searching('analyzers', 'database', 'toolchain', 'algorithm'):
            result['analyzers'] = self._retrieve_contributions(
                Algorithm.objects.for_user(user, True).filter(
                    result_dataformat__isnull=False),
                scopes['analyzer'],
                name_query
            )

        # Search for data formats matching the query
        if searching('dataformats', 'database', 'toolchain', 'algorithm',
                     'analyzer'):
            dataformats = DataFormat.objects.for_user(user, True)
            if name_query:
                dataformats = dataformats.filter(name_query)

            serializer = SearchResultSerializer(dataformats, many=True)
            result['dataformats'] = serializer.data

        # Search for databases matching the query
        if searching('databases', 'toolchain', 'algorithm', 'analyzer'):
            result['databases'] = self._retrieve_databases(
                Database.objects.for_user(user, True),
                scopes['database'],
                name_query
            )

        # Search for experiments matching the query
        if searching('results'):
            result['results'] = self._retrieve_experiment_results(user, filters)

        # Sort the results by name (key function: works on Python 2 and 3)
        for category in ('toolchains', 'algorithms', 'analyzers',
                         'dataformats', 'databases'):
            result[category] = sorted(result[category],
                                      key=lambda entry: entry['name'])

        return Response(result)

    def _retrieve_contributions(self, queryset, scope, query):
        """Serialize the contributions of ``queryset`` matching the search

        Parameters:
            queryset: contributions visible to the user
            scope: list of contribution names to restrict to, or ``None``
            query: ``Q`` object further restricting the result, or ``None``
        """
        generator = FilterGenerator()

        scope_filters = [generator.process_contribution_name(name)
                         for name in (scope or [])]

        if scope_filters:
            queryset = queryset.filter(OR(scope_filters))

        if query:
            queryset = queryset.filter(query)

        serializer = SearchResultSerializer(queryset, many=True)
        return serializer.data

    def _retrieve_databases(self, queryset, scope, query):
        """Serialize the databases of ``queryset`` matching the search

        Parameters:
            queryset: databases visible to the user
            scope: list of dataset names to restrict to, or ``None``
            query: ``Q`` object further restricting the result, or ``None``
        """
        generator = FilterGenerator()

        scope_filters = [generator.process_dataset_name(name)
                         for name in (scope or [])]

        if scope_filters:
            queryset = queryset.filter(OR(scope_filters))

        if query:
            queryset = queryset.filter(query)

        queryset = queryset.distinct()

        serializer = SearchResultSerializer(queryset, many=True,
                                            name_field='name')
        return serializer.data

    def _retrieve_experiment_results(self, user, filters):
        """Collect the finished experiments matching ``filters``

        Returns:
            A dict with the keys 'experiments', 'dataformats',
            'common_analyzers' and 'common_protocols'. The analyzer results
            are only filled in when all the experiments share at least one
            analyzer.
        """
        results = {
            'experiments': [],
            'dataformats': {},
            'common_analyzers': [],
            'common_protocols': [],
        }

        if not filters:
            return results

        # Use the experiment filters
        experiments = Experiment.objects.for_user(user, True).filter(
            status=Experiment.DONE)

        for filter_entry in filters:
            experiments = apply_filter(experiments, filter_entry)

        experiments = experiments.distinct()

        # Retrieve informations about each experiment and determine if there
        # is at least one common analyzer
        common_protocols = None
        common_analyzers = None

        # (analyzer block, response entry) pairs. Remembering them avoids
        # running the experiments query a second time and matching its rows
        # by position, which is unreliable as the query has no explicit
        # ordering.
        analyzer_blocks = []

        for experiment in experiments.iterator():
            protocols = list(set(
                dataset.protocol.fullname()
                for dataset in experiment.referenced_datasets.iterator()
            ))

            experiment_entry = {
                'name': experiment.fullname(),
                'toolchain': experiment.toolchain.fullname(),
                'description': experiment.short_description,
                'public': (experiment.sharing == Shareable.PUBLIC),
                'attestation_number': None,
                'attestation_locked': False,
                'end_date': experiment.end_date,
                'protocols': protocols,
                'analyzers': [],
            }

            if experiment.has_attestation():
                experiment_entry['attestation_number'] = experiment.attestation.number
                experiment_entry['attestation_locked'] = experiment.attestation.locked

            experiment_analyzers = []
            for analyzer_block in experiment.blocks.filter(analyzer=True).iterator():
                analyzer_name = analyzer_block.algorithm.fullname()

                analyzer_entry = {
                    'name': analyzer_name,
                    'block': analyzer_block.name,
                    'results': {},
                }

                experiment_entry['analyzers'].append(analyzer_entry)
                experiment_analyzers.append(analyzer_name)
                analyzer_blocks.append((analyzer_block, analyzer_entry))

                if analyzer_name not in results['dataformats']:
                    results['dataformats'][analyzer_name] = json.loads(
                        analyzer_block.algorithm.result_dataformat)

            # Intersect with what the previous experiments had in common
            if common_analyzers is None:
                common_analyzers = experiment_analyzers
            else:
                common_analyzers = [name for name in common_analyzers
                                    if name in experiment_analyzers]

            if common_protocols is None:
                common_protocols = protocols
            else:
                common_protocols = [name for name in common_protocols
                                    if name in protocols]

            results['experiments'].append(experiment_entry)

        # No experiment matched the filters
        if not results['experiments']:
            return results

        results['common_analyzers'] = common_analyzers
        results['common_protocols'] = common_protocols

        # No common analyzer found, don't retrieve any result
        if not common_analyzers:
            results['dataformats'] = {}
            return results

        # Retrieve the results of each experiment
        for analyzer_block, analyzer_entry in analyzer_blocks:
            for analyzer_result in analyzer_block.results.iterator():
                analyzer_entry['results'][analyzer_result.name] = {
                    'type': analyzer_result.type,
                    'primary': analyzer_result.primary,
                    'value': analyzer_result.value(),
                }

        return results
#------------------------------------------------
class SearchSaveView(CommonContextMixin, SerializerFieldsMixin,
                     generics.CreateAPIView, generics.UpdateAPIView):
    """
    This endpoint allows to save and update a search query

    Saving a search allows to re-run the same query later without
    having to redo the query/filtering which might get complex.

    Note that two consecutive runs of a search might yield different results
    """

    model = Search
    permission_classes = [permissions.IsAuthenticated]
    serializer_class = SearchWriteSerializer

    def build_results(self, request, search):
        """Build the response payload describing ``search``

        The payload only holds 'html_description', and only when that field
        is part of the fields returned by ``get_serializer_fields()``.
        """
        requested_fields = self.get_serializer_fields(request)

        if 'html_description' not in requested_fields:
            return {}

        # Retrieve the description in HTML format
        description = search.description
        html = ensure_html(description) if len(description) > 0 else ''

        return {'html_description': html}

    def post(self, request):
        """Validate and save a new search, answering with its name and URL"""
        writer = self.get_serializer(data=request.data)
        writer.is_valid(raise_exception=True)
        saved = writer.save()

        payload = self.build_results(request, saved)
        payload['fullname'] = saved.fullname()
        payload['url'] = saved.get_absolute_url()

        return Response(payload, status=status.HTTP_201_CREATED)

    def put(self, request, author_name, name):
        """Partially update the search ``name`` of ``author_name``

        Answers with a 404 when no such search exists.
        """
        # NOTE(review): nothing in this method compares request.user with the
        # author of the search - presumably enforced by the serializer or a
        # mixin; confirm
        existing = get_object_or_404(Search,
                                     author__username=author_name,
                                     name=name)

        writer = self.get_serializer(instance=existing, data=request.data,
                                     partial=True)
        writer.is_valid(raise_exception=True)
        writer.save()

        return Response(self.build_results(request, existing))
#------------------------------------------------
class ListSearchView(CommonContextMixin, generics.ListAPIView):
    """
    Lists all available search from a user
    """

    permission_classes = [permissions.AllowAny]
    serializer_class = SearchSerializer

    def get_queryset(self):
        """Return the searches of the author named in the URL

        The candidates come from ``Search.objects.for_user()`` called with
        the requesting user, then are narrowed down to the given author.
        """
        requested_author = self.kwargs['author_name']

        candidates = Search.objects.for_user(self.request.user, True)
        candidates = candidates.select_related()

        return candidates.filter(author__username=requested_author)
#----------------------------------------------------------
class RetrieveDestroySearchAPIView(CommonContextMixin, SerializerFieldsMixin,
                                   IsAuthorOrReadOnlyMixin,
                                   generics.RetrieveDestroyAPIView):
    """
    Delete the given search
    """
    # Besides the deletion named above, this view also defines get() to
    # retrieve a search.

    model = Search
    serializer_class = SearchSerializer

    def get_object(self):
        """Return the search designated by the URL, or answer with a 404

        The lookup is limited to what ``for_user()`` returns for the
        requesting user.
        """
        candidates = self.model.objects.for_user(self.request.user, True)

        return get_object_or_404(
            candidates,
            author__username=self.kwargs.get('author_name'),
            name=self.kwargs.get('object_name'),
        )

    def get(self, request, *args, **kwargs):
        """Serialize the requested search

        ``allow_sharing`` is only passed as true to
        ``get_serializer_fields()`` when the requesting user is the author
        of the search.
        """
        search = self.get_object()

        # Process the query string
        is_author = (request.user == search.author)
        selected_fields = self.get_serializer_fields(request,
                                                     allow_sharing=is_author)

        return Response(self.get_serializer(search, fields=selected_fields).data)
#------------------------------------------------
class ShareSearchView(ShareView):
    """
    Share the given search with other users/teams
    """

    model = Search
    permission_classes = [permissions.AllowAny]

    def get_queryset(self):
        """Return the queryset of the parent view, with the version set to 1

        The 'version' URL keyword argument is forced to 1 before delegating
        to ``ShareView.get_queryset()``.
        """
        # NOTE(review): presumably ShareView looks objects up by version while
        # searches are not versioned - confirm against ShareView
        self.kwargs.update(version=1)

        return super(ShareSearchView, self).get_queryset()