# Source code for bob.bio.face.database.gbu

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# Sat 20 Aug 15:43:10 CEST 2016

import os
import xml.sax

from functools import partial

import bob.io.base

from bob.bio.base.pipelines.abstract_classes import Database
from bob.extension import rc
from bob.extension.download import get_file, search_file
from bob.pipelines import DelayedSample, SampleSet

"""
GBU Database

Several of the rules used in this code were imported from
https://gitlab.idiap.ch/bob/bob.db.gbu/-/blob/master/bob/db/gbu/create.py
"""


def load_annotations(annotations_file):
    """Parse the eye annotations from an open, comma-separated text file.

    The first line is treated as a header and skipped, and blank lines are
    ignored.  On every remaining line the first column is a file name; the
    second and third columns feed ``reye`` and the last two columns feed
    ``leye``.

    Parameters
    ----------
    annotations_file : file-like
        Open text file (any iterable of lines works).  It is read to the
        end but not closed here.

    Returns
    -------
    dict
        Maps the base name of each file (directory and extension removed)
        to ``{"leye": (last, second_to_last), "reye": (third, second)}``
        with every value converted to ``float``.

    Raises
    ------
    ValueError
        If a coordinate column cannot be converted to ``float``.
    IndexError
        If a non-blank line has fewer than three columns.
    """
    annotations = {}

    rows = iter(annotations_file)
    next(rows, None)  # discard the header line (no-op on an empty file)

    for row in rows:
        # A blank line (e.g. trailing newline padding) carries no data and
        # would otherwise make ``float("")`` fail.
        if not row.strip():
            continue

        fields = row.split(",")
        name = os.path.splitext(os.path.basename(fields[0]))[0]
        # NOTE(review): the columns are swapped when building the tuples,
        # presumably to turn (x, y) columns into (y, x) pairs -- confirm
        # against the CSV header.
        annotations[name] = {
            "leye": (float(fields[-1]), float(fields[-2])),
            "reye": (float(fields[2]), float(fields[1])),
        }
    return annotations


class File:
    """Plain record holding the three values attached to one list entry.

    Parameters
    ----------
    subject_id
        Identifier of the subject.
    reference_id
        Identifier of the individual entry.
    path
        Path of the entry's file.
    """

    def __init__(self, subject_id, reference_id, path):
        self.subject_id = subject_id
        self.reference_id = reference_id
        self.path = path

    def __repr__(self):
        # Makes entries readable in logs and debugger output.
        return (
            f"{type(self).__name__}(subject_id={self.subject_id!r}, "
            f"reference_id={self.reference_id!r}, path={self.path!r})"
        )


class XmlFileReader(xml.sax.handler.ContentHandler):
    """SAX content handler that collects the entries of an XML file list.

    Every time a ``biometric-signature`` element closes, the signature name
    and the most recently seen ``presentation`` element are combined into a
    :py:class:`File`, which is stored in ``m_file_list`` under the
    presentation name.
    """

    def __init__(self):
        super().__init__()
        # State of the entry currently being parsed; reset after each entry.
        self.m_signature = None
        self.m_path = None
        self.m_presentation = None
        # Maps presentation name -> File
        self.m_file_list = dict()

    def startElement(self, name, attrs):
        """Record the attributes of the elements that describe an entry.

        Elements other than ``biometric-signature`` and ``presentation``
        are ignored.
        """
        if name == "biometric-signature":
            self.m_signature = attrs["name"]  # subject_id
        elif name == "presentation":
            # The file name is stored without its extension.
            self.m_path = os.path.splitext(attrs["file-name"])[0]  # path
            self.m_presentation = attrs["name"]  # reference_id

    def endElement(self, name):
        """Store the finished entry when a ``biometric-signature`` closes.

        Raises
        ------
        ValueError
            If the signature closes before a signature name, a file name
            and a presentation name have all been read.
        """
        if name != "biometric-signature":
            return

        # Explicit check instead of ``assert`` so that it is still enforced
        # when Python runs with ``-O``.
        if (
            self.m_signature is None
            or self.m_path is None
            or self.m_presentation is None
        ):
            raise ValueError(
                "Incomplete `biometric-signature` entry: "
                f"signature={self.m_signature!r}, path={self.m_path!r}, "
                f"presentation={self.m_presentation!r}"
            )

        # add a file to the sessions
        self.m_file_list[self.m_presentation] = File(
            subject_id_from_signature(self.m_signature),
            self.m_presentation,
            self.m_path,
        )

        self.m_presentation = self.m_signature = self.m_path = None


def subject_id_from_signature(signature):
    """Return the integer subject id encoded in a signature name.

    The first four characters of ``signature`` are discarded and the
    remainder is parsed as a base-10 integer (leading zeros are accepted).
    """
    numeric_suffix = signature[4:]
    return int(numeric_suffix)


def read_list(xml_file, eye_file=None):
    """Parse an XML file list and return its entries.

    Parameters
    ----------
    xml_file
        Source handed to :py:func:`xml.sax.parse`.
    eye_file
        Accepted but not used by this function.

    Returns
    -------
    dict
        The ``m_file_list`` mapping filled by :py:class:`XmlFileReader`.
    """
    reader = XmlFileReader()
    xml.sax.parse(xml_file, reader)
    return reader.m_file_list


class GBUDatabase(Database):
    """
    The GBU (Good, Bad and Ugly) database consists of parts of the MBGC-V1 image set.
    It defines three protocols, i.e., `Good`, `Bad` and `Ugly` for which different model and probe images are used.


    .. warning::

      To use this dataset protocol, you need to have the original files of the GBU dataset.
      Once you have it downloaded, please run the following command to set the path for Bob

        .. code-block:: sh

            bob config set bob.bio.face.gbu.directory [GBU PATH]


    The code below allows you to fetch the gallery and probes of the "Good" protocol.

    .. code-block:: python

        >>> from bob.bio.face.database import GBUDatabase
        >>> gbu = GBUDatabase(protocol="Good")
        >>>
        >>> # Fetching the gallery
        >>> references = gbu.references()
        >>> # Fetching the probes
        >>> probes = gbu.probes()


    Parameters
    ----------
    protocol : str
        One of the names returned by :py:meth:`protocols`.
    annotation_type : str
        Accepted for interface compatibility only: the value forwarded to
        the base class is always ``"eyes-center"``.
    fixed_positions
        Forwarded unchanged to the base class constructor.
    original_directory : str
        Directory that is joined with the relative path of every sample to
        build the name of the image file.  The default is read from the
        ``bob.bio.face.gbu.directory`` configuration entry when this module
        is imported.
    extension : str
        Suffix appended to the relative path of every sample.
    """

    def __init__(
        self,
        protocol,
        annotation_type="eyes-center",
        fixed_positions=None,
        original_directory=rc.get("bob.bio.face.gbu.directory"),
        extension=".jpg",
    ):
        # Downloading the archive with the XML lists if it does not exist
        self.filename = get_file(
            "gbu-xmls.tar.gz",
            GBUDatabase.urls(),
            file_hash="827de43434ee84020c6a949ece5e4a4d",
        )

        # Per-protocol caches filled lazily by references() and probes()
        self.references_dict = {}
        self.probes_dict = {}

        # Filled on first use by _ensure_annotations()
        self.annotations = None
        self.original_directory = original_directory
        self.extension = extension

        # Filled on first use by background_model_samples()
        self.background_samples = None
        self._background_files = [
            "GBU_Training_Uncontrolledx1.xml",
            "GBU_Training_Uncontrolledx2.xml",
            "GBU_Training_Uncontrolledx4.xml",
            "GBU_Training_Uncontrolledx8.xml",
        ]

        super().__init__(
            name="gbu",
            protocol=protocol,
            score_all_vs_all=True,
            # NOTE: deliberately not the ``annotation_type`` argument; the
            # annotations loaded by this class only contain eye positions.
            annotation_type="eyes-center",
            fixed_positions=fixed_positions,
            memory_demanding=True,
        )

    @staticmethod
    def protocols():
        """Return the names of the supported protocols."""
        return ["Good", "Bad", "Ugly"]

    @staticmethod
    def urls():
        """Return the URLs from which the XML-list archive is fetched."""
        return [
            "https://www.idiap.ch/software/bob/databases/latest/gbu-xmls.tar.gz",
            "http://www.idiap.ch/software/bob/databases/latest/gbu-xmls.tar.gz",
        ]

    def background_model_samples(self):
        """Return the sample sets of all training lists (cached after the first call)."""
        if self.background_samples is None:
            self._ensure_annotations()

            # Build into a local list so that a failure while reading one of
            # the lists does not leave a partially filled cache behind.
            samples = []
            for list_name in self._background_files:
                list_file = search_file(self.filename, list_name)
                samples += self._make_sampleset_from_filedict(
                    read_list(list_file)
                )
            self.background_samples = samples
        return self.background_samples

    def probes(self, group="dev"):
        """Return the probe sample sets of the current protocol (cached).

        Every probe sample set carries the ``reference_id`` of all sample
        sets returned by :py:meth:`references` in its ``references`` field.
        The ``group`` argument is not used.
        """
        if self.protocol not in self.probes_dict:
            self._check_protocol(self.protocol)
            self._ensure_annotations()

            query_file = search_file(
                self.filename, f"GBU_{self.protocol}_Query.xml"
            )
            reference_ids = [x.reference_id for x in self.references()]

            self.probes_dict[
                self.protocol
            ] = self._make_sampleset_from_filedict(
                read_list(query_file), reference_ids
            )
        return self.probes_dict[self.protocol]

    def references(self, group="dev"):
        """Return the gallery sample sets of the current protocol (cached).

        The ``group`` argument is not used.
        """
        if self.protocol not in self.references_dict:
            self._check_protocol(self.protocol)
            self._ensure_annotations()

            target_file = search_file(
                self.filename, f"GBU_{self.protocol}_Target.xml"
            )
            self.references_dict[
                self.protocol
            ] = self._make_sampleset_from_filedict(read_list(target_file))

        return self.references_dict[self.protocol]

    def groups(self):
        """Return the names of the available groups."""
        return ["dev"]

    def all_samples(self, group="dev"):
        """Return the references followed by the probes of the current protocol.

        Raises
        ------
        ValueError
            If ``group`` is not one of :py:meth:`groups`.
        """
        self._check_group(group)

        return self.references() + self.probes()

    def _check_protocol(self, protocol):
        """Raise ``ValueError`` unless ``protocol`` is one of :py:meth:`protocols`."""
        # Explicit raise instead of ``assert`` so that the check is still
        # enforced when Python runs with ``-O``.
        if protocol not in self.protocols():
            raise ValueError(
                "Unvalid protocol `{}` not in {}".format(
                    protocol, self.protocols()
                )
            )

    def _check_group(self, group):
        """Raise ``ValueError`` unless ``group`` is one of :py:meth:`groups`."""
        if group not in self.groups():
            raise ValueError(
                "Unvalid group `{}` not in {}".format(group, self.groups())
            )

    def _ensure_annotations(self):
        """Load the eye annotations from ``alleyes.csv`` on first use."""
        if self.annotations is not None:
            return

        annotations_file = search_file(self.filename, "alleyes.csv")
        try:
            self.annotations = load_annotations(annotations_file)
        finally:
            # The annotations are fully read by now, so the handle can be
            # released instead of being left open.
            if annotations_file is not None:
                annotations_file.close()

    def _make_sampleset_from_filedict(self, file_dict, reference_ids=None):
        """Wrap every entry of ``file_dict`` into a one-sample ``SampleSet``.

        Parameters
        ----------
        file_dict : dict
            Mapping whose values expose ``path``, ``reference_id`` and
            ``subject_id`` (as returned by :py:func:`read_list`).
        reference_ids : list or None
            When given, it is attached to every sample set as
            ``references``; the same list object is shared by all of them.

        Returns
        -------
        list
            One ``SampleSet`` per entry, each holding a single
            ``DelayedSample`` that loads the image on demand.

        Raises
        ------
        KeyError
            If an entry has no annotations under the base name of its path.
        """
        # Identical for every entry, so it is built once.
        extra = {} if reference_ids is None else {"references": reference_ids}

        samplesets = []
        for f in file_dict.values():
            # Annotations are indexed by the base name of the file
            annotations_key = os.path.basename(f.path)
            image_path = os.path.join(
                self.original_directory, f.path + self.extension
            )

            samplesets.append(
                SampleSet(
                    key=f.path,
                    reference_id=f.reference_id,
                    subject_id=f.subject_id,
                    **extra,
                    samples=[
                        DelayedSample(
                            key=f.path,
                            annotations=self.annotations[annotations_key],
                            load=partial(bob.io.base.load, image_path),
                        )
                    ],
                )
            )
        return samplesets