#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""Re-usable blocks for legacy bob.bio.base algorithms"""

import functools
import logging
import os
import tempfile

import numpy as np

from bob.bio.base.algorithm import Algorithm
from bob.db.base.utils import (
    check_parameters_for_validity,
    convert_names_to_highlevel,
    convert_names_to_lowlevel,
)
from bob.pipelines import DelayedSample
from bob.pipelines import DelayedSampleSet
from bob.pipelines import SampleSet

from . import pickle_compress, uncompress_unpickle
from .abstract_classes import BioAlgorithm
from .abstract_classes import Database

logger = logging.getLogger("bob.bio.base")


def get_temp_directory(sub_dir):
    """
    Get a temporary directory for legacy algorithms.
    Most of the legacy algorithms are not pickle-serializable; therefore,
    we can't wrap their transformations as :any:`bob.pipelines.Sample`.
    Hence, we need to make them automatically checkpointable.
    This function returns possible temporary directories to store such checkpoints.
    The possible temporary directories are (in order of priority):
       1. `/.../temp/[user]/bob.bio.base.legacy_cache` if you are at Idiap and have access to temp.
       2. `~/temp/` in case the algorithm runs in the CI
       3. `/tmp/bob.bio.base.legacy_cache` in case you are not at Idiap

    Parameters
    ----------

    sub_dir: str
        Sub-directory to checkpoint your data

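    Example
    -------

    A minimal sketch (the exact path returned depends on the environment
    this code runs in, so the call is not executed as a doctest):

    >>> from bob.bio.base.pipelines.vanilla_biometrics.legacy import get_temp_directory
    >>> cache_dir = get_temp_directory("my_algorithm")  # doctest: +SKIP
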
    """

    default_temp = (
        os.path.join("/idiap", "temp", os.environ["USER"])
        if "USER" in os.environ
        else "~/temp"
    )
    if os.path.exists(default_temp):
        return os.path.join(default_temp, "bob.bio.base.legacy_cache", sub_dir)
    else:
        # if /idiap/temp/<USER> does not exist, use a fresh /tmp/tmpxxxxxxxx.
        # Note: ``tempfile.mkdtemp`` is used instead of
        # ``tempfile.TemporaryDirectory`` because the latter deletes the
        # directory as soon as the wrapper object is garbage-collected.
        return os.path.join(tempfile.mkdtemp(), sub_dir)


def _biofile_to_delayed_sample(biofile, database, purpose="probe"):
    return DelayedSample(
        load=functools.partial(
            biofile.load, database.original_directory, database.original_extension,
        ),
        reference_id=str(biofile.client_id),
        key=f"{biofile.path}{purpose}",
        path=f"{biofile.path}{purpose}",
        delayed_attributes=dict(
            annotations=functools.partial(database.annotations, biofile)
        ),
    )
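
# Note on laziness: ``_biofile_to_delayed_sample`` defers all I/O; nothing is
# read from disk until an attribute of the returned sample is accessed.
# Illustration (``biofile`` and ``database`` are placeholders for a legacy
# file object and its database):
#
#   sample = _biofile_to_delayed_sample(biofile, database)
#   data = sample.data            # only now is biofile.load(...) called
#   boxes = sample.annotations    # only now is database.annotations(...) called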


class DatabaseConnector(Database):
    """Wraps a legacy bob.bio.base database and generates conforming samples

    This connector wraps generic bob.bio.base databases and generates
    samples that conform to the specifications of the biometric pipelines
    defined in this package.


    Parameters
    ----------

    database : object
        An instantiated version of a bob.bio.base.Database object

    protocol : str
        The name of the protocol to generate samples from.
        To be plugged at `bob.db.base.Database.objects`.

    allow_scoring_with_all_biometric_references: bool
        If True, allows the scoring function to be performed in one shot with multiple probes.
        This optimization is useful when all probes need to be compared with all biometric references AND
        your scoring function supports this broadcast computation.

    annotation_type: str
        Type of the annotations that the database provides.
        Allowed types are: `eyes-center` and `bounding-box`

    fixed_positions: dict
        In case the database contains one single annotation for all samples.
        This is useful for registered databases.

    memory_demanding: bool
        Signals that the database has some memory-demanding components.
        It might be useful for future processing.

    append_purpose: bool
        If True, the purpose of the sample (world, probe, or bio-ref) is appended to `sample.key`.

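    Example
    -------

    A minimal sketch, assuming ``legacy_db`` is an instantiated legacy
    ``bob.bio.base`` database (the name is a placeholder):

    >>> connector = DatabaseConnector(legacy_db)  # doctest: +SKIP
    >>> training = connector.background_model_samples()  # doctest: +SKIP
    >>> references = connector.references(group="dev")  # doctest: +SKIP
    >>> probes = connector.probes(group="dev")  # doctest: +SKIP
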
    """

    def __init__(
        self,
        database,
        allow_scoring_with_all_biometric_references=True,
        annotation_type="eyes-center",
        fixed_positions=None,
        memory_demanding=False,
        append_purpose=False,
        **kwargs,
    ):
        super().__init__(
            name=database.name,
            protocol=database.protocol,
            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
            memory_demanding=memory_demanding,
            **kwargs,
        )
        self.append_purpose = append_purpose
        self.database = database

    def background_model_samples(self):
        """Returns :any:`bob.pipelines.Sample`'s to train a background model
        (group ``world``).

        Returns
        -------

        samples : list
            List of samples conforming to the pipeline API for background
            model training.
        """
        objects = self.database.training_files()
        return [
            _biofile_to_delayed_sample(
                k, self.database, purpose="world" if self.append_purpose else ""
            )
            for k in objects
        ]

    def references(self, group="dev"):
        """Returns references to enroll biometric references

        Parameters
        ----------

        group : :py:class:`str`, optional
            A ``group`` to be plugged at ``database.objects``

        Returns
        -------

        references : list
            List of samples conforming to the pipeline API for the creation
            of biometric references.  See, e.g., :py:func:`.pipelines.first`.
        """
        retval = []
        for m in self.database.model_ids(groups=group):
            objects = self.database.enroll_files(group=group, model_id=m)
            retval.append(
                SampleSet(
                    [
                        _biofile_to_delayed_sample(
                            k,
                            self.database,
                            purpose="bio-ref" if self.append_purpose else "",
                        )
                        for k in objects
                    ],
                    key=str(m),
                    path=str(m),
                    reference_id=str(m),
                    subject_id=str(self.database.client_id_from_model_id(m)),
                )
            )
        return retval

    def probes(self, group="dev"):
        """Returns probes to score biometric references

        Parameters
        ----------

        group : str
            A ``group`` to be plugged at ``database.objects``

        Returns
        -------

        probes : list
            List of samples conforming to the pipeline API for the creation
            of biometric probes.
        """
        probes = dict()
        for m in self.database.model_ids(groups=group):
            # Getting all the probe objects from a particular biometric
            # reference
            objects = self.database.probe_files(group=group, model_id=m)

            # Creating probe samples
            for o in objects:
                if o.id not in probes:
                    probes[o.id] = SampleSet(
                        [
                            _biofile_to_delayed_sample(
                                o,
                                self.database,
                                purpose="probe" if self.append_purpose else "",
                            )
                        ],
                        key=str(o.path),
                        path=o.path,
                        reference_id=str(m),
                        references=[str(m)],
                        subject_id=o.client_id,
                    )
                else:
                    probes[o.id].references.append(str(m))

        return list(probes.values())
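
    # Illustration of the structure built above (hypothetical ids): a probe
    # file shared by several models ends up as a single SampleSet whose
    # ``references`` lists every biometric reference it must be scored
    # against, e.g.
    #   SampleSet([<probe sample>], key="probe_01", references=["model_1", "model_2"])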

    def all_samples(self, groups=None):
        """Returns all the legacy database files in Sample format

        Parameters
        ----------

        groups: list or `None`
            List of groups to consider ('train', 'dev', and/or 'eval').
            If `None` is given, returns samples from all the groups.

        Returns
        -------

        samples: list
            List of all the samples of the database as
            :class:`bob.pipelines.Sample` objects.
        """
        valid_groups = convert_names_to_highlevel(
            self.database.groups(),
            low_level_names=["world", "dev", "eval"],
            high_level_names=["train", "dev", "eval"],
        )
        groups = check_parameters_for_validity(
            parameters=groups,
            parameter_description="groups",
            valid_parameters=valid_groups,
            default_parameters=valid_groups,
        )
        logger.debug(f"Fetching all samples of groups '{groups}'.")
        low_level_groups = convert_names_to_lowlevel(
            names=groups,
            low_level_names=["world", "dev", "eval"],
            high_level_names=["train", "dev", "eval"],
        )
        objects = self.database.all_files(groups=low_level_groups)
        return [
            _biofile_to_delayed_sample(
                k, self.database, "all" if self.append_purpose else ""
            )
            for k in objects
        ]
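
    # Group-name mapping used above: legacy databases name their groups
    # "world"/"dev"/"eval", while the pipeline API uses "train"/"dev"/"eval";
    # e.g. convert_names_to_lowlevel(names=["train"], ...) maps "train" back
    # to "world" before querying the legacy database.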

    def groups(self):
        grps = self.database.groups()
        grps = convert_names_to_highlevel(
            names=grps,
            low_level_names=["world", "dev", "eval"],
            high_level_names=["train", "dev", "eval"],
        )
        return grps

    def protocols(self):
        return self.database.protocols()


class BioAlgorithmLegacy(BioAlgorithm):
    """Biometric Algorithm that handles :py:class:`bob.bio.base.algorithm.Algorithm`

    In this design, :any:`BioAlgorithm.enroll` maps to
    :py:meth:`bob.bio.base.algorithm.Algorithm.enroll` and
    :any:`BioAlgorithm.score` maps to
    :py:meth:`bob.bio.base.algorithm.Algorithm.score`

    .. note::
        Legacy algorithms are always checkpointable

    Parameters
    ----------

    instance: object
        An instance of :py:class:`bob.bio.base.algorithm.Algorithm`

    Example
    -------
    >>> from bob.bio.base.pipelines.vanilla_biometrics import BioAlgorithmLegacy
    >>> from bob.bio.base.algorithm import PCA
    >>> biometric_algorithm = BioAlgorithmLegacy(PCA(subspace_dimension=0.99), base_dir="./", projector_file="Projector.hdf5")

    """

    def __init__(
        self, instance, base_dir, force=False, projector_file=None, **kwargs,
    ):
        super().__init__(**kwargs)

        if not isinstance(instance, Algorithm):
            raise ValueError(
                f"Only `bob.bio.base.Algorithm` supported, not `{instance}`"
            )
        logger.info(f"Using `bob.bio.base` legacy algorithm {instance}")

        if instance.requires_projector_training and projector_file is None:
            raise ValueError(f"{instance} requires a `projector_file` to be set")

        self.instance = instance
        self.is_background_model_loaded = False

        self.projector_file = projector_file
        self.biometric_reference_dir = os.path.join(base_dir, "biometric_references")
        self._biometric_reference_extension = ".hdf5"
        self.score_dir = os.path.join(base_dir, "scores")
        self.force = force
        self._base_dir = base_dir
        self._score_extension = ".pickle.gz"

    @property
    def base_dir(self):
        return self._base_dir

    @base_dir.setter
    def base_dir(self, v):
        self._base_dir = v
        self.biometric_reference_dir = os.path.join(
            self._base_dir, "biometric_references"
        )
        self.score_dir = os.path.join(self._base_dir, "scores")
        if self.projector_file is not None:
            self.projector_file = os.path.join(
                self._base_dir, os.path.basename(self.projector_file)
            )

    def load_legacy_background_model(self):
        # Loading background model
        if not self.is_background_model_loaded:
            self.instance.load_projector(self.projector_file)
            self.is_background_model_loaded = True

    def enroll(self, enroll_features, **kwargs):
        self.load_legacy_background_model()
        return self.instance.enroll(enroll_features)

    def score(self, biometric_reference, data, **kwargs):
        self.load_legacy_background_model()
        scores = self.instance.score(biometric_reference, data)
        if isinstance(scores, list):
            scores = self.instance.probe_fusion_function(scores)
        return scores

    def score_multiple_biometric_references(self, biometric_references, data, **kwargs):
        scores = self.instance.score_for_multiple_models(biometric_references, data)

        # Preparing the 3D scoring format
        # see: https://gitlab.idiap.ch/bob/bob.bio.base/-/merge_requests/264
        scores = np.expand_dims(scores, axis=1)

        return scores
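
    # Shape illustration for the 3D format (assuming ``score_for_multiple_models``
    # returns one score per reference, i.e. shape ``(n_references,)``):
    # ``np.expand_dims(..., axis=1)`` yields ``(n_references, 1)``, one column
    # per probe sample in the sampleset, e.g.
    #   np.expand_dims(np.array([0.1, 0.9]), axis=1)  ->  array([[0.1], [0.9]])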

    def write_biometric_reference(self, sample, path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        self.instance.write_model(sample.data, path)

    def _enroll_sample_set(self, sampleset):
        """Enrolls a sample set with checkpointing"""

        # Amending `models` directory
        path = os.path.join(
            self.biometric_reference_dir,
            str(sampleset.key) + self._biometric_reference_extension,
        )

        if self.force or not os.path.exists(path):
            enrolled_sample = super()._enroll_sample_set(sampleset)

            # saving the new sample
            self.write_biometric_reference(enrolled_sample, path)

        delayed_enrolled_sample = DelayedSample(
            functools.partial(self.instance.read_model, path), parent=sampleset
        )
        return delayed_enrolled_sample

    def write_scores(self, samples, path):
        pickle_compress(path, samples)

    def _score_sample_set(
        self,
        sampleset,
        biometric_references,
        allow_scoring_with_all_biometric_references=False,
    ):
        def _load(path):
            return uncompress_unpickle(path)

        def _make_name(sampleset, biometric_references):
            # The score file name is composed of the sampleset key and the
            # keys of the first 3 biometric references
            name = str(sampleset.key)
            suffix = "_".join([str(s.key) for s in biometric_references[0:3]])
            return name + suffix

        path = os.path.join(
            self.score_dir,
            _make_name(sampleset, biometric_references) + self._score_extension,
        )

        if self.force or not os.path.exists(path):
            # Computing score
            scored_sample_set = super()._score_sample_set(
                sampleset,
                biometric_references,
                allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
            )
            self.write_scores(scored_sample_set.samples, path)
            scored_sample_set = DelayedSampleSet(
                functools.partial(_load, path), parent=scored_sample_set
            )
        else:
            scored_sample_set = SampleSet(_load(path), parent=sampleset)

        return scored_sample_set
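
# Sketch of the on-disk checkpoint layout produced by ``BioAlgorithmLegacy``
# (paths are illustrative; the actual file names depend on the sample keys):
#
#   <base_dir>/biometric_references/<sampleset.key>.hdf5        # _enroll_sample_set
#   <base_dir>/scores/<probe key><first 3 ref keys>.pickle.gz   # _score_sample_set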