Source code for bob.learn.pytorch.datasets.casia_webface

#!/usr/bin/env python
# encoding: utf-8

import os
import numpy

from torch.utils.data import Dataset, DataLoader

import bob.io.base
import bob.io.image

from .utils import map_labels


class CasiaWebFaceDataset(Dataset):
    """Class representing the CASIA WebFace dataset

    Note that here the only label is the identity.

    Attributes
    ----------
    root_dir : str
        The path to the data
    transform : `torchvision.transforms`
        The transform(s) to apply to the face images
    data_files : list of :obj:`str`
        The list of data files
    id_labels : list of :obj:`int`
        The list of identities, for each data file

    """

    def __init__(self, root_dir, transform=None, start_index=0):
        """Init function

        Parameters
        ----------
        root_dir : str
            The path to the data
        transform : :py:class:`torchvision.transforms`
            The transform(s) to apply to the face images
        start_index : int
            Label of the first identity (useful if you use several databases)

        """
        self.root_dir = root_dir
        self.transform = transform

        self.data_files = []
        id_labels = []

        # the expected layout is <root_dir>/<integer identity>/<image files>
        for root, dirs, files in os.walk(self.root_dir):
            for name in files:
                path = root.split(os.sep)
                subject = int(path[-1])
                self.data_files.append(os.path.join(root, name))
                id_labels.append(subject)

        self.id_labels = map_labels(id_labels, start_index)

    def __len__(self):
        """Returns the length of the dataset (i.e. the number of examples)

        Returns
        -------
        int
            The number of examples in the dataset

        """
        return len(self.data_files)

    def __getitem__(self, idx):
        """Returns a sample from the dataset

        Returns
        -------
        dict
            An example of the dataset, containing the transformed face image
            and its identity

        """
        image = bob.io.base.load(self.data_files[idx])
        identity = self.id_labels[idx]
        sample = {"image": image, "label": identity}

        if self.transform:
            sample = self.transform(sample)

        return sample
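A minimal usage sketch follows; the directory path and batch size are placeholders, not part of the package. The constructor assumes one sub-directory per identity, named with the integer identity label and containing that subject's face images.

from torch.utils.data import DataLoader
from bob.learn.pytorch.datasets.casia_webface import CasiaWebFaceDataset

# placeholder path: expects <root_dir>/<integer identity>/<image files>
dataset = CasiaWebFaceDataset(root_dir="/path/to/casia-webface")
print(len(dataset))        # number of face images found by os.walk

sample = dataset[0]        # dict with keys "image" and "label"
print(sample["label"])     # identity label remapped by map_labels

# standard PyTorch batching; in practice a transform should first turn every
# image into a fixed-size tensor so the default collate function can stack them
loader = DataLoader(dataset, batch_size=64, shuffle=True)

Since the constructor converts each sub-directory name with int(), a non-numeric directory under root_dir will raise a ValueError.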
class CasiaDataset(Dataset):
    """Class representing the CASIA WebFace dataset

    Note that in this class, two labels are provided with each image:
    identity and pose.

    Pose labels have been automatically inferred using the ROC face
    recognition SDK from RankOne. There are 13 pose labels, corresponding
    to clusters of 15 degrees, ranging from -90 degrees (left profile)
    to 90 degrees (right profile).

    Attributes
    ----------
    root_dir : str
        The path to the data
    transform : `torchvision.transforms`
        The transform(s) to apply to the face images
    data_files : list of :obj:`str`
        The list of data files
    id_labels : list of :obj:`int`
        The list of identities, for each data file
    pose_labels : list of :obj:`int`
        The list containing the pose labels

    """

    def __init__(self, root_dir, transform=None, start_index=0):
        """Init function

        Parameters
        ----------
        root_dir : str
            The path to the data
        transform : :py:class:`torchvision.transforms`
            The transform(s) to apply to the face images
        start_index : int
            Label of the first identity (useful if you use several databases)

        """
        self.root_dir = root_dir
        self.transform = transform

        dir_to_pose_label = {
            "l90": "0",
            "l75": "1",
            "l60": "2",
            "l45": "3",
            "l30": "4",
            "l15": "5",
            "0": "6",
            "r15": "7",
            "r30": "8",
            "r45": "9",
            "r60": "10",
            "r75": "11",
            "r90": "12",
        }

        # get all the needed files, the pose labels and the id labels;
        # the expected layout is <root_dir>/<pose cluster>/<integer identity>/<image files>
        self.data_files = []
        self.pose_labels = []
        id_labels = []

        for root, dirs, files in os.walk(self.root_dir):
            for name in files:
                path = root.split(os.sep)
                subject = int(path[-1])
                cluster = path[-2]
                self.data_files.append(os.path.join(root, name))
                self.pose_labels.append(int(dir_to_pose_label[cluster]))
                id_labels.append(subject)

        self.id_labels = map_labels(id_labels, start_index)

    def __len__(self):
        """Returns the length of the dataset (i.e. the number of examples)

        Returns
        -------
        int
            The number of examples in the dataset

        """
        return len(self.data_files)

    def __getitem__(self, idx):
        """Returns a sample from the dataset

        Returns
        -------
        dict
            An example of the dataset, containing the transformed face image,
            its identity and pose information

        """
        image = bob.io.base.load(self.data_files[idx])
        identity = self.id_labels[idx]
        pose = self.pose_labels[idx]
        sample = {"image": image, "label": identity, "pose": pose}

        if self.transform:
            sample = self.transform(sample)

        return sample
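Because both classes pass the whole sample dict to the transform, torchvision transforms that expect a bare image cannot be plugged in directly; a small dict-aware callable is needed. The sketch below is one possible wrapper, not part of the package: the class name ToTorchTensor and the path are illustrative, and it assumes the layout implied by the constructor, i.e. <root_dir>/<pose cluster>/<integer identity>/<image files>, with cluster directories named as in dir_to_pose_label.

import torch
from bob.learn.pytorch.datasets.casia_webface import CasiaDataset

class ToTorchTensor:
    """Hypothetical dict-aware transform: converts the loaded image (a
    numpy array, typically in Bob's planar (C, H, W) layout) to a float
    tensor and leaves the identity and pose labels untouched."""

    def __call__(self, sample):
        sample["image"] = torch.from_numpy(sample["image"]).float()
        return sample

# placeholder path
dataset = CasiaDataset(root_dir="/path/to/casia-webface-poses",
                       transform=ToTorchTensor())

sample = dataset[0]
print(sample["image"].shape)   # e.g. torch.Size([3, H, W])
print(sample["label"])         # identity, remapped by map_labels
print(sample["pose"])          # pose cluster index, 0 (l90) to 12 (r90)

The pose index comes straight from dir_to_pose_label, so the cluster directories must be named l90 ... l15, 0, r15 ... r90; any other name raises a KeyError.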