Source code for bob.learn.pytorch.datasets.utils

#!/usr/bin/env python
# encoding: utf-8

import numpy

import torchvision.transforms as transforms


class FaceCropper:
    """
    Class to crop a face, based on eye positions
    """

    def __init__(self, cropped_height, cropped_width, color_channel="rgb"):
        # the face cropper
        from bob.bio.face.preprocessor import FaceCrop

        cropped_image_size = (cropped_height, cropped_width)
        right_eye_pos = (cropped_height // 5, cropped_width // 4 - 1)
        left_eye_pos = (cropped_height // 5, cropped_width // 4 * 3)
        cropped_positions = {"leye": left_eye_pos, "reye": right_eye_pos}
        self.color_channel = color_channel
        self.face_cropper = FaceCrop(
            cropped_image_size=cropped_image_size,
            cropped_positions=cropped_positions,
            color_channel=color_channel,
            dtype="uint8",
        )

    def __call__(self, sample):
        cropped = self.face_cropper(sample["image"], sample["eyes"])
        sample["image"] = cropped
        if self.color_channel == "gray":
            sample["image"] = sample["image"][..., numpy.newaxis]
        return sample
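# Usage sketch (illustrative, not part of the original module): cropping a
# face from a bob-format color image (CxHxW, uint8). The eye coordinates are
# made-up (y, x) positions in the input image; the annotation dict is assumed
# to carry the same ``leye``/``reye`` keys used for ``cropped_positions``.
#
#     cropper = FaceCropper(cropped_height=128, cropped_width=128)
#     sample = {"image": image, "eyes": {"reye": (120, 110), "leye": (120, 180)}}
#     sample = cropper(sample)  # sample["image"] is the cropped face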
class FaceCropAlign:
    """
    Wrapper for the FaceCropAlign preprocessor of bob.pad.face
    """

    def __init__(
        self,
        face_size,
        rgb_output_flag=False,
        use_face_alignment=True,
        alignment_type="lightcnn",
        face_detection_method="mtcnn",
    ):
        """
        Init function

        Parameters
        ----------
        face_size: :obj:`int`
            The size of the cropped face (square)
        rgb_output_flag: :py:class:`bool`
            Return an RGB cropped face if True, grayscale otherwise
        use_face_alignment: :py:class:`bool`
            If set to True, the face will be aligned using the facial
            landmarks detected locally.
            Works only when ``face_detection_method is not None``.
        alignment_type: :py:class:`str`
            Specifies the alignment type to use if ``use_face_alignment``
            is set to ``True``. Two methods are currently implemented:
            ``default``, which aligns the face by making the eyes horizontal,
            and ``lightcnn``, which aligns the face such that the eye centers
            and mouth center are mapped to predefined positions. The
            ``lightcnn`` option overrides the face size option, since the
            required output is always 128x128. This is suitable for use
            with the LightCNN model.
        face_detection_method: :py:class:`str`
            A package to be used for face detection and landmark detection.
            Options supported by this class: "dlib" and "mtcnn"
        """
        from bob.pad.face.preprocessor import FaceCropAlign

        self.face_cropper = FaceCropAlign(
            face_size,
            rgb_output_flag,
            use_face_alignment,
            alignment_type=alignment_type,
            face_detection_method=face_detection_method,
        )

    def __call__(self, sample):
        cropped = self.face_cropper(sample["image"])
        if cropped is None:
            print("Face not detected ...")
            cropped = numpy.zeros((128, 128))
        sample["image"] = cropped[..., numpy.newaxis]
        return sample
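# Usage sketch (illustrative): with the defaults above, faces are detected
# with MTCNN, aligned LightCNN-style and returned as 128x128 grayscale; the
# wrapper substitutes an all-zero (black) image when no face is found.
#
#     cropper = FaceCropAlign(face_size=128)
#     sample = cropper({"image": image})  # sample["image"] has shape HxWx1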
class RollChannels(object):
    """
    Class to transform a bob image (CxHxW) into a skimage-style image
    (HxWxC).
    """

    def __call__(self, sample):
        # roll the channel axis to the back: CxHxW -> HxWxC
        temp = numpy.rollaxis(numpy.rollaxis(sample["image"], 2), 2)
        sample["image"] = temp
        return sample
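# Shape check (illustrative): RollChannels only moves the channel axis to
# the back, leaving the pixel values untouched.
#
#     sample = {"image": numpy.zeros((3, 64, 64), dtype="uint8")}
#     assert RollChannels()(sample)["image"].shape == (64, 64, 3)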
class ToTensor(object):
    def __init__(self):
        self.op = transforms.ToTensor()

    def __call__(self, sample):
        # a grayscale image may come without a channel axis: add it, so
        # that torchvision's ToTensor gets an HxWxC array
        if len(sample["image"].shape) == 2:
            sample["image"] = sample["image"][..., numpy.newaxis]
        sample["image"] = self.op(sample["image"])
        return sample
class Normalize(object):
    def __init__(self, mean, std):
        self.op = transforms.Normalize(mean, std)

    def __call__(self, sample):
        sample["image"] = self.op(sample["image"])
        return sample
class Resize(object):
    def __init__(self, size):
        self.op = transforms.Resize(size)

    def __call__(self, sample):
        # convert to PIL image
        from PIL.Image import fromarray

        img = fromarray(sample["image"].squeeze())
        img = self.op(img)
        sample["image"] = numpy.array(img)
        sample["image"] = sample["image"][..., numpy.newaxis]
        return sample
class ToGray(object):
    def __init__(self):
        self.op = transforms.Grayscale()

    def __call__(self, sample):
        # convert to PIL image
        from PIL.Image import fromarray

        img = fromarray(sample["image"].squeeze())
        img = self.op(img)
        sample["image"] = numpy.array(img)
        sample["image"] = sample["image"][..., numpy.newaxis]
        return sample
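# Pipeline sketch (illustrative): the dict-based wrappers above compose with
# torchvision's Compose. Starting from a bob-format color image:
#
#     preprocess = transforms.Compose(
#         [
#             RollChannels(),  # CxHxW -> HxWxC
#             ToGray(),        # HxWx3 -> HxWx1
#             Resize(64),      # resize the single-channel image
#             ToTensor(),      # HxWx1 -> 1xHxW float tensor in [0, 1]
#             Normalize(mean=[0.5], std=[0.5]),
#         ]
#     )
#     sample = preprocess({"image": image})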
def map_labels(raw_labels, start_index=0):
    """
    Map the ID labels to consecutive integers starting at ``start_index``

    Parameters
    ----------
    raw_labels: list of :obj:`int`
        The labels of the samples
    """
    possible_labels = sorted(set(raw_labels))
    labels = numpy.array(raw_labels)
    for i in range(len(possible_labels)):
        label = possible_labels[i]
        labels[numpy.where(labels == label)[0]] = i + start_index
    return labels
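# Worked example: non-contiguous IDs are remapped to consecutive integers,
# preserving their sorted order.
#
#     map_labels([13, 5, 13, 42])                 # -> array([1, 0, 1, 2])
#     map_labels([13, 5, 13, 42], start_index=1)  # -> array([2, 1, 2, 3])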
from torch.utils.data import Dataset

import bob.io.base
import bob.io.image  # imported for its side effect: lets bob.io.base.load read image files
class ConcatDataset(Dataset):
    """
    Class to concatenate two or more datasets for DR-GAN training

    Parameters
    ----------
    datasets: list
        The list of datasets (as torch.utils.data.Dataset)
    """

    def __init__(self, datasets):
        self.transform = datasets[0].transform
        self.data_files = sum((d.data_files for d in datasets), [])
        self.pose_labels = sum((d.pose_labels for d in datasets), [])
        self.id_labels = sum((d.id_labels for d in datasets), [])

    def __len__(self):
        """
        Return the length of the dataset (i.e. the number of examples)
        """
        return len(self.data_files)

    def __getitem__(self, idx):
        """
        Return a sample from the dataset
        """
        image = bob.io.base.load(self.data_files[idx])
        identity = self.id_labels[idx]
        pose = self.pose_labels[idx]
        sample = {"image": image, "id": identity, "pose": pose}
        if self.transform:
            sample = self.transform(sample)
        return sample
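# Usage sketch (illustrative; dataset names are hypothetical): the datasets
# passed in are assumed to expose ``transform``, ``data_files``,
# ``pose_labels`` and ``id_labels`` attributes, as the DR-GAN datasets of
# this package do.
#
#     from torch.utils.data import DataLoader
#
#     training_set = ConcatDataset([dataset_a, dataset_b])
#     dataloader = DataLoader(training_set, batch_size=64, shuffle=True)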
class ChannelSelect(object):
    """Subselects or re-orders channels in a multi-channel image.

    Expects a numpy.ndarray as input, of size ``HxWxnum_channels``, and
    returns an image of size ``HxWxlen(selected_channels)``, where the last
    dimension is subselected using the indexes in the list
    ``selected_channels``.

    Attributes
    ----------
    selected_channels: list
        The indexes of the channels to be selected.
    """

    def __init__(self, selected_channels=[0, 1, 2, 3]):
        """
        Parameters
        ----------
        selected_channels: list
            The indexes of the channels to be selected.
        """
        self.selected_channels = selected_channels

    def __call__(self, img):
        """
        Parameters
        ----------
        img: numpy.ndarray
            A multi-channel image, HxWxnum_channels
        """
        return img[:, :, self.selected_channels]

    def __repr__(self):
        return (
            self.__class__.__name__
            + "(selected_channels={}, output channels={})".format(
                self.selected_channels, len(self.selected_channels)
            )
        )
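# Usage sketch (illustrative): keeping two channels of a four-channel image
# (e.g. a multi-spectral RGB + NIR stack). Note that, unlike the dict-based
# transforms above, ChannelSelect operates directly on the image array.
#
#     select = ChannelSelect(selected_channels=[0, 3])
#     img = numpy.random.rand(128, 128, 4)
#     assert select(img).shape == (128, 128, 2)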
class RandomHorizontalFlipImage(object):
    """Flips the image horizontally; works on numpy arrays.

    Attributes
    ----------
    p: float
        Probability of the returned image being flipped.
    """

    def __init__(self, p=0.5):
        """
        Parameters
        ----------
        p: float
            Probability of the returned image being flipped.
        """
        self.p = p

    def __call__(self, img):
        """
        Parameters
        ----------
        img: numpy.ndarray
            A multi-channel image, HxWxnum_channels
        """
        if numpy.random.random() < self.p:
            imgn = numpy.fliplr(img).copy()
        else:
            imgn = img.copy()
        return imgn

    def __repr__(self):
        return self.__class__.__name__ + "(Flipping Probability={})".format(self.p)
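# Usage sketch (illustrative): like ChannelSelect, this transform works on
# plain HxWxC numpy arrays, so the two compose directly on images:
#
#     augment = transforms.Compose(
#         [ChannelSelect([0, 1, 2]), RandomHorizontalFlipImage(p=0.5)]
#     )
#     flipped = augment(img)  # img: HxWxC numpy array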