#!/usr/bin/env python
# -*- coding: utf-8 -*-
import torchvision.transforms.functional as VF
import random
import PIL
from PIL import Image
from torchvision.transforms.transforms import Lambda
from torchvision.transforms.transforms import Compose as TorchVisionCompose
import math
from math import floor
import warnings
import collections
# Printable names for the PIL interpolation constants, used by the
# ``__repr__`` implementations below.
_pil_interpolation_to_str = {
    getattr(Image, name): 'PIL.Image.{0}'.format(name)
    for name in ('NEAREST', 'BILINEAR', 'BICUBIC', 'LANCZOS', 'HAMMING', 'BOX')
}

# Short module-level alias so isinstance checks below stay compact.
Iterable = collections.abc.Iterable
# Compose
class Compose:
    """Composes several transforms.

    Each transform is called with ``*args`` and must return a sequence, so
    that an image and its ground truth(s) are transformed in lockstep.

    Attributes
    ----------
    transforms : list
        list of transforms to compose.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, *args):
        # Thread all positional arguments through every transform in order.
        for t in self.transforms:
            args = t(*args)
        return args

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.transforms:
            format_string += '\n'
            format_string += '    {0}'.format(t)
        format_string += '\n)'
        return format_string
# Preprocessing
class CenterCrop:
    """Crop images at their center.

    Attributes
    ----------
    size : int
        target size, forwarded to
        :py:func:`torchvision.transforms.functional.center_crop`.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, *args):
        # All inputs receive the same crop so image/ground-truth stay aligned.
        return [VF.center_crop(img, self.size) for img in args]
class Crop:
    """Crop images at the given coordinates.

    Attributes
    ----------
    i : int
        upper pixel coordinate.
    j : int
        left pixel coordinate.
    h : int
        height of the cropped image.
    w : int
        width of the cropped image.
    """

    def __init__(self, i, j, h, w):
        self.i = i
        self.j = j
        self.h = h
        self.w = w

    def __call__(self, *args):
        # PIL's crop box is (left, upper, right, lower).
        box = (self.j, self.i, self.j + self.w, self.i + self.h)
        return [img.crop(box) for img in args]
class Pad:
    """Constant padding.

    Attributes
    ----------
    padding : int or tuple
        padding on each border. If a single int is provided this is used to
        pad all borders. If a tuple of length 2 is provided this is the
        padding on left/right and top/bottom respectively. If a tuple of
        length 4 is provided this is the padding for the left, top, right
        and bottom borders respectively.
    fill : int
        pixel fill value for constant fill. Default is 0. If a tuple of
        length 3, it is used to fill R, G, B channels respectively. This
        value is only used when the padding_mode is constant.
    """

    def __init__(self, padding, fill=0):
        self.padding = padding
        self.fill = fill

    def __call__(self, *args):
        # Mode is fixed to 'constant'; ``fill`` is ignored by other modes.
        return [VF.pad(img, self.padding, self.fill, padding_mode='constant')
                for img in args]
class ToTensor:
    """Converts :py:class:`PIL.Image.Image` to :py:class:`torch.Tensor`."""

    def __call__(self, *args):
        return [VF.to_tensor(img) for img in args]
# Augmentations
class RandomHFlip:
    """Flips horizontally.

    Attributes
    ----------
    prob : float
        probability at which image is flipped. Defaults to ``0.5``.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, *args):
        # One coin toss for all inputs keeps image and ground-truth aligned.
        if random.random() < self.prob:
            return [VF.hflip(img) for img in args]
        else:
            return args
class RandomVFlip:
    """Flips vertically.

    Attributes
    ----------
    prob : float
        probability at which image is flipped. Defaults to ``0.5``.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, *args):
        # One coin toss for all inputs keeps image and ground-truth aligned.
        if random.random() < self.prob:
            return [VF.vflip(img) for img in args]
        else:
            return args
class RandomRotation:
    """Rotates by degree.

    Attributes
    ----------
    degree_range : tuple
        integer range of degrees (endpoints included, drawn via
        :py:func:`random.randint`) in which image and ground truth are
        rotated. Defaults to ``(-15, +15)``.
    prob : float
        probability at which image is rotated. Defaults to ``0.5``.
    """

    def __init__(self, degree_range=(-15, +15), prob=0.5):
        self.prob = prob
        self.degree_range = degree_range

    def __call__(self, *args):
        if random.random() < self.prob:
            # A single angle for all inputs keeps image/ground-truth aligned.
            degree = random.randint(*self.degree_range)
            return [VF.rotate(img, degree, resample=Image.BILINEAR)
                    for img in args]
        else:
            return args
class ColorJitter(object):
    """Randomly change the brightness, contrast, saturation and hue.

    Only the first positional argument (the image) is jittered; any further
    arguments (e.g. ground-truth masks) are returned unchanged.

    Attributes
    ----------
    brightness : float
        how much to jitter brightness. brightness_factor
        is chosen uniformly from ``[max(0, 1 - brightness), 1 + brightness]``.
    contrast : float
        how much to jitter contrast. contrast_factor
        is chosen uniformly from ``[max(0, 1 - contrast), 1 + contrast]``.
    saturation : float
        how much to jitter saturation. saturation_factor
        is chosen uniformly from ``[max(0, 1 - saturation), 1 + saturation]``.
    hue : float
        how much to jitter hue. hue_factor is chosen uniformly from
        ``[-hue, hue]``. Should be >=0 and <= 0.5.
    prob : float
        probability at which the operation is applied.
    """

    def __init__(self, brightness=0.3, contrast=0.3, saturation=0.02,
                 hue=0.02, prob=0.5):
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation
        self.hue = hue
        self.prob = prob

    @staticmethod
    def get_params(brightness, contrast, saturation, hue):
        """Build a randomly-ordered composition of the enabled jitters.

        A jitter is enabled when its argument is > 0; each enabled jitter
        samples its factor once, here, so the same factor is applied to
        whatever the returned transform is called on.
        """
        transforms = []
        if brightness > 0:
            brightness_factor = random.uniform(max(0, 1 - brightness), 1 + brightness)
            transforms.append(Lambda(lambda img: VF.adjust_brightness(img, brightness_factor)))
        if contrast > 0:
            contrast_factor = random.uniform(max(0, 1 - contrast), 1 + contrast)
            transforms.append(Lambda(lambda img: VF.adjust_contrast(img, contrast_factor)))
        if saturation > 0:
            saturation_factor = random.uniform(max(0, 1 - saturation), 1 + saturation)
            transforms.append(Lambda(lambda img: VF.adjust_saturation(img, saturation_factor)))
        if hue > 0:
            hue_factor = random.uniform(-hue, hue)
            transforms.append(Lambda(lambda img: VF.adjust_hue(img, hue_factor)))
        # Apply the enabled jitters in a random order.
        random.shuffle(transforms)
        transform = TorchVisionCompose(transforms)
        return transform

    def __call__(self, *args):
        if random.random() < self.prob:
            transform = self.get_params(self.brightness, self.contrast,
                                        self.saturation, self.hue)
            # Only the image is jittered; ground truth(s) pass through.
            trans_img = transform(args[0])
            return [trans_img, *args[1:]]
        else:
            return args
class RandomResizedCrop:
    """Crop to random size and aspect ratio.

    A crop of random size of the original size and a random aspect ratio of
    the original aspect ratio is made. This crop is finally resized to the
    given size. This is popularly used to train the Inception networks.

    Attributes
    ----------
    size : int
        expected output size of each edge
    scale : tuple
        range of size of the origin size cropped. Defaults to ``(0.08, 1.0)``
    ratio : tuple
        range of aspect ratio of the origin aspect ratio cropped. Defaults
        to ``(3. / 4., 4. / 3.)``
    interpolation :
        Defaults to ``PIL.Image.BILINEAR``
    prob : float
        probability at which the operation is applied. Defaults to ``0.5``
    """

    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation=Image.BILINEAR, prob=0.5):
        if isinstance(size, tuple):
            self.size = size
        else:
            self.size = (size, size)
        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
            warnings.warn("range should be of kind (min, max)")
        self.interpolation = interpolation
        self.scale = scale
        self.ratio = ratio
        self.prob = prob

    @staticmethod
    def get_params(img, scale, ratio):
        """Sample crop coordinates ``(i, j, h, w)`` for ``img``.

        Tries up to 10 random (area, aspect-ratio) draws; if none fits
        inside the image, falls back to the largest centered crop whose
        aspect ratio is clamped to the ``ratio`` range.
        """
        area = img.size[0] * img.size[1]
        for attempt in range(10):
            target_area = random.uniform(*scale) * area
            log_ratio = (math.log(ratio[0]), math.log(ratio[1]))
            # Sampling in log-space makes ratios r and 1/r equally likely.
            aspect_ratio = math.exp(random.uniform(*log_ratio))
            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))
            if w <= img.size[0] and h <= img.size[1]:
                i = random.randint(0, img.size[1] - h)
                j = random.randint(0, img.size[0] - w)
                return i, j, h, w
        # Fallback to central crop.  Round to int so downstream cropping
        # always receives integer pixel coordinates (the division/
        # multiplication below would otherwise yield floats; upstream
        # torchvision applies the same cast).
        in_ratio = img.size[0] / img.size[1]
        if in_ratio < min(ratio):
            w = img.size[0]
            h = int(round(w / min(ratio)))
        elif in_ratio > max(ratio):
            h = img.size[1]
            w = int(round(h * max(ratio)))
        else:  # whole image
            w = img.size[0]
            h = img.size[1]
        i = (img.size[1] - h) // 2
        j = (img.size[0] - w) // 2
        return i, j, h, w

    def __call__(self, *args):
        if random.random() < self.prob:
            imgs = []
            for img in args:
                # NOTE(review): crop parameters are drawn independently for
                # every input, so image and ground-truth may get different
                # crops -- confirm with callers that this is intended.
                i, j, h, w = self.get_params(img, self.scale, self.ratio)
                img = VF.resized_crop(img, i, j, h, w, self.size,
                                      self.interpolation)
                imgs.append(img)
            return imgs
        else:
            return args

    def __repr__(self):
        interpolate_str = _pil_interpolation_to_str[self.interpolation]
        format_string = self.__class__.__name__ + '(size={0}'.format(self.size)
        format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale))
        format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio))
        format_string += ', interpolation={0})'.format(interpolate_str)
        return format_string
class Resize:
    """Resize to given size.

    Attributes
    ----------
    size : tuple or int
        Desired output size. If size is a sequence like (h, w), output size
        will be matched to this. If size is an int, the smaller edge of the
        image will be matched to this number, i.e., if height > width, then
        the image will be rescaled to (size * height / width, size).
    interpolation : int
        Desired interpolation. Default is ``PIL.Image.BILINEAR``.
    """

    def __init__(self, size, interpolation=Image.BILINEAR):
        # ``size`` must be an int or a 2-element sequence (h, w).
        assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)
        self.size = size
        self.interpolation = interpolation

    def __call__(self, *args):
        return [VF.resize(img, self.size, self.interpolation) for img in args]

    def __repr__(self):
        interpolate_str = _pil_interpolation_to_str[self.interpolation]
        return '{0}(size={1}, interpolation={2})'.format(
            self.__class__.__name__, self.size, interpolate_str)