Source code for bob.learn.pytorch.architectures.MCDeepPixBiS

import torch
from torch import nn
from torchvision import models
import numpy as np


[docs]class MCDeepPixBiS(nn.Module):

    """ The class defining Multi-Channel Deep Pixelwise Binary Supervision for Face Presentation
    Attack Detection:

    This extends the following paper to multi-channel/ multi-spectral images with cross modal pretraining.

    Reference: Anjith George and Sébastien Marcel. "Deep Pixel-wise Binary Supervision for 
    Face Presentation Attack Detection." In 2019 International Conference on Biometrics (ICB).IEEE, 2019.

    The initialization uses `Cross modality pre-training` idea from the following paper:

    Wang L, Xiong Y, Wang Z, Qiao Y, Lin D, Tang X, Van Gool L. Temporal segment networks: 
    Towards good practices for deep action recognition. InEuropean conference on computer 
    vision 2016 Oct 8 (pp. 20-36). Springer, Cham.


    Attributes
    ----------
    pretrained: bool
        If set to `True` uses the pretrained DenseNet model as the base. If set to `False`, the network
        will be trained from scratch. 
        default: True 
    num_channels: int
        Number of channels in the input.      
    """

    def __init__(self, pretrained=True, num_channels=4):

        """ Init function

        Parameters
        ----------
        pretrained: bool
            If set to `True` uses the pretrained densenet model as the base. Else, it uses the default network
            default: True
        num_channels: int
            Number of channels in the input. 
        """
        super(MCDeepPixBiS, self).__init__()

        dense = models.densenet161(pretrained=pretrained)

        features = list(dense.features.children())

        temp_layer = features[0]

        # No bias in this architecture

        mean_weight = np.mean(temp_layer.weight.data.detach().numpy(),axis=1) # for 96 filters

        new_weight = np.zeros((96,num_channels,7,7))
  
        for i in range(num_channels):
            new_weight[:,i,:,:]=mean_weight

        features[0]=nn.Conv2d(num_channels, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

        features[0].weight.data = torch.Tensor(new_weight)

        self.enc = nn.Sequential(*features[0:8])

        self.dec=nn.Conv2d(384, 1, kernel_size=1, padding=0)

        self.linear=nn.Linear(14*14,1)


[docs]    def forward(self, x):
        """ Propagate data through the network

        Parameters
        ----------
        img: :py:class:`torch.Tensor` 
          The data to forward through the network. Expects Multi-channel images of size num_channelsx224x224

        Returns
        -------
        dec: :py:class:`torch.Tensor` 
            Binary map of size 1x14x14
        op: :py:class:`torch.Tensor`
            Final binary score.  

        """
        enc = self.enc(x)

        dec=self.dec(enc)

        dec=nn.Sigmoid()(dec)

        dec_flat=dec.view(-1,14*14)

        op=self.linear(dec_flat)

        op=nn.Sigmoid()(op)
 
        return dec,op