Source code for bob.learn.pytorch.trainers.CNNTrainer

#!/usr/bin/env python
# encoding: utf-8


import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable


import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")

import time
import os
import numpy

class CNNTrainer(object):
    """
    Class to train a CNN

    Attributes
    ----------
    network: :py:class:`torch.nn.Module`
        The network to train
    batch_size: int
        The size of your minibatch
    use_gpu: bool
        If you would like to use the gpu
    verbosity_level: int
        The level of verbosity output to stdout
    num_classes: int
        The number of classes

    """

    def __init__(self, network, batch_size=64, use_gpu=False, verbosity_level=2, num_classes=2):
        """ Init function

        Parameters
        ----------
        network: :py:class:`torch.nn.Module`
            The network to train
        batch_size: int
            The size of your minibatch
        use_gpu: bool
            If you would like to use the gpu
        verbosity_level: int
            The level of verbosity output to stdout
        num_classes: int
            The number of classes

        """
        self.network = network
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.criterion = nn.CrossEntropyLoss()

        if self.use_gpu:
            self.network.cuda()

        bob.core.log.set_verbosity_level(logger, verbosity_level)
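    # A hypothetical construction sketch (not in the original source), using
    # one of the architectures this trainer knows how to fine-tune; it assumes
    # LightCNN9 accepts a ``num_classes`` argument:
    #
    #   >>> from bob.learn.pytorch.architectures.LightCNN import LightCNN9
    #   >>> net = LightCNN9(num_classes=2)
    #   >>> trainer = CNNTrainer(net, batch_size=32, use_gpu=False, num_classes=2)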
    def load_and_initialize_model(self, model_filename):
        """ Loads and initializes a pre-trained model

        Parameters
        ----------
        model_filename: str
            The path to the pre-trained model file

        Returns
        -------
        start_epoch: int
            The epoch the pre-trained model stopped at
        start_iter: int
            The iteration the pre-trained model stopped at
        losses: list(float)
            The list of losses recorded so far

        """
        # imported here to avoid a circular import at module load time; the
        # architectures are needed for the isinstance() checks below
        import bob.learn.pytorch.architectures

        try:
            cp = torch.load(model_filename)
            logger.info("model {} loaded".format(model_filename))
        except RuntimeError:
            # pre-trained model was probably saved using nn.DataParallel ...
            cp = torch.load(model_filename, map_location='cpu')
            logger.info("model {} loaded on CPU".format(model_filename))

        if 'state_dict' in cp:
            from collections import OrderedDict
            new_state_dict = OrderedDict()
            for k, v in cp['state_dict'].items():
                if k.startswith('module.'):
                    # strip the 'module.' prefix added by nn.DataParallel
                    new_state_dict[k[7:]] = v
                else:
                    new_state_dict[k] = v
            cp['state_dict'] = new_state_dict
            logger.info("state_dict modified")

        # for each defined architecture, get the output size of the pre-trained
        # model, and re-initialize the last layer if it does not match the
        # requested number of classes

        # LightCNN
        if isinstance(self.network, bob.learn.pytorch.architectures.LightCNN.LightCNN9) \
                or isinstance(self.network, bob.learn.pytorch.architectures.LightCNN.LightCNN29) \
                or isinstance(self.network, bob.learn.pytorch.architectures.LightCNN.LightCNN29v2):

            last_layer_weight = 'fc2.weight'
            last_layer_bias = 'fc2.bias'
            num_classes_pretrained = cp['state_dict'][last_layer_weight].shape[0]

            if num_classes_pretrained == self.num_classes:
                self.network.load_state_dict(cp['state_dict'])
            else:
                # re-initialize the last layer with random weights, scaled by
                # the inverse of the pre-trained output size; the new layer
                # must have self.num_classes outputs for strict loading
                var = 1.0 / (cp['state_dict'][last_layer_weight].shape[0])
                np_weights = numpy.random.normal(loc=0.0, scale=var,
                                                 size=(self.num_classes, cp['state_dict'][last_layer_weight].shape[1]))
                cp['state_dict'][last_layer_weight] = torch.from_numpy(np_weights)
                # LightCNN29v2 has no bias on its last layer
                if not isinstance(self.network, bob.learn.pytorch.architectures.LightCNN.LightCNN29v2):
                    cp['state_dict'][last_layer_bias] = torch.zeros((self.num_classes,))
                self.network.load_state_dict(cp['state_dict'], strict=True)
            logger.info("state_dict loaded for {} with {} classes".format(type(self.network), self.num_classes))

        # CNN8
        if isinstance(self.network, bob.learn.pytorch.architectures.CNN8):

            num_classes_pretrained = cp['state_dict']['classifier.weight'].shape[0]

            if num_classes_pretrained == self.num_classes:
                self.network.load_state_dict(cp['state_dict'])
            else:
                var = 1.0 / (cp['state_dict']['classifier.weight'].shape[0])
                np_weights = numpy.random.normal(loc=0.0, scale=var,
                                                 size=(self.num_classes, cp['state_dict']['classifier.weight'].shape[1]))
                cp['state_dict']['classifier.weight'] = torch.from_numpy(np_weights)
                cp['state_dict']['classifier.bias'] = torch.zeros((self.num_classes,))
                self.network.load_state_dict(cp['state_dict'], strict=True)
            logger.info("state_dict loaded for {} with {} classes".format(type(self.network), self.num_classes))

        # CASIANet
        if isinstance(self.network, bob.learn.pytorch.architectures.CASIANet):

            num_classes_pretrained = cp['state_dict']['classifier.weight'].shape[0]

            if num_classes_pretrained == self.num_classes:
                self.network.load_state_dict(cp['state_dict'])
            else:
                var = 1.0 / (cp['state_dict']['classifier.weight'].shape[0])
                np_weights = numpy.random.normal(loc=0.0, scale=var,
                                                 size=(self.num_classes, cp['state_dict']['classifier.weight'].shape[1]))
                cp['state_dict']['classifier.weight'] = torch.from_numpy(np_weights)
                cp['state_dict']['classifier.bias'] = torch.zeros((self.num_classes,))
                self.network.load_state_dict(cp['state_dict'], strict=True)
            logger.info("state_dict loaded for {} with {} classes".format(type(self.network), self.num_classes))

        # recover the training state stored alongside the weights, if any
        start_epoch = 0
        start_iter = 0
        losses = []
        if 'epoch' in cp.keys():
            start_epoch = cp['epoch']
        if 'iteration' in cp.keys():
            start_iter = cp['iteration']
        if 'losses' in cp.keys():
            losses = cp['losses']

        return start_epoch, start_iter, losses
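    # Illustrative sketch (not in the original source): resuming is normally
    # triggered through ``train(model=...)``, which calls this method
    # internally, but it can also be invoked directly:
    #
    #   >>> start_epoch, start_iter, losses = trainer.load_and_initialize_model('out/model_1_0.pth')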
    def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
        """Save the trained network

        Parameters
        ----------
        output_dir: str
            The directory to write the models to
        epoch: int
            the current epoch
        iteration: int
            the current (last) iteration
        losses: list(float)
            The list of losses since the beginning of training

        """
        saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
        saved_path = os.path.join(output_dir, saved_filename)
        logger.info('Saving model to {}'.format(saved_path))

        cp = {'epoch': epoch,
              'iteration': iteration,
              'losses': losses,
              'state_dict': self.network.cpu().state_dict()
              }
        torch.save(cp, saved_path)

        # move the model back to the GPU if needed
        if self.use_gpu:
            self.network.cuda()
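    # Illustration (not in the original source): the checkpoint written above
    # is a plain dict, so it can be inspected directly with torch.load:
    #
    #   >>> cp = torch.load('out/model_1_0.pth')
    #   >>> sorted(cp.keys())
    #   ['epoch', 'iteration', 'losses', 'state_dict']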
    def train(self, dataloader, n_epochs=20, learning_rate=0.01, output_dir='out', model=None):
        """Performs the training.

        Parameters
        ----------
        dataloader: :py:class:`torch.utils.data.DataLoader`
            The dataloader for your data
        n_epochs: int
            The number of epochs you would like to train for
        learning_rate: float
            The learning rate for SGD optimizer.
        output_dir: str
            The directory where you would like to save models
        model: str
            The path to a pre-trained model file to start from (optional)

        """
        # if a pre-trained model is given, load it
        if model is not None:
            start_epoch, start_iter, losses = self.load_and_initialize_model(model)
            if start_epoch != 0:
                logger.info('Previous network was trained up to epoch {}, iteration {}'.format(start_epoch, start_iter))
                if losses:
                    logger.info('Last loss = {}'.format(losses[-1]))
            else:
                logger.info('Starting training / fine-tuning from pre-trained model')
        else:
            start_epoch = 0
            start_iter = 0
            losses = []
            logger.info('Starting training from scratch')

        # setup optimizer
        optimizer = optim.SGD(self.network.parameters(), learning_rate, momentum=0.9, weight_decay=0.0005)

        # let's go
        for epoch in range(start_epoch, n_epochs):
            for i, data in enumerate(dataloader, 0):

                # skip the batches already seen before the checkpoint was written
                if i >= start_iter:
                    start = time.time()

                    images = data['image']
                    labels = data['label']
                    if self.use_gpu:
                        images = images.cuda()
                        labels = labels.cuda()
                    imagesv = Variable(images)
                    labelsv = Variable(labels)

                    output, _ = self.network(imagesv)
                    loss = self.criterion(output, labelsv)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    end = time.time()
                    logger.info("[{}/{}][{}/{}] => Loss = {} (time spent: {})".format(
                        epoch, n_epochs, i, len(dataloader), loss.item(), (end - start)))
                    losses.append(loss.item())

            # only the first resumed epoch should skip batches
            start_iter = 0

            # do stuff - like saving models
            logger.info("EPOCH {} DONE".format(epoch + 1))
            self.save_model(output_dir, epoch=(epoch + 1), iteration=0, losses=losses)
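
# ---------------------------------------------------------------------------
# Minimal usage sketch, added for illustration (not part of the original
# module). The toy dataset and network below are hypothetical stand-ins that
# only mimic the interface train() expects: dict batches with 'image' and
# 'label' keys, and a forward() returning a (logits, features) tuple.
# ---------------------------------------------------------------------------
if __name__ == '__main__':

    import torch.utils.data

    class _ToyDataset(torch.utils.data.Dataset):
        """Random images and binary labels, in the format expected by train()"""
        def __len__(self):
            return 8

        def __getitem__(self, idx):
            return {'image': torch.randn(3, 32, 32), 'label': idx % 2}

    class _ToyNet(nn.Module):
        """Tiny CNN returning (logits, features), as train() expects"""
        def __init__(self, num_classes=2):
            super(_ToyNet, self).__init__()
            self.features = nn.Sequential(
                nn.Conv2d(3, 8, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.AdaptiveAvgPool2d(1))
            self.classifier = nn.Linear(8, num_classes)

        def forward(self, x):
            feats = self.features(x).view(x.size(0), -1)
            return self.classifier(feats), feats

    # the output directory must exist before save_model() writes into it
    out_dir = '/tmp/cnn-trainer-demo'
    os.makedirs(out_dir, exist_ok=True)

    loader = torch.utils.data.DataLoader(_ToyDataset(), batch_size=4)
    trainer = CNNTrainer(_ToyNet(), batch_size=4, use_gpu=False, num_classes=2)
    trainer.train(loader, n_epochs=1, learning_rate=0.01, output_dir=out_dir)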