Source code for bob.learn.pytorch.trainers.CNNTrainer

#!/usr/bin/env python
# encoding: utf-8


import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable


import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")

import time
import os
import numpy

class CNNTrainer(object):
    """
    Class to train a CNN

    Attributes
    ----------
    network: :py:class:`torch.nn.Module`
        The network to train
    batch_size: int
        The size of your minibatch
    use_gpu: bool
        If you would like to use the gpu
    verbosity_level: int
        The level of verbosity output to stdout
    num_classes: int
        The number of classes

    """

    def __init__(self, network, batch_size=64, use_gpu=False, verbosity_level=2, num_classes=2):
        """ Init function

        Parameters
        ----------
        network: :py:class:`torch.nn.Module`
            The network to train
        batch_size: int
            The size of your minibatch
        use_gpu: bool
            If you would like to use the gpu
        verbosity_level: int
            The level of verbosity output to stdout
        num_classes: int
            The number of classes

        """
        self.network = network
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.criterion = nn.CrossEntropyLoss()

        if self.use_gpu:
            self.network.cuda()

        bob.core.log.set_verbosity_level(logger, verbosity_level)
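    # A hypothetical construction sketch (not in the original source), using
    # one of the architectures this trainer knows how to fine-tune; it assumes
    # LightCNN9 accepts a ``num_classes`` argument:
    #
    #   >>> from bob.learn.pytorch.architectures.LightCNN import LightCNN9
    #   >>> net = LightCNN9(num_classes=2)
    #   >>> trainer = CNNTrainer(net, batch_size=32, use_gpu=False, num_classes=2)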
    def load_and_initialize_model(self, model_filename):
        """ Loads and initializes a pre-trained model

        Parameters
        ----------
        model_filename: str
            The path to the pre-trained model file

        Returns
        -------
        start_epoch: int
            The epoch the pre-trained model stopped at
        start_iter: int
            The iteration the pre-trained model stopped at
        losses: list(float)
            The list of losses recorded so far

        """
        # imported here to avoid a circular import at module load time; the
        # architectures are needed for the isinstance() checks below
        import bob.learn.pytorch.architectures

        try:
            cp = torch.load(model_filename)
            logger.info("model {} loaded".format(model_filename))
        except RuntimeError:
            # pre-trained model was probably saved using nn.DataParallel ...
            cp = torch.load(model_filename, map_location='cpu')
            logger.info("model {} loaded on CPU".format(model_filename))

        if 'state_dict' in cp:
            from collections import OrderedDict
            new_state_dict = OrderedDict()
            for k, v in cp['state_dict'].items():
                if k.startswith('module.'):
                    # strip the 'module.' prefix added by nn.DataParallel
                    new_state_dict[k[7:]] = v
                else:
                    new_state_dict[k] = v
            cp['state_dict'] = new_state_dict
            logger.info("state_dict modified")

        # for each defined architecture, get the output size of the pre-trained
        # model, and re-initialize the last layer if it does not match the
        # requested number of classes

        # LightCNN
        if isinstance(self.network, bob.learn.pytorch.architectures.LightCNN.LightCNN9) \
                or isinstance(self.network, bob.learn.pytorch.architectures.LightCNN.LightCNN29) \
                or isinstance(self.network, bob.learn.pytorch.architectures.LightCNN.LightCNN29v2):

            last_layer_weight = 'fc2.weight'
            last_layer_bias = 'fc2.bias'
            num_classes_pretrained = cp['state_dict'][last_layer_weight].shape[0]

            if num_classes_pretrained == self.num_classes:
                self.network.load_state_dict(cp['state_dict'])
            else:
                # re-initialize the last layer with random weights, scaled by
                # the inverse of the pre-trained output size; the new layer
                # must have self.num_classes outputs for strict loading
                var = 1.0 / (cp['state_dict'][last_layer_weight].shape[0])
                np_weights = numpy.random.normal(loc=0.0, scale=var,
                                                 size=(self.num_classes, cp['state_dict'][last_layer_weight].shape[1]))
                cp['state_dict'][last_layer_weight] = torch.from_numpy(np_weights)
                # LightCNN29v2 has no bias on its last layer
                if not isinstance(self.network, bob.learn.pytorch.architectures.LightCNN.LightCNN29v2):
                    cp['state_dict'][last_layer_bias] = torch.zeros((self.num_classes,))
                self.network.load_state_dict(cp['state_dict'], strict=True)
            logger.info("state_dict loaded for {} with {} classes".format(type(self.network), self.num_classes))

        # CNN8
        if isinstance(self.network, bob.learn.pytorch.architectures.CNN8):

            num_classes_pretrained = cp['state_dict']['classifier.weight'].shape[0]

            if num_classes_pretrained == self.num_classes:
                self.network.load_state_dict(cp['state_dict'])
            else:
                var = 1.0 / (cp['state_dict']['classifier.weight'].shape[0])
                np_weights = numpy.random.normal(loc=0.0, scale=var,
                                                 size=(self.num_classes, cp['state_dict']['classifier.weight'].shape[1]))
                cp['state_dict']['classifier.weight'] = torch.from_numpy(np_weights)
                cp['state_dict']['classifier.bias'] = torch.zeros((self.num_classes,))
                self.network.load_state_dict(cp['state_dict'], strict=True)
            logger.info("state_dict loaded for {} with {} classes".format(type(self.network), self.num_classes))

        # CASIANet
        if isinstance(self.network, bob.learn.pytorch.architectures.CASIANet):

            num_classes_pretrained = cp['state_dict']['classifier.weight'].shape[0]

            if num_classes_pretrained == self.num_classes:
                self.network.load_state_dict(cp['state_dict'])
            else:
                var = 1.0 / (cp['state_dict']['classifier.weight'].shape[0])
                np_weights = numpy.random.normal(loc=0.0, scale=var,
                                                 size=(self.num_classes, cp['state_dict']['classifier.weight'].shape[1]))
                cp['state_dict']['classifier.weight'] = torch.from_numpy(np_weights)
                cp['state_dict']['classifier.bias'] = torch.zeros((self.num_classes,))
                self.network.load_state_dict(cp['state_dict'], strict=True)
            logger.info("state_dict loaded for {} with {} classes".format(type(self.network), self.num_classes))

        # recover the training state stored alongside the weights, if any
        start_epoch = 0
        start_iter = 0
        losses = []
        if 'epoch' in cp.keys():
            start_epoch = cp['epoch']
        if 'iteration' in cp.keys():
            start_iter = cp['iteration']
        if 'losses' in cp.keys():
            losses = cp['losses']

        return start_epoch, start_iter, losses
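    # Illustrative sketch (not in the original source): resuming is normally
    # triggered through ``train(model=...)``, which calls this method
    # internally, but it can also be invoked directly:
    #
    #   >>> start_epoch, start_iter, losses = trainer.load_and_initialize_model('out/model_1_0.pth')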
    def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
        """Save the trained network

        Parameters
        ----------
        output_dir: str
            The directory to write the models to
        epoch: int
            the current epoch
        iteration: int
            the current (last) iteration
        losses: list(float)
            The list of losses since the beginning of training

        """
        saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
        saved_path = os.path.join(output_dir, saved_filename)
        logger.info('Saving model to {}'.format(saved_path))

        cp = {'epoch': epoch,
              'iteration': iteration,
              'losses': losses,
              'state_dict': self.network.cpu().state_dict()
              }
        torch.save(cp, saved_path)

        # move the model back to the GPU if needed
        if self.use_gpu:
            self.network.cuda()
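    # Illustration (not in the original source): the checkpoint written above
    # is a plain dict, so it can be inspected directly with torch.load:
    #
    #   >>> cp = torch.load('out/model_1_0.pth')
    #   >>> sorted(cp.keys())
    #   ['epoch', 'iteration', 'losses', 'state_dict']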
    def train(self, dataloader, n_epochs=20, learning_rate=0.01, output_dir='out', model=None):
        """Performs the training.

        Parameters
        ----------
        dataloader: :py:class:`torch.utils.data.DataLoader`
            The dataloader for your data
        n_epochs: int
            The number of epochs you would like to train for
        learning_rate: float
            The learning rate for SGD optimizer.
        output_dir: str
            The directory where you would like to save models
        model: str
            The path to a pre-trained model file to start from (optional)

        """
        # if a pre-trained model is given, load it
        if model is not None:
            start_epoch, start_iter, losses = self.load_and_initialize_model(model)
            if start_epoch != 0:
                logger.info('Previous network was trained up to epoch {}, iteration {}'.format(start_epoch, start_iter))
                if losses:
                    logger.info('Last loss = {}'.format(losses[-1]))
            else:
                logger.info('Starting training / fine-tuning from pre-trained model')
        else:
            start_epoch = 0
            start_iter = 0
            losses = []
            logger.info('Starting training from scratch')

        # setup optimizer
        optimizer = optim.SGD(self.network.parameters(), learning_rate, momentum=0.9, weight_decay=0.0005)

        # let's go
        for epoch in range(start_epoch, n_epochs):
            for i, data in enumerate(dataloader, 0):

                # skip the batches already seen before the checkpoint was written
                if i >= start_iter:
                    start = time.time()

                    images = data['image']
                    labels = data['label']
                    if self.use_gpu:
                        images = images.cuda()
                        labels = labels.cuda()
                    imagesv = Variable(images)
                    labelsv = Variable(labels)

                    output, _ = self.network(imagesv)
                    loss = self.criterion(output, labelsv)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    end = time.time()
                    logger.info("[{}/{}][{}/{}] => Loss = {} (time spent: {})".format(
                        epoch, n_epochs, i, len(dataloader), loss.item(), (end - start)))
                    losses.append(loss.item())

            # only the first resumed epoch should skip batches
            start_iter = 0

            # do stuff - like saving models
            logger.info("EPOCH {} DONE".format(epoch + 1))
            self.save_model(output_dir, epoch=(epoch + 1), iteration=0, losses=losses)
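
# ---------------------------------------------------------------------------
# Minimal usage sketch, added for illustration (not part of the original
# module). The toy dataset and network below are hypothetical stand-ins that
# only mimic the interface train() expects: dict batches with 'image' and
# 'label' keys, and a forward() returning a (logits, features) tuple.
# ---------------------------------------------------------------------------
if __name__ == '__main__':

    import torch.utils.data

    class _ToyDataset(torch.utils.data.Dataset):
        """Random images and binary labels, in the format expected by train()"""
        def __len__(self):
            return 8

        def __getitem__(self, idx):
            return {'image': torch.randn(3, 32, 32), 'label': idx % 2}

    class _ToyNet(nn.Module):
        """Tiny CNN returning (logits, features), as train() expects"""
        def __init__(self, num_classes=2):
            super(_ToyNet, self).__init__()
            self.features = nn.Sequential(
                nn.Conv2d(3, 8, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.AdaptiveAvgPool2d(1))
            self.classifier = nn.Linear(8, num_classes)

        def forward(self, x):
            feats = self.features(x).view(x.size(0), -1)
            return self.classifier(feats), feats

    # the output directory must exist before save_model() writes into it
    out_dir = '/tmp/cnn-trainer-demo'
    os.makedirs(out_dir, exist_ok=True)

    loader = torch.utils.data.DataLoader(_ToyDataset(), batch_size=4)
    trainer = CNNTrainer(_ToyNet(), batch_size=4, use_gpu=False, num_classes=2)
    trainer.train(loader, n_epochs=1, learning_rate=0.01, output_dir=out_dir)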