Source code for bob.learn.pytorch.trainers.CNNTrainer

#!/usr/bin/env python
# encoding: utf-8


import os
import time

import numpy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

import bob.core

# needed for the isinstance() checks in load_and_initialize_model below
import bob.learn.pytorch.architectures

logger = bob.core.log.setup("bob.learn.pytorch")


class CNNTrainer(object):
    """
    Class to train a CNN

    Attributes
    ----------
    network: :py:class:`torch.nn.Module`
        The network to train
    batch_size: int
        The size of your minibatch
    use_gpu: bool
        If you would like to use the gpu
    verbosity_level: int
        The level of verbosity output to stdout
    num_classes: int
        The number of classes
    """

    def __init__(
        self, network, batch_size=64, use_gpu=False, verbosity_level=2, num_classes=2
    ):
        """
        Init function

        Parameters
        ----------
        network: :py:class:`torch.nn.Module`
            The network to train
        batch_size: int
            The size of your minibatch
        use_gpu: bool
            If you would like to use the gpu
        verbosity_level: int
            The level of verbosity output to stdout
        num_classes: int
            The number of classes
        """
        self.network = network
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.criterion = nn.CrossEntropyLoss()

        if self.use_gpu:
            self.network.cuda()

        bob.core.log.set_verbosity_level(logger, verbosity_level)
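
    # A minimal construction sketch (not part of the original source). The
    # LightCNN9 import path mirrors the isinstance() checks in
    # load_and_initialize_model below; the constructor signature is an
    # assumption:
    #
    #   from bob.learn.pytorch.architectures.LightCNN import LightCNN9
    #   net = LightCNN9()
    #   trainer = CNNTrainer(net, batch_size=32, use_gpu=False, num_classes=2)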
    def load_and_initialize_model(self, model_filename):
        """
        Loads and initializes a model

        Parameters
        ----------
        model_filename: str
            The path to the model file to load

        Returns
        -------
        start_epoch: int
            The last epoch stored in the checkpoint (0 if absent)
        start_iter: int
            The last iteration stored in the checkpoint (0 if absent)
        losses: list(float)
            The list of losses stored in the checkpoint (empty if absent)
        """
        try:
            cp = torch.load(model_filename)
            logger.info("model {} loaded".format(model_filename))
        except RuntimeError:
            # the pre-trained model was probably saved using nn.DataParallel ...
            cp = torch.load(model_filename, map_location="cpu")
            logger.info("model {} loaded on CPU".format(model_filename))

        if "state_dict" in cp:
            from collections import OrderedDict

            # strip the "module." prefix that nn.DataParallel adds to
            # parameter names; keys without the prefix are kept as-is
            new_state_dict = OrderedDict()
            for k, v in cp["state_dict"].items():
                name = k[7:] if k.startswith("module.") else k
                new_state_dict[name] = v
            cp["state_dict"] = new_state_dict
            logger.info("state_dict modified")

        #######################################################################
        # for each supported architecture, get the output size of the
        # pre-trained model, and re-initialize the last layer if necessary

        # LightCNN
        if isinstance(
            self.network,
            (
                bob.learn.pytorch.architectures.LightCNN.LightCNN9,
                bob.learn.pytorch.architectures.LightCNN.LightCNN29,
                bob.learn.pytorch.architectures.LightCNN.LightCNN29v2,
            ),
        ):
            last_layer_weight = "fc2.weight"
            last_layer_bias = "fc2.bias"
            num_classes_pretrained = cp["state_dict"][last_layer_weight].shape[0]

            if num_classes_pretrained == self.num_classes:
                self.network.load_state_dict(cp["state_dict"])
            else:
                # re-initialize the last layer: its output size must match
                # the network's number of classes for a strict load
                var = 1.0 / (cp["state_dict"][last_layer_weight].shape[0])
                np_weights = numpy.random.normal(
                    loc=0.0,
                    scale=var,
                    size=(
                        self.num_classes,
                        cp["state_dict"][last_layer_weight].shape[1],
                    ),
                )
                cp["state_dict"][last_layer_weight] = torch.from_numpy(np_weights)
                if not isinstance(
                    self.network,
                    bob.learn.pytorch.architectures.LightCNN.LightCNN29v2,
                ):
                    cp["state_dict"][last_layer_bias] = torch.zeros(
                        (self.num_classes,)
                    )
                self.network.load_state_dict(cp["state_dict"], strict=True)
            logger.info(
                "state_dict loaded for {} with {} classes".format(
                    type(self.network), self.num_classes
                )
            )

        # CNN8 and CASIANet share the same classifier layout, so they are
        # handled together
        if isinstance(
            self.network,
            (
                bob.learn.pytorch.architectures.CNN8,
                bob.learn.pytorch.architectures.CASIANet,
            ),
        ):
            num_classes_pretrained = cp["state_dict"]["classifier.weight"].shape[0]

            if num_classes_pretrained == self.num_classes:
                self.network.load_state_dict(cp["state_dict"])
            else:
                # re-initialize the classifier to match the network's
                # number of classes
                var = 1.0 / (cp["state_dict"]["classifier.weight"].shape[0])
                np_weights = numpy.random.normal(
                    loc=0.0,
                    scale=var,
                    size=(
                        self.num_classes,
                        cp["state_dict"]["classifier.weight"].shape[1],
                    ),
                )
                cp["state_dict"]["classifier.weight"] = torch.from_numpy(np_weights)
                cp["state_dict"]["classifier.bias"] = torch.zeros((self.num_classes,))
                self.network.load_state_dict(cp["state_dict"], strict=True)
            logger.info(
                "state_dict loaded for {} with {} classes".format(
                    type(self.network), self.num_classes
                )
            )

        #######################################################################

        start_epoch = 0
        start_iter = 0
        losses = []
        if "epoch" in cp.keys():
            start_epoch = cp["epoch"]
        if "iteration" in cp.keys():
            start_iter = cp["iteration"]
        if "losses" in cp.keys():
            # the key matches what save_model() stores
            losses = cp["losses"]

        return start_epoch, start_iter, losses
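
    # For reference, a sketch of the checkpoint layout this method expects,
    # matching what save_model() below produces:
    #
    #   {
    #       "epoch": 3,           # last completed epoch
    #       "iteration": 0,       # last iteration within that epoch
    #       "losses": [0.7, ...], # per-batch losses since training began
    #       "state_dict": {...},  # the network parameters
    #   }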
    def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
        """Save the trained network

        Parameters
        ----------
        output_dir: str
            The directory to write the models to
        epoch: int
            the current epoch
        iteration: int
            the current (last) iteration
        losses: list(float)
            The list of losses since the beginning of training
        """
        saved_filename = "model_{}_{}.pth".format(epoch, iteration)
        saved_path = os.path.join(output_dir, saved_filename)
        logger.info("Saving model to {}".format(saved_path))

        cp = {
            "epoch": epoch,
            "iteration": iteration,
            # key kept consistent with load_and_initialize_model()
            "losses": losses,
            "state_dict": self.network.cpu().state_dict(),
        }
        torch.save(cp, saved_path)

        # move the model back to the GPU if needed
        if self.use_gpu:
            self.network.cuda()
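
    # An illustrative sketch of restoring a saved checkpoint for evaluation;
    # the file name and the LightCNN9 import path / constructor are
    # assumptions:
    #
    #   cp = torch.load("out/model_20_0.pth", map_location="cpu")
    #   from bob.learn.pytorch.architectures.LightCNN import LightCNN9
    #   net = LightCNN9()
    #   net.load_state_dict(cp["state_dict"])
    #   net.eval()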
    def train(
        self, dataloader, n_epochs=20, learning_rate=0.01, output_dir="out", model=None
    ):
        """Performs the training.

        Parameters
        ----------
        dataloader: :py:class:`torch.utils.data.DataLoader`
            The dataloader for your data
        n_epochs: int
            The number of epochs you would like to train for
        learning_rate: float
            The learning rate for the SGD optimizer
        output_dir: str
            The directory where you would like to save models
        model: str
            The path to a pre-trained model to resume or fine-tune from
            (optional)
        """
        # if a model exists, load it
        if model is not None:
            start_epoch, start_iter, losses = self.load_and_initialize_model(model)
            if start_epoch != 0:
                logger.info(
                    "Previous network was trained up to epoch {}, iteration {}".format(
                        start_epoch, start_iter
                    )
                )
                if losses:
                    logger.info("Last loss = {}".format(losses[-1]))
            else:
                logger.info("Starting training / fine-tuning from pre-trained model")
        else:
            start_epoch = 0
            start_iter = 0
            losses = []
            logger.info("Starting training from scratch")

        # setup optimizer
        optimizer = optim.SGD(
            self.network.parameters(), learning_rate, momentum=0.9, weight_decay=0.0005
        )

        # let's go
        for epoch in range(start_epoch, n_epochs):
            for i, data in enumerate(dataloader, 0):
                if i >= start_iter:
                    start = time.time()
                    images = data["image"]
                    labels = data["label"]
                    if self.use_gpu:
                        images = images.cuda()
                        labels = labels.cuda()
                    imagesv = Variable(images)
                    labelsv = Variable(labels)

                    output, _ = self.network(imagesv)
                    loss = self.criterion(output, labelsv)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    end = time.time()

                    logger.info(
                        "[{}/{}][{}/{}] => Loss = {} (time spent: {})".format(
                            epoch,
                            n_epochs,
                            i,
                            len(dataloader),
                            loss.item(),
                            (end - start),
                        )
                    )
                    losses.append(loss.item())

            # only skip already-seen batches in the epoch we resumed from
            start_iter = 0

            # save the model at the end of each epoch
            logger.info("EPOCH {} DONE".format(epoch + 1))
            self.save_model(output_dir, epoch=(epoch + 1), iteration=0, losses=losses)
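
# ---------------------------------------------------------------------------
# A minimal smoke-test sketch (not part of the original source). The toy
# network and dataset are hypothetical stand-ins: forward() returns a
# (logits, features) tuple because train() unpacks
# "output, _ = self.network(imagesv)", and each sample is a dict with
# "image" and "label" keys, matching the dataloader contract above.
if __name__ == "__main__":
    from torch.utils.data import DataLoader, Dataset

    class _ToyNet(nn.Module):
        """Tiny stand-in for a bob.learn.pytorch architecture."""

        def __init__(self, num_classes=2):
            super(_ToyNet, self).__init__()
            self.features = nn.Sequential(nn.Conv2d(3, 4, 3, padding=1), nn.ReLU())
            self.classifier = nn.Linear(4 * 8 * 8, num_classes)

        def forward(self, x):
            feat = self.features(x).view(x.size(0), -1)
            return self.classifier(feat), feat

    class _ToyData(Dataset):
        """Random images/labels in the dict format expected by train()."""

        def __len__(self):
            return 16

        def __getitem__(self, idx):
            return {
                "image": torch.randn(3, 8, 8),
                "label": torch.tensor(idx % 2, dtype=torch.long),
            }

    out_dir = "/tmp/cnn_trainer_demo"  # hypothetical output directory
    os.makedirs(out_dir, exist_ok=True)

    loader = DataLoader(_ToyData(), batch_size=4, shuffle=True)
    trainer = CNNTrainer(_ToyNet(), batch_size=4, use_gpu=False, num_classes=2)
    trainer.train(loader, n_epochs=1, learning_rate=0.01, output_dir=out_dir)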