Source code for bob.learn.pytorch.trainers.FASNetTrainer

#!/usr/bin/env python
# encoding: utf-8

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from bob.learn.pytorch.utils import comp_bce_loss_weights
from .tflog import Logger 

import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")

import time
import os

import copy

class FASNetTrainer(object):
    """
    Class to train the FASNet

    Attributes
    ----------
    network: :py:class:`torch.nn.Module`
        The network to train
    batch_size: int
        The size of your minibatch
    use_gpu: bool
        If you would like to use the gpu
    verbosity_level: int
        The level of verbosity output to stdout
    """

    def __init__(self, network, batch_size=64, use_gpu=False, verbosity_level=2, tf_logdir='tf_logs', do_crossvalidation=False):
        """Init function. Selects the layers to be adapted in the network and
        sets ``requires_grad`` to `True` for the layers which need to be adapted.

        Parameters
        ----------
        network: :py:class:`torch.nn.Module`
            The network to train
        batch_size: int
            The size of your minibatch
        use_gpu: bool
            If you would like to use the gpu
        verbosity_level: int
            The level of verbosity output to stdout
        tf_logdir: str
            The directory to write the TensorBoard-style logs to
        do_crossvalidation: bool
            If set to `True`, performs validation in each epoch and stores the
            best model based on validation loss.
        """
        self.network = network
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.criterion = nn.BCELoss()
        self.do_crossvalidation = do_crossvalidation

        if self.do_crossvalidation:
            phases = ['train', 'val']
        else:
            phases = ['train']
        self.phases = phases

        if self.use_gpu:
            self.network.cuda()

        bob.core.log.set_verbosity_level(logger, verbosity_level)
        self.tf_logger = Logger(tf_logdir)

        # Freeze everything, then unfreeze the layers which need to be
        # adapted: every parameter whose name does not contain 'enc'
        for name, param in self.network.named_parameters():
            param.requires_grad = 'enc' not in name
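    # A minimal sketch of the selective freezing above (toy module with
    # assumed names, not part of this class): any parameter whose name
    # contains 'enc' stays frozen, everything else is adapted in training.
    #
    #   import torch.nn as nn
    #   net = nn.Sequential()
    #   net.add_module('enc', nn.Linear(8, 4))  # frozen: name contains 'enc'
    #   net.add_module('dec', nn.Linear(4, 1))  # adapted
    #   for name, param in net.named_parameters():
    #       param.requires_grad = 'enc' not in name
    #   # -> enc.weight / enc.bias frozen; dec.weight / dec.bias trainable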
    def load_model(self, model_filename):
        """Loads an existing model

        Parameters
        ----------
        model_filename: str
            The filename of the model to load

        Returns
        -------
        start_epoch: int
            The epoch to start with
        start_iter: int
            The iteration to start with
        losses: list(float)
            The list of losses from previous training
        """
        cp = torch.load(model_filename)
        self.network.load_state_dict(cp['state_dict'])
        start_epoch = cp['epoch']
        start_iter = cp['iteration']
        losses = cp['loss']
        return start_epoch, start_iter, losses
    def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
        """Save the trained network

        Parameters
        ----------
        output_dir: str
            The directory to write the models to
        epoch: int
            the current epoch
        iteration: int
            the current (last) iteration
        losses: list(float)
            The list of losses since the beginning of training
        """
        saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
        saved_path = os.path.join(output_dir, saved_filename)
        logger.info('Saving model to {}'.format(saved_path))

        cp = {'epoch': epoch,
              'iteration': iteration,
              'loss': losses,
              'state_dict': self.network.cpu().state_dict()}

        torch.save(cp, saved_path)

        # move the model back to the GPU if needed
        if self.use_gpu:
            self.network.cuda()
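    # For reference, the checkpoint layout shared by save_model and
    # load_model (a sketch; 'model_1_0.pth' is a hypothetical filename):
    #
    #   cp = torch.load('model_1_0.pth')
    #   cp['epoch']       # int, epoch at save time
    #   cp['iteration']   # int, (last) iteration at save time
    #   cp['loss']        # list(float), losses since the start of training
    #   cp['state_dict']  # CPU state dict, fed to network.load_state_dict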
    def train(self, dataloader, n_epochs=25, learning_rate=1e-4, output_dir='out', model=None):
        """Performs the training.

        Parameters
        ----------
        dataloader: dict of :py:class:`torch.utils.data.DataLoader`
            The dataloaders for your data, indexed by phase ('train' and,
            optionally, 'val')
        n_epochs: int
            The number of epochs you would like to train for
        learning_rate: float
            The learning rate for the Adam optimizer.
        output_dir: str
            The directory where you would like to save models
        model: str
            The path to a pretrained model file to start training from;
            this is the PAD model, not the LightCNN model
        """
        # if a model exists, load it
        if model is not None:
            start_epoch, start_iter, losses = self.load_model(model)
            logger.info('Starting training at epoch {}, iteration {} - last loss value is {}'.format(start_epoch, start_iter, losses[-1]))
        else:
            start_epoch = 0
            start_iter = 0
            losses = []
            logger.info('Starting training from scratch')

        for name, param in self.network.named_parameters():
            if param.requires_grad:
                logger.info('Layer to be adapted from grad check : {}'.format(name))

        # setup optimizer: only the parameters which require gradients are optimized
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.network.parameters()), lr=learning_rate, weight_decay=0.000001)

        self.network.train(True)

        best_model_wts = copy.deepcopy(self.network.state_dict())
        best_loss = float("inf")

        # let's go
        for epoch in range(start_epoch, n_epochs):

            train_loss_history = []
            val_loss_history = []

            for phase in self.phases:

                if phase == 'train':
                    self.network.train()  # Set model to training mode
                else:
                    self.network.eval()   # Set model to evaluate mode

                for i, data in enumerate(dataloader[phase], 0):

                    if i >= start_iter:

                        start = time.time()

                        img, labels = data
                        labels = labels.float().unsqueeze(1)

                        # per-sample weights, should help with data imbalance
                        weights = comp_bce_loss_weights(labels)

                        if self.use_gpu:
                            img = img.cuda()
                            labels = labels.cuda()
                            weights = weights.cuda()

                        self.criterion.weight = weights

                        optimizer.zero_grad()

                        # gradients are only computed in the 'train' phase
                        with torch.set_grad_enabled(phase == 'train'):

                            output = self.network(img)
                            loss = self.criterion(output, labels)

                            if phase == 'train':
                                loss.backward()
                                optimizer.step()
                                train_loss_history.append(loss.item())
                            else:
                                val_loss_history.append(loss.item())

                        end = time.time()
                        logger.info("[{}/{}][{}/{}] => Loss = {} (time spent: {}), Phase {}".format(epoch, n_epochs, i, len(dataloader[phase]), loss.item(), (end - start), phase))
                        losses.append(loss.item())

            epoch_train_loss = np.mean(train_loss_history)
            logger.info("Train Loss : {} epoch : {}".format(epoch_train_loss, epoch))

            if self.do_crossvalidation:

                epoch_val_loss = np.mean(val_loss_history)
                logger.info("Val Loss : {} epoch : {}".format(epoch_val_loss, epoch))

                # keep the weights of the epoch with the best validation loss
                if epoch_val_loss < best_loss:
                    logger.debug("New val loss : {} is better than old: {}, copying over the new weights".format(epoch_val_loss, best_loss))
                    best_loss = epoch_val_loss
                    best_model_wts = copy.deepcopy(self.network.state_dict())

            ######################################## <Logging> ###################################

            if self.do_crossvalidation:
                info = {'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
            else:
                info = {'train_loss': epoch_train_loss}

            # scalar logs
            for tag, value in info.items():
                self.tf_logger.scalar_summary(tag, value, epoch + 1)

            # Log values and gradients of the parameters (histogram summary)
            for tag, value in self.network.named_parameters():
                tag = tag.replace('.', '/')
                self.tf_logger.histo_summary(tag, value.data.cpu().numpy(), epoch + 1)
                # frozen parameters have no gradient to log
                if value.grad is not None:
                    self.tf_logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), epoch + 1)

            ######################################## </Logging> ###################################

            logger.info("EPOCH {} DONE".format(epoch + 1))

            # save the model after every epoch
            self.save_model(output_dir, epoch=(epoch + 1), iteration=0, losses=losses)

        # load the best weights; without cross-validation the final weights
        # are already the ones to keep
        if self.do_crossvalidation:
            self.network.load_state_dict(best_model_wts)

        # save the best model under the fixed epoch number 100, so it can be
        # located easily
        self.save_model(output_dir, epoch=100, iteration=0, losses=losses)
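

# A hedged end-to-end sketch of how this trainer is meant to be driven. The
# toy network and random dataset below are illustrative assumptions - any
# :py:class:`torch.nn.Module` ending in a sigmoid and any dataset yielding
# (image, label) pairs with 0/1 labels (e.g. a FASNet and a face PAD dataset)
# would do. Note that ``train`` expects a dict of dataloaders keyed by phase.
if __name__ == '__main__':

    from torch.utils.data import DataLoader, TensorDataset

    # toy stand-in for a PAD dataset: pre-flattened 3x32x32 "images", binary labels
    features = torch.randn(16, 3 * 32 * 32)
    labels = torch.randint(0, 2, (16,))
    dataset = TensorDataset(features, labels)

    dataloaders = {'train': DataLoader(dataset, batch_size=4, shuffle=True),
                   'val': DataLoader(dataset, batch_size=4)}

    # tiny binary classifier producing probabilities, as nn.BCELoss expects
    network = nn.Sequential(nn.Linear(3 * 32 * 32, 1), nn.Sigmoid())

    # save_model writes into output_dir, so it must exist
    os.makedirs('out', exist_ok=True)

    trainer = FASNetTrainer(network, batch_size=4, use_gpu=False,
                            tf_logdir='tf_logs', do_crossvalidation=True)
    trainer.train(dataloaders, n_epochs=2, learning_rate=1e-4, output_dir='out')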