# Source code for bob.learn.pytorch.trainers.GenericTrainer

#!/usr/bin/env python
# encoding: utf-8

import copy
import os
import time
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from .tflog import Logger

import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")

class GenericTrainer(object):
    """Class to train a generic NN; all the parameters are provided in configs.

    Attributes
    ----------
    network: :py:class:`torch.nn.Module`
        The network to train
    optimizer: :py:class:`torch.optim.Optimizer`
        Optimizer object to be used. Initialized in the config file.
    compute_loss: callable
        Invoked as ``compute_loss(network, img, labels, device)``; the
        result must support ``.backward()`` and ``.item()``.
    device: str
        Device which will be used for training the model
    learning_rate: float
        Stored for reference only; this class never applies it to the
        optimizer.
    save_interval: int
        A checkpoint is written whenever ``epoch % save_interval == 0``.
    do_crossvalidation: bool
        Whether a ``'val'`` phase is run after the ``'train'`` phase.
    phases: list(str)
        ``['train', 'val']`` with cross-validation, else ``['train']``.
    tf_logger: Logger
        Receives the scalar and histogram summaries written once per epoch.

    """

    def __init__(self, network, optimizer, compute_loss, learning_rate=0.0001,
                 device='cpu', verbosity_level=2, tf_logdir='tf_logs',
                 do_crossvalidation=False, save_interval=5):
        """Init function.

        Stores the training components, moves the network to ``device`` and
        sets up logging. No ``requires_grad`` flag is modified here; the
        layers to adapt are presumably selected by the caller (e.g. in the
        config file) before the network is handed over.

        Parameters
        ----------
        network: :py:class:`torch.nn.Module`
            The network to train
        optimizer: :py:class:`torch.optim.Optimizer`
            Optimizer object to be used.
        compute_loss: callable
            Invoked as ``compute_loss(network, img, labels, device)``.
        learning_rate: float
            Only stored on the instance.
        device: str
            Device which will be used for training the model
        verbosity_level: int
            The level of verbosity output to stdout
        tf_logdir: str
            Argument forwarded to the ``Logger`` constructor.
        do_crossvalidation: bool
            If set to `True`, performs validation in each epoch and stores
            the best model based on validation loss.
        save_interval: int
            A checkpoint is written whenever ``epoch % save_interval == 0``.

        """
        self.network = network
        self.optimizer = optimizer
        self.compute_loss = compute_loss
        self.device = device
        self.learning_rate = learning_rate
        self.save_interval = save_interval
        self.do_crossvalidation = do_crossvalidation
        self.phases = ['train', 'val'] if do_crossvalidation else ['train']

        # Move the network to device
        self.network.to(self.device)

        bob.core.log.set_verbosity_level(logger, verbosity_level)
        self.tf_logger = Logger(tf_logdir)

    def load_model(self, model_filename):
        """Loads an existing model

        The ``'state_dict'`` entry of the checkpoint is loaded into
        ``self.network``.

        Parameters
        ----------
        model_filename: str
            The filename of the model to load

        Returns
        -------
        start_epoch: int
            The epoch to start with
        start_iteration: int
            The iteration to start with
        losses: list(float)
            The list of losses from previous training

        """
        cp = torch.load(model_filename)
        self.network.load_state_dict(cp['state_dict'])
        return cp['epoch'], cp['iteration'], cp['loss']

    def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
        """Save the trained network

        The checkpoint is written to ``model_<epoch>_<iteration>.pth`` inside
        ``output_dir`` (created if missing) and holds the keys ``'epoch'``,
        ``'iteration'``, ``'loss'`` and ``'state_dict'``.

        Parameters
        ----------
        output_dir: str
            The directory to write the models to
        epoch: int
            the current epoch
        iteration: int
            the current (last) iteration
        losses: list(float)
            The list of losses since the beginning of training

        """
        saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
        saved_path = os.path.join(output_dir, saved_filename)
        logger.info('Saving model to {}'.format(saved_path))

        os.makedirs(output_dir, exist_ok=True)

        # Store CPU copies of the weights so the checkpoint does not depend
        # on the training device; the network itself stays where it is.
        cpu_state = {key: tensor.cpu()
                     for key, tensor in self.network.state_dict().items()}
        cp = {'epoch': epoch,
              'iteration': iteration,
              'loss': losses,
              'state_dict': cpu_state}
        torch.save(cp, saved_path)

    def _log_epoch_summaries(self, info, step):
        """Write the scalars in ``info`` and parameter histograms for ``step``."""
        # scalar logs
        for tag, value in info.items():
            self.tf_logger.scalar_summary(tag, value, step)

        # Log values and gradients of the parameters (histogram summary)
        for tag, value in self.network.named_parameters():
            tag = tag.replace('.', '/')
            try:
                self.tf_logger.histo_summary(
                    tag, value.detach().cpu().numpy(), step)
                # Parameters that never received a gradient have grad=None.
                if value.grad is not None:
                    self.tf_logger.histo_summary(
                        tag + '/grad', value.grad.detach().cpu().numpy(), step)
            except Exception as exc:
                # Histogram logging is best effort and must never abort the
                # training, but the reason is worth keeping in the debug log.
                logger.debug("Could not log histogram for {}: {}".format(tag, exc))

    def train(self, dataloader, n_epochs=25, output_dir='out', model=None):
        """Performs the training.

        Parameters
        ----------
        dataloader: dict
            Maps each phase name (``'train'``, plus ``'val'`` when
            cross-validation is enabled) to an iterable of
            ``(img, labels)`` batches that supports ``len()``, e.g. a
            :py:class:`torch.utils.data.DataLoader`.
        n_epochs: int
            The number of epochs you would like to train for
        output_dir: str
            The directory where you would like to save models
        model: str
            The path to a pretrained model file to start training from;
            this is the PAD model; not the LightCNN model

        """
        # if model exists, load it
        if model is not None:
            start_epoch, start_iter, losses = self.load_model(model)
            if losses is None:
                # save_model() stores None when no loss history was given.
                losses = []
            last_loss = losses[-1] if losses else None
            logger.info('Starting training at epoch {}, iteration {} - last loss value is {}'.format(
                start_epoch, start_iter, last_loss))
        else:
            start_epoch = 0
            start_iter = 0
            losses = []
            logger.info('Starting training from scratch')

        for name, param in self.network.named_parameters():
            if param.requires_grad:
                logger.info('Layer to be adapted from grad check : {}'.format(name))

        # Stays None until a validation epoch improves on best_loss, so the
        # trained weights are never replaced by the untrained initial ones.
        best_model_wts = None
        best_loss = float("inf")

        # let's go
        for epoch in range(start_epoch, n_epochs):
            # The iteration offset of a resumed run only concerns the epoch
            # that was interrupted; later epochs use every batch.
            first_iter = start_iter if epoch == start_epoch else 0

            train_loss_history = []
            val_loss_history = []

            for phase in self.phases:
                is_training = phase == 'train'
                # Set model to training mode or to evaluate mode
                self.network.train(is_training)

                n_batches = len(dataloader[phase])
                for i, data in enumerate(dataloader[phase], 0):
                    if i < first_iter:
                        continue

                    start = time.time()

                    # get data from dataset
                    img, labels = data

                    self.optimizer.zero_grad()
                    with torch.set_grad_enabled(is_training):
                        loss = self.compute_loss(
                            self.network, img, labels, self.device)
                        if is_training:
                            loss.backward()
                            self.optimizer.step()

                    loss_value = loss.item()
                    if is_training:
                        train_loss_history.append(loss_value)
                    else:
                        val_loss_history.append(loss_value)

                    end = time.time()
                    logger.info("[{}/{}][{}/{}] => Loss = {} (time spent: {}), Phase {}".format(
                        epoch, n_epochs, i, n_batches, loss_value, (end - start), phase))
                    losses.append(loss_value)

            epoch_train_loss = np.mean(train_loss_history)
            logger.info("Train Loss : {} epoch : {}".format(
                epoch_train_loss, epoch))
            info = {'train_loss': epoch_train_loss}

            if self.do_crossvalidation:
                epoch_val_loss = np.mean(val_loss_history)
                logger.info("Val Loss : {} epoch : {}".format(
                    epoch_val_loss, epoch))
                if epoch_val_loss < best_loss:
                    logger.debug("New val loss : {} is better than old: {}, copying over the new weights".format(
                        epoch_val_loss, best_loss))
                    best_loss = epoch_val_loss
                    best_model_wts = copy.deepcopy(self.network.state_dict())
                info['val_loss'] = epoch_val_loss

            self._log_epoch_summaries(info, epoch + 1)

            # do stuff - like saving models
            logger.info("EPOCH {} DONE".format(epoch + 1))

            # save the last model, and the ones in the specified interval
            if (epoch + 1) == n_epochs or epoch % self.save_interval == 0:
                self.save_model(output_dir, epoch=(epoch + 1),
                                iteration=0, losses=losses)

        # load the best weights (only available with cross-validation)
        if best_model_wts is not None:
            self.network.load_state_dict(best_model_wts)

        # best epoch is 0
        self.save_model(output_dir, epoch=0, iteration=0, losses=losses)