1#!/usr/bin/env python
2# coding=utf-8
3
4import logging
5import os
6
7import matplotlib.pyplot as plt
8import numpy
9import pandas
10
11from matplotlib.backends.backend_pdf import PdfPages
12
13logger = logging.getLogger(__name__)
14
15
16def _loss_evolution(df):
17 """Plots the loss evolution over time (epochs)
18
19 Parameters
20 ----------
21
22 df : pandas.DataFrame
23 dataframe containing the training logs
24
25
26 Returns
27 -------
28
29 figure : matplotlib.figure.Figure
30 figure to be displayed or saved to file
31
32 """
33
34 figure = plt.figure()
35 axes = figure.gca()
36
37 axes.plot(df.epoch.values, df.loss.values, label="Training")
38 if "validation_loss" in df.columns:
39 axes.plot(
40 df.epoch.values, df.validation_loss.values, label="Validation"
41 )
42 # shows a red dot on the location with the minima on the validation set
43 lowest_index = numpy.argmin(df["validation_loss"])
44
45 axes.plot(
46 df.epoch.values[lowest_index],
47 df.validation_loss[lowest_index],
48 "mo",
49 label=f"Lowest validation ({df.validation_loss[lowest_index]:.3f}@{df.epoch[lowest_index]})",
50 )
51
52 if "extra_validation_losses" in df.columns:
53 # These losses are in array format. So, we read all rows, then create a
54 # 2d array. We transpose the array to iterate over each column and
55 # plot the losses individually. They are numbered from 1.
56 df["extra_validation_losses"] = df["extra_validation_losses"].apply(
57 lambda x: numpy.fromstring(x.strip("[]"), sep=" ")
58 )
59 losses = numpy.vstack(df.extra_validation_losses.values).T
60 for n, k in enumerate(losses):
61 axes.plot(df.epoch.values, k, label=f"Extra validation {n+1}")
62
63 axes.set_title("Loss over time")
64 axes.set_xlabel("Epoch")
65 axes.set_ylabel("Loss")
66
67 axes.legend(loc="best")
68 axes.grid(alpha=0.3)
69 figure.set_layout_engine("tight")
70
71 return figure
72
73
74def _hardware_utilisation(df, const):
75 """Plot the CPU utilisation over time (epochs).
76
77 Parameters
78 ----------
79
80 df : pandas.DataFrame
81 dataframe containing the training logs
82
83 const : dict
84 training and hardware constants
85
86
87 Returns
88 -------
89
90 figure : matplotlib.figure.Figure
91 figure to be displayed or saved to file
92
93 """
94 figure = plt.figure()
95 axes = figure.gca()
96
97 cpu_percent = df.cpu_percent.values / const["cpu_count"]
98 cpu_memory = 100 * df.cpu_rss / const["cpu_memory_total"]
99
100 axes.plot(
101 df.epoch.values,
102 cpu_percent,
103 label=f"CPU usage (cores: {const['cpu_count']})",
104 )
105 axes.plot(
106 df.epoch.values,
107 cpu_memory,
108 label=f"CPU memory (total: {const['cpu_memory_total']:.1f} Gb)",
109 )
110 if "gpu_percent" in df:
111 axes.plot(
112 df.epoch.values,
113 df.gpu_percent.values,
114 label=f"GPU usage (type: {const['gpu_name']})",
115 )
116 if "gpu_memory_percent" in df:
117 axes.plot(
118 df.epoch.values,
119 df.gpu_memory_percent.values,
120 label=f"GPU memory (total: {const['gpu_memory_total']:.1f} Gb)",
121 )
122 axes.set_title("Hardware utilisation over time")
123 axes.set_xlabel("Epoch")
124 axes.set_ylabel("Relative utilisation (%)")
125 axes.set_ylim([0, 100])
126
127 axes.legend(loc="best")
128 axes.grid(alpha=0.3)
129 figure.set_layout_engine("tight")
130
131 return figure
132
133
def base_train_analysis(log, constants, output_pdf, verbose, **kwargs):
    """Create base train_analysis function.

    Reads the training log and hardware constants from CSV files, then
    renders the loss-evolution and hardware-utilisation plots into a
    multi-page PDF at ``output_pdf``.

    Parameters
    ----------

    log : str
        path to the CSV file with the per-epoch training logs

    constants : str
        path to the CSV file with training and hardware constants
        (single data row; converted to a plain dict)

    output_pdf : str
        path of the PDF file to create (parent directories are created
        as needed)

    verbose : int
        verbosity level (currently unused by this function)

    """
    constants = pandas.read_csv(constants)
    # single-row CSV -> plain {column: value} dict
    constants = dict(zip(constants.keys(), constants.values[0]))
    data = pandas.read_csv(log)

    # makes sure the directory to save the output PDF is there;
    # exist_ok avoids the check-then-create race of os.path.exists()
    dirname = os.path.dirname(os.path.realpath(output_pdf))
    os.makedirs(dirname, exist_ok=True)

    # now, do the analysis: one figure per PDF page, closed after saving
    # so figures do not accumulate in matplotlib's global state
    with PdfPages(output_pdf) as pdf:

        figure = _loss_evolution(data)
        pdf.savefig(figure)
        plt.close(figure)

        figure = _hardware_utilisation(data, constants)
        pdf.savefig(figure)
        plt.close(figure)