Coverage for /scratch/builds/bob/bob.ip.binseg/miniconda/conda-bld/bob.ip.binseg_1673966692152/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_p/lib/python3.10/site-packages/bob/ip/common/script/train_analysis.py: 95%

60 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-01-17 15:03 +0000

1#!/usr/bin/env python 

2# coding=utf-8 

3 

4import logging 

5import os 

6 

7import matplotlib.pyplot as plt 

8import numpy 

9import pandas 

10 

11from matplotlib.backends.backend_pdf import PdfPages 

12 

13logger = logging.getLogger(__name__) 

14 

15 

def _loss_evolution(df):
    """Plots the loss evolution over time (epochs)

    Parameters
    ----------

    df : pandas.DataFrame
        dataframe containing the training logs


    Returns
    -------

    figure : matplotlib.figure.Figure
        figure to be displayed or saved to file

    """

    figure = plt.figure()
    axes = figure.gca()

    axes.plot(df.epoch.values, df.loss.values, label="Training")
    if "validation_loss" in df.columns:
        axes.plot(
            df.epoch.values, df.validation_loss.values, label="Validation"
        )
        # shows a red dot on the location with the minima on the validation set
        # NOTE: numpy.argmin returns a *positional* index, so we must use
        # ``.iloc`` here — plain ``df.validation_loss[lowest_index]`` would do
        # label-based lookup and return the wrong row (or raise) whenever the
        # dataframe index is not a default RangeIndex (e.g. after filtering)
        lowest_index = numpy.argmin(df["validation_loss"].values)
        lowest_loss = df.validation_loss.iloc[lowest_index]
        lowest_epoch = df.epoch.iloc[lowest_index]

        axes.plot(
            df.epoch.values[lowest_index],
            lowest_loss,
            "mo",
            label=f"Lowest validation ({lowest_loss:.3f}@{lowest_epoch})",
        )

    if "extra_validation_losses" in df.columns:
        # These losses are in array format. So, we read all rows, then create a
        # 2d array. We transpose the array to iterate over each column and
        # plot the losses individually. They are numbered from 1.
        df["extra_validation_losses"] = df["extra_validation_losses"].apply(
            lambda x: numpy.fromstring(x.strip("[]"), sep=" ")
        )
        losses = numpy.vstack(df.extra_validation_losses.values).T
        for n, k in enumerate(losses):
            axes.plot(df.epoch.values, k, label=f"Extra validation {n+1}")

    axes.set_title("Loss over time")
    axes.set_xlabel("Epoch")
    axes.set_ylabel("Loss")

    axes.legend(loc="best")
    axes.grid(alpha=0.3)
    figure.set_layout_engine("tight")

    return figure

72 

73 

def _hardware_utilisation(df, const):
    """Plot the CPU utilisation over time (epochs).

    Parameters
    ----------

    df : pandas.DataFrame
        dataframe containing the training logs

    const : dict
        training and hardware constants


    Returns
    -------

    figure : matplotlib.figure.Figure
        figure to be displayed or saved to file

    """
    fig = plt.figure()
    ax = fig.gca()

    epochs = df.epoch.values

    # normalise raw readings into overall percentages of the machine capacity
    relative_cpu_usage = df.cpu_percent.values / const["cpu_count"]
    relative_cpu_memory = 100 * df.cpu_rss / const["cpu_memory_total"]

    ax.plot(
        epochs,
        relative_cpu_usage,
        label=f"CPU usage (cores: {const['cpu_count']})",
    )
    ax.plot(
        epochs,
        relative_cpu_memory,
        label=f"CPU memory (total: {const['cpu_memory_total']:.1f} Gb)",
    )

    # GPU curves are optional — only present when the logs recorded them
    if "gpu_percent" in df:
        ax.plot(
            epochs,
            df.gpu_percent.values,
            label=f"GPU usage (type: {const['gpu_name']})",
        )
    if "gpu_memory_percent" in df:
        ax.plot(
            epochs,
            df.gpu_memory_percent.values,
            label=f"GPU memory (total: {const['gpu_memory_total']:.1f} Gb)",
        )

    ax.set_title("Hardware utilisation over time")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Relative utilisation (%)")
    ax.set_ylim([0, 100])

    ax.legend(loc="best")
    ax.grid(alpha=0.3)
    fig.set_layout_engine("tight")

    return fig

132 

133 

def base_train_analysis(log, constants, output_pdf, verbose, **kwargs):
    """Create base train_analysis function.

    Reads the training log and hardware-constants CSV files and writes a PDF
    with one page for the loss evolution and one page for the hardware
    utilisation over the training epochs.

    Parameters
    ----------

    log : str
        path to a CSV file with the per-epoch training logs

    constants : str
        path to a single-row CSV file with training and hardware constants

    output_pdf : str
        path to the PDF file that will be (over)written with the analysis

    verbose : int
        verbosity level (unused here; accepted for CLI compatibility)

    """
    constants = pandas.read_csv(constants)
    # the constants file has a single data row — flatten it into a dict
    constants = dict(zip(constants.keys(), constants.values[0]))
    data = pandas.read_csv(log)

    # makes sure the directory to save the output PDF is there;
    # exist_ok avoids the check-then-create race of a prior os.path.exists()
    dirname = os.path.dirname(os.path.realpath(output_pdf))
    os.makedirs(dirname, exist_ok=True)

    # now, do the analysis
    with PdfPages(output_pdf) as pdf:

        figure = _loss_evolution(data)
        pdf.savefig(figure)
        plt.close(figure)

        figure = _hardware_utilisation(data, constants)
        pdf.savefig(figure)
        plt.close(figure)