#!/usr/bin/env python
# coding=utf-8

import logging
import os

import click

from bob.extension.scripts.click_helper import (
    ConfigCommand,
    ResourceOption,
    verbosity_option,
)

from .binseg import save_sh_command

logger = logging.getLogger(__name__)


@click.command(
    entry_point_group="bob.ip.binseg.config",
    cls=ConfigCommand,
    epilog="""Examples:

\b
    1. Re-evaluates a pre-trained M2U-Net model with DRIVE (vessel
       segmentation), on the CPU, by running inference and evaluation on results
       from its test set:

       $ bob binseg analyze -vv m2unet drive --weight=model.path

""",
)
@click.option(
    "--output-folder",
    "-o",
    help="Path where to store experiment outputs (created if it does not exist)",
    required=True,
    type=click.Path(),
    default="results",
    cls=ResourceOption,
)
@click.option(
    "--model",
    "-m",
    help="A torch.nn.Module instance implementing the network to be evaluated",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--dataset",
    "-d",
    help="A dictionary mapping string keys to "
    "bob.ip.binseg.data.utils.SampleList2TorchDataset's. At least one key "
    "named 'train' must be available. This dataset will be used for training "
    "the network model. All other datasets will be used for prediction and "
    "evaluation. Dataset descriptions include all required pre-processing, "
    "including any data augmentation, which may be excluded for prediction "
    "and evaluation purposes",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--second-annotator",
    "-S",
    help="A dataset or dictionary, like in --dataset, with the same "
    "sample keys, but with annotations from a different annotator, which "
    "will be compared to the ones in --dataset",
    required=False,
    default=None,
    cls=ResourceOption,
    show_default=True,
)
@click.option(
    "--batch-size",
    "-b",
    help="Number of samples in every batch (this parameter affects "
    "memory requirements for the network). If the number of samples in "
    "the batch is larger than the total number of samples available, "
    "this value is truncated. Otherwise, batches of the specified size "
    "are created and fed to the network until there are no more new "
    "samples to feed (the epoch is finished). If the total number of "
    "samples is not a multiple of the batch size, the last batch will be "
    "smaller than the first.",
    required=True,
    show_default=True,
    default=1,
    type=click.IntRange(min=1),
    cls=ResourceOption,
)

@click.option(
    "--device",
    "-d",
    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
    show_default=True,
    required=True,
    default="cpu",
    cls=ResourceOption,
)
@click.option(
    "--overlayed/--no-overlayed",
    "-O",
    help="Creates overlayed representations of the output probability maps, "
    "similar to --overlayed in prediction mode, except they include "
    "distinctive colours for true and false positives and false negatives. "
    "If not set, overlayed images are **NOT** produced.",
    show_default=True,
    default=False,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--weight",
    "-w",
    help="Path or URL to pretrained model file (.pth extension)",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--steps",
    "-S",
    help="Number of threshold steps to consider when evaluating the highest "
    "possible F1-score on test data.",
    default=1000,
    show_default=True,
    required=True,
    cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
@click.pass_context
def analyze(
    ctx,
    model,
    output_folder,
    batch_size,
    dataset,
    second_annotator,
    device,
    overlayed,
    weight,
    steps,
    verbose,
    **kwargs,
):
    """Runs a complete evaluation from prediction to comparison

    This script is just a wrapper around the individual scripts for running
    prediction and evaluating FCN models. It organises the output in a
    preset way::

    \b
    └─ <output-folder>/
       ├── predictions/  #the prediction outputs for the train/test set
       ├── overlayed/  #the overlayed outputs for the train/test set
       │   ├── predictions/  #predictions overlayed on the input images
       │   ├── analysis/  #predictions overlayed on the input images
       │   │              #including analysis of false positives, negatives
       │   │              #and true positives
       │   └── second-annotator/  #if set, store overlayed images for the
       │                          #second annotator here
       └── analysis/  #the outputs of the analysis of both train/test sets
                      #includes second-annotator "measures" as well, if
                      #configured

    N.B.: The tool is designed to prevent analysis bias and allows one to
    provide separate subsets for training and evaluation. Instead of using
    simple datasets, datasets for full experiment running should be
    dictionaries with specific subset names:

    * ``__train__``: dataset used for training (it takes precedence over
      ``train``). It is typically the dataset containing data augmentation
      pipelines.
    * ``train`` (optional): a copy of the ``__train__`` dataset, without data
      augmentation, that will be evaluated alongside the other available sets
    * ``*``: any other name, not starting with an underscore character (``_``),
      will be considered a test set for evaluation.

    N.B.2: The threshold used for calculating the F1-score on the test set,
    and for the overlay analysis (false positives, negatives and true
    positives overprinted on the original image), also follows the logic
    above.
    """
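
    # The dataset dictionary documented above could look roughly like the
    # following (the "test" key and the concrete dataset objects are
    # illustrative only):
    #
    #   {
    #       "__train__": <SampleList2TorchDataset with data augmentation>,
    #       "train": <the same samples, without augmentation>,
    #       "test": <held-out samples used for evaluation>,
    #   }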

    command_sh = os.path.join(output_folder, "command.sh")
    if not os.path.exists(command_sh):
        # only save if a previous run has not already recorded the command
        save_sh_command(command_sh)

    # Prediction
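    # ``ctx.invoke`` (used below) calls the ``predict`` command's callback
    # directly, forwarding the options already resolved for this command
    # (model, dataset, batch size, device and weights), so prediction does
    # not have to be launched as a separate step.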

    logger.info("Started prediction")

    from .predict import predict

    predictions_folder = os.path.join(output_folder, "predictions")
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "predictions")
        if overlayed
        else None
    )

    ctx.invoke(
        predict,
        output_folder=predictions_folder,
        model=model,
        dataset=dataset,
        batch_size=batch_size,
        device=device,
        weight=weight,
        overlayed=overlayed_folder,
        verbose=verbose,
    )
    logger.info("Ended prediction")

    # Evaluation
    logger.info("Started evaluation")

    from .evaluate import evaluate

    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "analysis")
        if overlayed
        else None
    )
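
    # Prefer estimating the operating threshold on a "validation" subset when
    # one is available, fall back to "train", and only then to a fixed 0.5, so
    # the threshold is never tuned on the test data itself (see the analysis
    # bias note in the docstring and the --steps option above).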

    # choosing the overlayed_threshold
    if "validation" in dataset:
        threshold = "validation"
    elif "train" in dataset:
        threshold = "train"
    else:
        threshold = 0.5
    logger.info(f"Setting --threshold={threshold}...")

    analysis_folder = os.path.join(output_folder, "analysis")
    ctx.invoke(
        evaluate,
        output_folder=analysis_folder,
        predictions_folder=predictions_folder,
        dataset=dataset,
        second_annotator=second_annotator,
        overlayed=overlayed_folder,
        threshold=threshold,
        steps=steps,
        verbose=verbose,
    )

    logger.info("Ended evaluation")

    # Comparison
    logger.info("Started comparison")

    # compare performances on the various sets
    from .compare import compare
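
    # ``systems`` is assembled as a flat list of alternating label / CSV-path
    # entries (e.g. ["train", ".../train.csv", "test", ".../test.csv"]), which
    # is the format passed to the ``compare`` command via ``label_path`` below.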

    systems = []
    for k, v in dataset.items():
        if k.startswith("_"):
            logger.info(f"Skipping dataset '{k}' (not to be compared)")
            continue
        candidate = os.path.join(analysis_folder, f"{k}.csv")
        if not os.path.exists(candidate):
            logger.error(
                f"Skipping dataset '{k}' "
                f"(candidate CSV file `{candidate}` does not exist!)"
            )
            continue
        systems += [k, candidate]
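
    # Second-annotator results only enter the comparison when an equivalent
    # subset exists in --dataset and its sample keys match; anything else is
    # skipped with a log message.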

    if second_annotator is not None:
        for k, v in second_annotator.items():
            if k.startswith("_"):
                logger.info(
                    f"Skipping second-annotator '{k}' (not to be compared)"
                )
                continue
            if k not in dataset:
                logger.info(
                    f"Skipping second-annotator '{k}' "
                    f"(no equivalent `dataset[{k}]`)"
                )
                continue
            if not dataset[k].all_keys_match(v):
                logger.warning(
                    f"Skipping second-annotator '{k}' "
                    f"(keys do not match `dataset[{k}]`?)"
                )
                continue
            candidate = os.path.join(
                analysis_folder, "second-annotator", f"{k}.csv"
            )
            if not os.path.exists(candidate):
                logger.error(
                    f"Skipping second-annotator '{k}' "
                    f"(candidate CSV file `{candidate}` does not exist!)"
                )
                continue
            systems += [f"{k} (2nd. annot.)", candidate]

    output_figure = os.path.join(output_folder, "comparison.pdf")
    output_table = os.path.join(output_folder, "comparison.rst")

    ctx.invoke(
        compare,
        label_path=systems,
        output_figure=output_figure,
        output_table=output_table,
        threshold=threshold,
        verbose=verbose,
    )

    logger.info("Ended comparison")