Coverage for src/deepdraw/script/analyze.py: 85%

74 statements  

# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later

import os

import click

from clapper.click import ConfigCommand, ResourceOption, verbosity_option
from clapper.logging import setup

logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")

from .common import save_sh_command


@click.command(
    entry_point_group="deepdraw.config",
    cls=ConfigCommand,
    epilog="""Examples:

\b
    1. Re-evaluates a pre-trained M2U-Net model with DRIVE (vessel
       segmentation), on the CPU, by running inference and evaluation on
       results from its test set:

       .. code:: sh

          $ deepdraw analyze -vv m2unet drive --weight=model.path
""",
)
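# Note: with cls=ConfigCommand, positional arguments name configuration
# resources from the "deepdraw.config" entry-point group (e.g. ``m2unet``
# and ``drive`` in the example above); options declared with
# cls=ResourceOption may also be set from such configuration files.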
@click.option(
    "--output-folder",
    "-o",
    help="Path where to store experiment outputs (created if it does not exist)",
    required=True,
    type=click.Path(),
    default="results",
    cls=ResourceOption,
)
@click.option(
    "--model",
    "-m",
    help="A torch.nn.Module instance implementing the network to be trained, and then evaluated",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--dataset",
    "-d",
    help="A dictionary mapping string keys to "
    "deepdraw.data.utils.SampleList2TorchDataset's. At least one key "
    "named 'train' must be available. This dataset will be used for training "
    "the network model. All other datasets will be used for prediction and "
    "evaluation. Dataset descriptions include all required pre-processing, "
    "including any data augmentation, which may be excluded "
    "for prediction and evaluation purposes",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--second-annotator",
    "-S",
    help="A dataset or dictionary, like in --dataset, with the same "
    "sample keys, but with annotations from a different annotator, which "
    "will be compared to those in --dataset",
    required=False,
    default=None,
    cls=ResourceOption,
    show_default=True,
)
@click.option(
    "--batch-size",
    "-b",
    help="Number of samples in every batch (this parameter affects "
    "memory requirements for the network). If the number of samples in "
    "the batch is larger than the total number of samples available for "
    "training, this value is truncated. If this number is smaller, then "
    "batches of the specified size are created and fed to the network "
    "until there are no more new samples to feed (epoch is finished). "
    "If the total number of training samples is not a multiple of the "
    "batch-size, the last batch will be smaller than the first.",
    required=True,
    show_default=True,
    default=1,
    type=click.IntRange(min=1),
    cls=ResourceOption,
)
@click.option(
    "--device",
    "-d",
    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
    show_default=True,
    required=True,
    default="cpu",
    cls=ResourceOption,
)
@click.option(
    "--overlayed/--no-overlayed",
    "-O",
    help="Creates overlayed representations of the output probability maps, "
    "similar to --overlayed in prediction-mode, except it includes "
    "distinctive colours for true and false positives and false negatives. "
    "If not set, or empty, then do **NOT** output overlayed images.",
    show_default=True,
    default=False,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--weight",
    "-w",
    help="Path or URL to pretrained model file (.pth extension)",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--steps",
    "-S",
    help="Number of threshold steps to consider when evaluating the "
    "highest possible F1-score on test data.",
    default=1000,
    show_default=True,
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--parallel",
    "-P",
    help="""Use multiprocessing for data processing: if set to -1 (default),
    disables multiprocessing. Set to 0 to enable as many data loading
    instances as there are processing cores available in the system. Set to
    >= 1 to enable that many multiprocessing instances for data
    processing.""",
    type=click.IntRange(min=-1),
    show_default=True,
    required=True,
    default=-1,
    cls=ResourceOption,
)
@click.option(
    "--plot-limits",
    "-L",
    help="""If set, this option affects the performance comparison plots. It
    must be a 4-tuple containing the bounds of the plot for the x and y axes,
    respectively (format: [x_low, x_high, y_low, y_high]). If not set, use
    the normal bounds ([0, 1, 0, 1]) for the performance curve.""",
    default=[0.0, 1.0, 0.0, 1.0],
    show_default=True,
    nargs=4,
    type=float,
    cls=ResourceOption,
)
@verbosity_option(logger=logger, cls=ResourceOption)
@click.pass_context
def analyze(
    ctx,
    model,
    output_folder,
    batch_size,
    dataset,
    second_annotator,
    device,
    overlayed,
    weight,
    steps,
    parallel,
    plot_limits,
    verbose,
    **kwargs,
):
171 """Runs a complete evaluation from prediction to comparison. 

172 

173 This script is just a wrapper around the individual scripts for running 

174 prediction and evaluating FCN models. It organises the output in a 

175 preset way:: 

176 

177 \b 

178 └─ <output-folder>/ 

179 ├── predictions/ #the prediction outputs for the train/test set 

180 ├── overlayed/ #the overlayed outputs for the train/test set 

181 ├── predictions/ #predictions overlayed on the input images 

182 ├── analysis/ #predictions overlayed on the input images 

183 ├ #including analysis of false positives, negatives 

184 ├ #and true positives 

185 └── second-annotator/ #if set, store overlayed images for the 

186 #second annotator here 

187 └── analysis / #the outputs of the analysis of both train/test sets 

188 #includes second-annotator "mesures" as well, if 

189 # configured 

190 

191 N.B.: The tool is designed to prevent analysis bias and allows one to 

192 provide separate subsets for training and evaluation. Instead of using 

193 simple datasets, datasets for full experiment running should be 

194 dictionaries with specific subset names: 

195 

196 * ``__train__``: dataset used for training, prioritarily. It is typically 

197 the dataset containing data augmentation pipelines. 

198 * ``train`` (optional): a copy of the ``__train__`` dataset, without data 

199 augmentation, that will be evaluated alongside other sets available 

200 * ``*``: any other name, not starting with an underscore character (``_``), 

201 will be considered a test set for evaluation. 

202 

203 N.B.2: The threshold used for calculating the F1-score on the test set, or 

204 overlay analysis (false positives, negatives and true positives overprinted 

205 on the original image) also follows the logic above. 

206 """ 

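    # Illustrative shape of the expected ``dataset`` dictionary; the subset
    # objects below are hypothetical placeholders, not deepdraw API:
    #
    #     dataset = {
    #         "__train__": augmented_train_subset,  # used for training
    #         "train": plain_train_subset,  # optional, also evaluated
    #         "test": test_subset,  # any other non-underscore key: test set
    #     }
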
    command_sh = os.path.join(output_folder, "command.sh")
    if not os.path.exists(command_sh):
        # only save if the experiment has not already saved something similar
        save_sh_command(command_sh)

    # Prediction
    logger.info("Started prediction")

    from .predict import predict

    predictions_folder = os.path.join(output_folder, "predictions")
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "predictions")
        if overlayed
        else None
    )

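    # click.Context.invoke() runs another command's callback directly,
    # forwarding the parameter values resolved for this wrapper command.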
    ctx.invoke(
        predict,
        output_folder=predictions_folder,
        model=model,
        dataset=dataset,
        batch_size=batch_size,
        device=device,
        weight=weight,
        overlayed=overlayed_folder,
        parallel=parallel,
        verbose=verbose,
    )
    logger.info("Ended prediction")

    # Evaluation
    logger.info("Started evaluation")

    from .evaluate import evaluate

    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "analysis")
        if overlayed
        else None
    )

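    # To avoid biasing test results, the F1-maximising threshold is taken
    # from a subset not used for testing: the validation set if present,
    # otherwise the train set; failing both, a fixed 0.5 cut-off is used.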
    # choosing the overlayed_threshold
    if "validation" in dataset:
        threshold = "validation"
    elif "train" in dataset:
        threshold = "train"
    else:
        threshold = 0.5
    logger.info(f"Setting --threshold={threshold}...")

    analysis_folder = os.path.join(output_folder, "analysis")
    ctx.invoke(
        evaluate,
        output_folder=analysis_folder,
        predictions_folder=predictions_folder,
        dataset=dataset,
        second_annotator=second_annotator,
        overlayed=overlayed_folder,
        threshold=threshold,
        steps=steps,
        parallel=parallel,
        verbose=verbose,
    )

    logger.info("Ended evaluation")

    # Comparison
    logger.info("Started comparison")

    # compare performances on the various sets
    from .compare import compare

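    # ``systems`` is assembled as a flat list of alternating labels and CSV
    # paths, the layout the compare command expects for ``label_path``.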
    systems = []
    for k, v in dataset.items():
        if k.startswith("_"):
            logger.info(f"Skipping dataset '{k}' (not to be compared)")
            continue
        candidate = os.path.join(analysis_folder, f"{k}.csv")
        if not os.path.exists(candidate):
            logger.error(
                f"Skipping dataset '{k}' "
                f"(candidate CSV file `{candidate}` does not exist!)"
            )
            continue
        systems += [k, candidate]
    if second_annotator is not None:
        for k, v in second_annotator.items():
            if k.startswith("_"):
                logger.info(
                    f"Skipping second-annotator '{k}' (not to be compared)"
                )
                continue
            if k not in dataset:
                logger.info(
                    f"Skipping second-annotator '{k}' "
                    f"(no equivalent `dataset[{k}]`)"
                )
                continue
            if not dataset[k].all_keys_match(v):
                logger.warning(
                    f"Skipping second-annotator '{k}' "
                    f"(keys do not match `dataset[{k}]`?)"
                )
                continue
            candidate = os.path.join(
                analysis_folder, "second-annotator", f"{k}.csv"
            )
            if not os.path.exists(candidate):
                logger.error(
                    f"Skipping second-annotator '{k}' "
                    f"(candidate CSV file `{candidate}` does not exist!)"
                )
                continue
            systems += [f"{k} (2nd. annot.)", candidate]

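    # Aggregate all collected systems into a single comparison figure (PDF)
    # and table (reStructuredText).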
    output_figure = os.path.join(output_folder, "comparison.pdf")
    output_table = os.path.join(output_folder, "comparison.rst")

    ctx.invoke(
        compare,
        label_path=systems,
        output_figure=output_figure,
        output_table=output_table,
        threshold=threshold,
        plot_limits=plot_limits,
        verbose=verbose,
    )

    logger.info("Ended comparison")