Coverage for src/deepdraw/script/analyze.py: 85% (74 statements)
« prev ^ index » next — coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
2#
3# SPDX-License-Identifier: GPL-3.0-or-later
5import os
7import click
9from clapper.click import ConfigCommand, ResourceOption, verbosity_option
10from clapper.logging import setup
12logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
14from .common import save_sh_command
# End-to-end analysis command: chains the "predict", "evaluate" and
# "compare" sub-commands (invoked via ctx.invoke below) into one run.
@click.command(
    entry_point_group="deepdraw.config",
    cls=ConfigCommand,
    epilog="""Examples:

\b
    1. Re-evaluates a pre-trained M2U-Net model with DRIVE (vessel
       segmentation), on the CPU, by running inference and evaluation on results
       from its test set:

    .. code:: sh

       $ deepdraw analyze -vv m2unet drive --weight=model.path
""",
)
@click.option(
    "--output-folder",
    "-o",
    help="Path where to store experiment outputs (created if does not exist)",
    required=True,
    type=click.Path(),
    default="results",
    cls=ResourceOption,
)
@click.option(
    "--model",
    "-m",
    help="A torch.nn.Module instance implementing the network to be trained, and then evaluated",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--dataset",
    "-d",
    help="A dictionary mapping string keys to "
    "deepdraw.data.utils.SampleList2TorchDataset's. At least one key "
    "named 'train' must be available. This dataset will be used for training "
    "the network model. All other datasets will be used for prediction and "
    "evaluation. Dataset descriptions include all required pre-processing, "
    "including eventual data augmentation, which may be eventually excluded "
    "for prediction and evaluation purposes",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--second-annotator",
    # NOTE(review): "-S" is also used by --steps below; duplicate short
    # options shadow each other in click — confirm which option "-S"
    # should actually map to.
    "-S",
    help="A dataset or dictionary, like in --dataset, with the same "
    "sample keys, but with annotations from a different annotator that is "
    "going to be compared to the one in --dataset",
    required=False,
    default=None,
    cls=ResourceOption,
    show_default=True,
)
@click.option(
    "--batch-size",
    "-b",
    help="Number of samples in every batch (this parameter affects "
    "memory requirements for the network). If the number of samples in "
    "the batch is larger than the total number of samples available for "
    "training, this value is truncated. If this number is smaller, then "
    "batches of the specified size are created and fed to the network "
    "until there are no more new samples to feed (epoch is finished). "
    "If the total number of training samples is not a multiple of the "
    "batch-size, the last batch will be smaller than the first.",
    required=True,
    show_default=True,
    default=1,
    type=click.IntRange(min=1),
    cls=ResourceOption,
)
@click.option(
    "--device",
    # NOTE(review): "-d" is also used by --dataset above; duplicate short
    # options shadow each other in click — confirm which option "-d"
    # should actually map to.
    "-d",
    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
    show_default=True,
    required=True,
    default="cpu",
    cls=ResourceOption,
)
@click.option(
    "--overlayed/--no-overlayed",
    "-O",
    help="Creates overlayed representations of the output probability maps, "
    "similar to --overlayed in prediction-mode, except it includes "
    "distinctive colours for true and false positives and false negatives. "
    "If not set, or empty then do **NOT** output overlayed images.",
    show_default=True,
    default=False,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--weight",
    "-w",
    help="Path or URL to pretrained model file (.pth extension)",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--steps",
    # NOTE(review): "-S" collides with --second-annotator above (see note
    # there).
    "-S",
    help="This number is used to define the number of threshold steps to "
    "consider when evaluating the highest possible F1-score on test data.",
    default=1000,
    show_default=True,
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--parallel",
    "-P",
    help="""Use multiprocessing for data processing: if set to -1 (default),
    disables multiprocessing. Set to 0 to enable as many data loading
    instances as processing cores as available in the system. Set to >= 1 to
    enable that many multiprocessing instances for data processing.""",
    type=click.IntRange(min=-1),
    show_default=True,
    required=True,
    default=-1,
    cls=ResourceOption,
)
@click.option(
    "--plot-limits",
    "-L",
    # NOTE(review): the help text has a mismatched bracket
    # ("x_low, x_high, y_low, y_high]") — cosmetic, left as-is here since
    # it is a runtime string.
    help="""If set, this option affects the performance comparison plots. It
    must be a 4-tuple containing the bounds of the plot for the x and y axis
    respectively (format: x_low, x_high, y_low, y_high]). If not set, use
    normal bounds ([0, 1, 0, 1]) for the performance curve.""",
    default=[0.0, 1.0, 0.0, 1.0],
    show_default=True,
    nargs=4,
    type=float,
    cls=ResourceOption,
)
@verbosity_option(logger=logger, cls=ResourceOption)
@click.pass_context
def analyze(
    ctx,
    model,
    output_folder,
    batch_size,
    dataset,
    second_annotator,
    device,
    overlayed,
    weight,
    steps,
    parallel,
    plot_limits,
    verbose,
    **kwargs,
):
    """Runs a complete evaluation from prediction to comparison.

    This script is just a wrapper around the individual scripts for running
    prediction and evaluating FCN models. It organises the output in a
    preset way::

    \b
       └─ <output-folder>/
          ├── predictions/  #the prediction outputs for the train/test set
          ├── overlayed/  #the overlayed outputs for the train/test set
             ├── predictions/  #predictions overlayed on the input images
             ├── analysis/  #predictions overlayed on the input images
             ├              #including analysis of false positives, negatives
             ├              #and true positives
             └── second-annotator/  #if set, store overlayed images for the
                                    #second annotator here
          └── analysis /  #the outputs of the analysis of both train/test sets
                          #includes second-annotator "measures" as well, if
                          # configured

    N.B.: The tool is designed to prevent analysis bias and allows one to
    provide separate subsets for training and evaluation. Instead of using
    simple datasets, datasets for full experiment running should be
    dictionaries with specific subset names:

    * ``__train__``: dataset used for training, prioritarily. It is typically
      the dataset containing data augmentation pipelines.
    * ``train`` (optional): a copy of the ``__train__`` dataset, without data
      augmentation, that will be evaluated alongside other sets available
    * ``*``: any other name, not starting with an underscore character (``_``),
      will be considered a test set for evaluation.

    N.B.2: The threshold used for calculating the F1-score on the test set, or
    overlay analysis (false positives, negatives and true positives overprinted
    on the original image) also follows the logic above.
    """

    # Record the command line used for reproducibility, but only once per
    # experiment folder (avoid clobbering the record of the first run).
    command_sh = os.path.join(output_folder, "command.sh")
    if not os.path.exists(command_sh):
        # only save if experiment has not saved yet something similar
        save_sh_command(command_sh)

    # Prediction
    logger.info("Started prediction")

    from .predict import predict

    predictions_folder = os.path.join(output_folder, "predictions")
    # Overlays from the prediction stage go to a dedicated sub-folder;
    # disabled entirely (None) when --no-overlayed.
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "predictions")
        if overlayed
        else None
    )

    ctx.invoke(
        predict,
        output_folder=predictions_folder,
        model=model,
        dataset=dataset,
        batch_size=batch_size,
        device=device,
        weight=weight,
        overlayed=overlayed_folder,
        parallel=parallel,
        verbose=verbose,
    )
    logger.info("Ended prediction")

    # Evaluation
    logger.info("Started evaluation")

    from .evaluate import evaluate

    # Re-point overlays to the analysis sub-folder for the evaluation stage.
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "analysis")
        if overlayed
        else None
    )

    # choosing the overlayed_threshold: prefer tuning on a validation set,
    # fall back to the train set, and finally to a fixed 0.5 cut-off.
    if "validation" in dataset:
        threshold = "validation"
    elif "train" in dataset:
        threshold = "train"
    else:
        threshold = 0.5
    logger.info(f"Setting --threshold={threshold}...")

    analysis_folder = os.path.join(output_folder, "analysis")
    ctx.invoke(
        evaluate,
        output_folder=analysis_folder,
        predictions_folder=predictions_folder,
        dataset=dataset,
        second_annotator=second_annotator,
        overlayed=overlayed_folder,
        threshold=threshold,
        steps=steps,
        parallel=parallel,
        verbose=verbose,
    )

    logger.info("Ended evaluation")

    # Comparison
    logger.info("Started comparison")

    # compare performances on the various sets
    from .compare import compare

    # "systems" is a flat [label, csv_path, label, csv_path, ...] list, as
    # expected by the compare command's label_path argument.
    systems = []
    for k, v in dataset.items():
        # keys starting with "_" (e.g. "__train__") are training-only
        # subsets and are never compared
        if k.startswith("_"):
            logger.info(f"Skipping dataset '{k}' (not to be compared)")
            continue
        candidate = os.path.join(analysis_folder, f"{k}.csv")
        if not os.path.exists(candidate):
            logger.error(
                f"Skipping dataset '{k}' "
                f"(candidate CSV file `{candidate}` does not exist!)"
            )
            continue
        # NOTE(review): re-joins the same path instead of re-using
        # ``candidate`` — harmless, but redundant.
        systems += [k, os.path.join(analysis_folder, f"{k}.csv")]
    if second_annotator is not None:
        # Append second-annotator entries only for subsets that exist in
        # --dataset, match its sample keys, and have an analysis CSV.
        for k, v in second_annotator.items():
            if k.startswith("_"):
                logger.info(
                    f"Skipping second-annotator '{k}' " f"(not to be compared)"
                )
                continue
            if k not in dataset:
                logger.info(
                    f"Skipping second-annotator '{k}' "
                    f"(no equivalent `dataset[{k}]`)"
                )
                continue
            if not dataset[k].all_keys_match(v):
                logger.warning(
                    f"Skipping second-annotator '{k}' "
                    f"(keys do not match `dataset[{k}]`?)"
                )
                continue
            candidate = os.path.join(
                analysis_folder, "second-annotator", f"{k}.csv"
            )
            if not os.path.exists(candidate):
                logger.error(
                    f"Skipping second-annotator '{k}' "
                    f"(candidate CSV file `{candidate}` does not exist!)"
                )
                continue
            systems += [f"{k} (2nd. annot.)", candidate]

    output_figure = os.path.join(output_folder, "comparison.pdf")
    output_table = os.path.join(output_folder, "comparison.rst")

    ctx.invoke(
        compare,
        label_path=systems,
        output_figure=output_figure,
        output_table=output_table,
        threshold=threshold,
        plot_limits=plot_limits,
        verbose=verbose,
    )

    logger.info("Ended comparison")