Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# coding=utf-8
4import logging
5import os
7import click
9from bob.extension.scripts.click_helper import (
10 ConfigCommand,
11 ResourceOption,
12 verbosity_option,
13)
15from .binseg import save_sh_command
# Module-level logger named after this module, used by analyze() below to
# report pipeline progress
logger = logging.getLogger(__name__)
# NOTE(review): two pairs of options below declare the same short flag:
# --dataset and --device both use "-d", and --second-annotator and --steps
# both use "-S".  click registers short flags last-one-wins, so each alias
# most likely resolves to only one of the two options — confirm and assign
# unique short flags (the long option names are unaffected).
@click.command(
    entry_point_group="bob.ip.binseg.config",
    cls=ConfigCommand,
    epilog="""Examples:

\b
    1. Re-evaluates a pre-trained M2U-Net model with DRIVE (vessel
       segmentation), on the CPU, by running inference and evaluation on results
       from its test set:

       $ bob binseg analyze -vv m2unet drive --weight=model.path

""",
)
@click.option(
    "--output-folder",
    "-o",
    help="Path where to store experiment outputs (created if does not exist)",
    required=True,
    type=click.Path(),
    default="results",
    cls=ResourceOption,
)
@click.option(
    "--model",
    "-m",
    help="A torch.nn.Module instance implementing the network to be trained, and then evaluated",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--dataset",
    "-d",
    help="A dictionary mapping string keys to "
    "bob.ip.binseg.data.utils.SampleList2TorchDataset's. At least one key "
    "named 'train' must be available. This dataset will be used for training "
    "the network model. All other datasets will be used for prediction and "
    "evaluation. Dataset descriptions include all required pre-processing, "
    "including eventual data augmentation, which may be eventually excluded "
    "for prediction and evaluation purposes",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--second-annotator",
    "-S",
    help="A dataset or dictionary, like in --dataset, with the same "
    "sample keys, but with annotations from a different annotator that is "
    "going to be compared to the one in --dataset",
    required=False,
    default=None,
    cls=ResourceOption,
    show_default=True,
)
@click.option(
    "--batch-size",
    "-b",
    help="Number of samples in every batch (this parameter affects "
    "memory requirements for the network). If the number of samples in "
    "the batch is larger than the total number of samples available for "
    "training, this value is truncated. If this number is smaller, then "
    "batches of the specified size are created and fed to the network "
    "until there are no more new samples to feed (epoch is finished). "
    "If the total number of training samples is not a multiple of the "
    "batch-size, the last batch will be smaller than the first.",
    required=True,
    show_default=True,
    default=1,
    type=click.IntRange(min=1),
    cls=ResourceOption,
)
@click.option(
    "--device",
    "-d",
    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
    show_default=True,
    required=True,
    default="cpu",
    cls=ResourceOption,
)
@click.option(
    "--overlayed/--no-overlayed",
    "-O",
    help="Creates overlayed representations of the output probability maps, "
    "similar to --overlayed in prediction-mode, except it includes "
    "distinctive colours for true and false positives and false negatives. "
    "If not set, or empty then do **NOT** output overlayed images.",
    show_default=True,
    default=False,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--weight",
    "-w",
    help="Path or URL to pretrained model file (.pth extension)",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--steps",
    "-S",
    help="This number is used to define the number of threshold steps to "
    "consider when evaluating the highest possible F1-score on test data.",
    default=1000,
    show_default=True,
    required=True,
    cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
@click.pass_context
def analyze(
    ctx,
    model,
    output_folder,
    batch_size,
    dataset,
    second_annotator,
    device,
    overlayed,
    weight,
    steps,
    verbose,
    **kwargs,
):
    """Runs a complete evaluation from prediction to comparison

    This script is just a wrapper around the individual scripts for running
    prediction and evaluating FCN models.  It organises the output in a
    preset way::

\b
    └─ <output-folder>/
       ├── command.sh  #copy of the launching command-line (saved only once)
       ├── predictions/  #prediction outputs, one sub-folder per dataset split
       ├── overlayed/  #only created if --overlayed is set
       │  ├── predictions/  #output probability maps overlayed on the
       │  │                 #input images
       │  └── analysis/  #predictions overlayed on the input images,
       │                 #including analysis of false positives, negatives
       │                 #and true positives
       ├── analysis/  #evaluation outputs, one CSV per dataset split;
       │  │           #also includes second-annotator measures, if configured
       │  └── second-annotator/  #second-annotator CSV measures, if set
       ├── comparison.pdf  #figure comparing all analyzed splits
       └── comparison.rst  #table comparing all analyzed splits

    N.B.: The tool is designed to prevent analysis bias and allows one to
    provide separate subsets for training and evaluation. Instead of using
    simple datasets, datasets for full experiment running should be
    dictionaries with specific subset names:

    * ``__train__``: dataset used for training, prioritarily. It is typically
      the dataset containing data augmentation pipelines.
    * ``train`` (optional): a copy of the ``__train__`` dataset, without data
      augmentation, that will be evaluated alongside other sets available
    * ``*``: any other name, not starting with an underscore character (``_``),
      will be considered a test set for evaluation.

    N.B.2: The threshold used for calculating the F1-score on the test set, or
    overlay analysis (false positives, negatives and true positives overprinted
    on the original image) also follows the logic above.
    """

    # record the launching command-line once per experiment folder, so the
    # run can be reproduced later
    command_sh = os.path.join(output_folder, "command.sh")
    if not os.path.exists(command_sh):
        # only save if experiment has not saved yet something similar
        save_sh_command(command_sh)

    # Prediction
    logger.info("Started prediction")

    # imported locally, presumably to delay heavy imports until the command
    # actually runs — confirm before hoisting to module level
    from .predict import predict

    predictions_folder = os.path.join(output_folder, "predictions")
    # prediction overlays are only produced when --overlayed is set
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "predictions")
        if overlayed
        else None
    )

    # run the stand-alone "predict" command with this command's options
    ctx.invoke(
        predict,
        output_folder=predictions_folder,
        model=model,
        dataset=dataset,
        batch_size=batch_size,
        device=device,
        weight=weight,
        overlayed=overlayed_folder,
        verbose=verbose,
    )
    logger.info("Ended prediction")

    # Evaluation
    logger.info("Started evaluation")

    from .evaluate import evaluate

    # analysis overlays (TP/FP/FN colouring) go to a separate sub-folder
    # from the prediction overlays above
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "analysis")
        if overlayed
        else None
    )

    # choosing the overlayed_threshold: prefer a threshold derived from the
    # "validation" split, then from "train"; otherwise fall back to a fixed
    # 0.5 cut-off.  (The split-name string is interpreted by the "evaluate"
    # command — presumably it tunes the threshold on that split; confirm in
    # .evaluate.)
    if "validation" in dataset:
        threshold = "validation"
    elif "train" in dataset:
        threshold = "train"
    else:
        threshold = 0.5
    logger.info(f"Setting --threshold={threshold}...")

    analysis_folder = os.path.join(output_folder, "analysis")
    ctx.invoke(
        evaluate,
        output_folder=analysis_folder,
        predictions_folder=predictions_folder,
        dataset=dataset,
        second_annotator=second_annotator,
        overlayed=overlayed_folder,
        threshold=threshold,
        steps=steps,
        verbose=verbose,
    )

    logger.info("Ended evaluation")

    # Comparison
    logger.info("Started comparison")

    # compare performances on the various sets
    from .compare import compare

    # "systems" is a flat list of alternating label / CSV-path entries, the
    # shape expected by compare's label_path argument below
    systems = []
    for k, v in dataset.items():
        # split names starting with "_" are internal (e.g. "__train__") and
        # are excluded from the comparison
        if k.startswith("_"):
            logger.info(f"Skipping dataset '{k}' (not to be compared)")
            continue
        candidate = os.path.join(analysis_folder, f"{k}.csv")
        if not os.path.exists(candidate):
            logger.error(
                f"Skipping dataset '{k}' "
                f"(candidate CSV file `{candidate}` does not exist!)"
            )
            continue
        systems += [k, os.path.join(analysis_folder, f"{k}.csv")]
    if second_annotator is not None:
        for k, v in second_annotator.items():
            if k.startswith("_"):
                logger.info(
                    f"Skipping second-annotator '{k}' " f"(not to be compared)"
                )
                continue
            # a second-annotator split only makes sense when a split of the
            # same name exists in --dataset...
            if k not in dataset:
                logger.info(
                    f"Skipping second-annotator '{k}' "
                    f"(no equivalent `dataset[{k}]`)"
                )
                continue
            # ...and when both splits cover exactly the same sample keys
            if not dataset[k].all_keys_match(v):
                logger.warning(
                    f"Skipping second-annotator '{k}' "
                    f"(keys do not match `dataset[{k}]`?)"
                )
                continue
            candidate = os.path.join(
                analysis_folder, "second-annotator", f"{k}.csv"
            )
            if not os.path.exists(candidate):
                logger.error(
                    f"Skipping second-annotator '{k}' "
                    f"(candidate CSV file `{candidate}` does not exist!)"
                )
                continue
            systems += [f"{k} (2nd. annot.)", candidate]

    output_figure = os.path.join(output_folder, "comparison.pdf")
    output_table = os.path.join(output_folder, "comparison.rst")

    ctx.invoke(
        compare,
        label_path=systems,
        output_figure=output_figure,
        output_table=output_table,
        threshold=threshold,
        verbose=verbose,
    )

    logger.info("Ended comparison")