Coverage for src/deepdraw/script/analyze.py: 85% (74 statements)
« prev ^ index » next — coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
2#
3# SPDX-License-Identifier: GPL-3.0-or-later
5import os
7import click
9from clapper.click import ConfigCommand, ResourceOption, verbosity_option
10from clapper.logging import setup
12logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
14from .common import save_sh_command
# End-to-end analysis command: chains the "predict", "evaluate" and
# "compare" sub-commands (invoked via ctx.invoke below) into one run.
@click.command(
    entry_point_group="deepdraw.config",
    cls=ConfigCommand,
    epilog="""Examples:

\b
    1. Re-evaluates a pre-trained M2U-Net model with DRIVE (vessel
       segmentation), on the CPU, by running inference and evaluation on results
       from its test set:

    .. code:: sh

       $ deepdraw analyze -vv m2unet drive --weight=model.path
""",
)
@click.option(
    "--output-folder",
    "-o",
    help="Path where to store experiment outputs (created if does not exist)",
    required=True,
    type=click.Path(),
    default="results",
    cls=ResourceOption,
)
@click.option(
    "--model",
    "-m",
    help="A torch.nn.Module instance implementing the network to be trained, and then evaluated",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--dataset",
    "-d",
    help="A dictionary mapping string keys to "
    "deepdraw.data.utils.SampleList2TorchDataset's. At least one key "
    "named 'train' must be available. This dataset will be used for training "
    "the network model. All other datasets will be used for prediction and "
    "evaluation. Dataset descriptions include all required pre-processing, "
    "including eventual data augmentation, which may be eventually excluded "
    "for prediction and evaluation purposes",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--second-annotator",
    # NOTE(review): "-S" is also used by --steps below; duplicate short
    # options shadow each other in click — confirm which option "-S"
    # should actually map to.
    "-S",
    help="A dataset or dictionary, like in --dataset, with the same "
    "sample keys, but with annotations from a different annotator that is "
    "going to be compared to the one in --dataset",
    required=False,
    default=None,
    cls=ResourceOption,
    show_default=True,
)
@click.option(
    "--batch-size",
    "-b",
    help="Number of samples in every batch (this parameter affects "
    "memory requirements for the network). If the number of samples in "
    "the batch is larger than the total number of samples available for "
    "training, this value is truncated. If this number is smaller, then "
    "batches of the specified size are created and fed to the network "
    "until there are no more new samples to feed (epoch is finished). "
    "If the total number of training samples is not a multiple of the "
    "batch-size, the last batch will be smaller than the first.",
    required=True,
    show_default=True,
    default=1,
    type=click.IntRange(min=1),
    cls=ResourceOption,
)
@click.option(
    "--device",
    # NOTE(review): "-d" is also used by --dataset above; duplicate short
    # options shadow each other in click — confirm which option "-d"
    # should actually map to.
    "-d",
    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
    show_default=True,
    required=True,
    default="cpu",
    cls=ResourceOption,
)
@click.option(
    "--overlayed/--no-overlayed",
    "-O",
    help="Creates overlayed representations of the output probability maps, "
    "similar to --overlayed in prediction-mode, except it includes "
    "distinctive colours for true and false positives and false negatives. "
    "If not set, or empty then do **NOT** output overlayed images.",
    show_default=True,
    default=False,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--weight",
    "-w",
    help="Path or URL to pretrained model file (.pth extension)",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--steps",
    # NOTE(review): "-S" collides with --second-annotator above (see note
    # there).
    "-S",
    help="This number is used to define the number of threshold steps to "
    "consider when evaluating the highest possible F1-score on test data.",
    default=1000,
    show_default=True,
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--parallel",
    "-P",
    help="""Use multiprocessing for data processing: if set to -1 (default),
    disables multiprocessing. Set to 0 to enable as many data loading
    instances as processing cores as available in the system. Set to >= 1 to
    enable that many multiprocessing instances for data processing.""",
    type=click.IntRange(min=-1),
    show_default=True,
    required=True,
    default=-1,
    cls=ResourceOption,
)
@click.option(
    "--plot-limits",
    "-L",
    # NOTE(review): the help text has a mismatched bracket
    # ("x_low, x_high, y_low, y_high]") — cosmetic, left as-is here since
    # it is a runtime string.
    help="""If set, this option affects the performance comparison plots. It
    must be a 4-tuple containing the bounds of the plot for the x and y axis
    respectively (format: x_low, x_high, y_low, y_high]). If not set, use
    normal bounds ([0, 1, 0, 1]) for the performance curve.""",
    default=[0.0, 1.0, 0.0, 1.0],
    show_default=True,
    nargs=4,
    type=float,
    cls=ResourceOption,
)
@verbosity_option(logger=logger, cls=ResourceOption)
@click.pass_context
def analyze(
    ctx,
    model,
    output_folder,
    batch_size,
    dataset,
    second_annotator,
    device,
    overlayed,
    weight,
    steps,
    parallel,
    plot_limits,
    verbose,
    **kwargs,
):
    """Runs a complete evaluation from prediction to comparison.

    This script is just a wrapper around the individual scripts for running
    prediction and evaluating FCN models. It organises the output in a
    preset way::

    \b
       └─ <output-folder>/
          ├── predictions/  #the prediction outputs for the train/test set
          ├── overlayed/  #the overlayed outputs for the train/test set
             ├── predictions/  #predictions overlayed on the input images
             ├── analysis/  #predictions overlayed on the input images
             ├              #including analysis of false positives, negatives
             ├              #and true positives
             └── second-annotator/  #if set, store overlayed images for the
                                    #second annotator here
          └── analysis /  #the outputs of the analysis of both train/test sets
                          #includes second-annotator "measures" as well, if
                          # configured

    N.B.: The tool is designed to prevent analysis bias and allows one to
    provide separate subsets for training and evaluation. Instead of using
    simple datasets, datasets for full experiment running should be
    dictionaries with specific subset names:

    * ``__train__``: dataset used for training, prioritarily. It is typically
      the dataset containing data augmentation pipelines.
    * ``train`` (optional): a copy of the ``__train__`` dataset, without data
      augmentation, that will be evaluated alongside other sets available
    * ``*``: any other name, not starting with an underscore character (``_``),
      will be considered a test set for evaluation.

    N.B.2: The threshold used for calculating the F1-score on the test set, or
    overlay analysis (false positives, negatives and true positives overprinted
    on the original image) also follows the logic above.
    """

    # Record the command line used for reproducibility, but only once per
    # experiment folder (avoid clobbering the record of the first run).
    command_sh = os.path.join(output_folder, "command.sh")
    if not os.path.exists(command_sh):
        # only save if experiment has not saved yet something similar
        save_sh_command(command_sh)

    # Prediction
    logger.info("Started prediction")

    from .predict import predict

    predictions_folder = os.path.join(output_folder, "predictions")
    # Overlays from the prediction stage go to a dedicated sub-folder;
    # disabled entirely (None) when --no-overlayed.
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "predictions")
        if overlayed
        else None
    )

    ctx.invoke(
        predict,
        output_folder=predictions_folder,
        model=model,
        dataset=dataset,
        batch_size=batch_size,
        device=device,
        weight=weight,
        overlayed=overlayed_folder,
        parallel=parallel,
        verbose=verbose,
    )
    logger.info("Ended prediction")

    # Evaluation
    logger.info("Started evaluation")

    from .evaluate import evaluate

    # Re-point overlays to the analysis sub-folder for the evaluation stage.
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "analysis")
        if overlayed
        else None
    )

    # choosing the overlayed_threshold: prefer tuning on a validation set,
    # fall back to the train set, and finally to a fixed 0.5 cut-off.
    if "validation" in dataset:
        threshold = "validation"
    elif "train" in dataset:
        threshold = "train"
    else:
        threshold = 0.5
    logger.info(f"Setting --threshold={threshold}...")

    analysis_folder = os.path.join(output_folder, "analysis")
    ctx.invoke(
        evaluate,
        output_folder=analysis_folder,
        predictions_folder=predictions_folder,
        dataset=dataset,
        second_annotator=second_annotator,
        overlayed=overlayed_folder,
        threshold=threshold,
        steps=steps,
        parallel=parallel,
        verbose=verbose,
    )

    logger.info("Ended evaluation")

    # Comparison
    logger.info("Started comparison")

    # compare performances on the various sets
    from .compare import compare

    # "systems" is a flat [label, csv_path, label, csv_path, ...] list, as
    # expected by the compare command's label_path argument.
    systems = []
    for k, v in dataset.items():
        # keys starting with "_" (e.g. "__train__") are training-only
        # subsets and are never compared
        if k.startswith("_"):
            logger.info(f"Skipping dataset '{k}' (not to be compared)")
            continue
        candidate = os.path.join(analysis_folder, f"{k}.csv")
        if not os.path.exists(candidate):
            logger.error(
                f"Skipping dataset '{k}' "
                f"(candidate CSV file `{candidate}` does not exist!)"
            )
            continue
        # NOTE(review): re-joins the same path instead of re-using
        # ``candidate`` — harmless, but redundant.
        systems += [k, os.path.join(analysis_folder, f"{k}.csv")]
    if second_annotator is not None:
        # Append second-annotator entries only for subsets that exist in
        # --dataset, match its sample keys, and have an analysis CSV.
        for k, v in second_annotator.items():
            if k.startswith("_"):
                logger.info(
                    f"Skipping second-annotator '{k}' " f"(not to be compared)"
                )
                continue
            if k not in dataset:
                logger.info(
                    f"Skipping second-annotator '{k}' "
                    f"(no equivalent `dataset[{k}]`)"
                )
                continue
            if not dataset[k].all_keys_match(v):
                logger.warning(
                    f"Skipping second-annotator '{k}' "
                    f"(keys do not match `dataset[{k}]`?)"
                )
                continue
            candidate = os.path.join(
                analysis_folder, "second-annotator", f"{k}.csv"
            )
            if not os.path.exists(candidate):
                logger.error(
                    f"Skipping second-annotator '{k}' "
                    f"(candidate CSV file `{candidate}` does not exist!)"
                )
                continue
            systems += [f"{k} (2nd. annot.)", candidate]

    output_figure = os.path.join(output_folder, "comparison.pdf")
    output_table = os.path.join(output_folder, "comparison.rst")

    ctx.invoke(
        compare,
        label_path=systems,
        output_figure=output_figure,
        output_table=output_table,
        threshold=threshold,
        plot_limits=plot_limits,
        verbose=verbose,
    )

    logger.info("Ended comparison")