Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# coding=utf-8
4import logging
5import os
7import click
9from bob.extension.scripts.click_helper import (
10 ConfigCommand,
11 ResourceOption,
12 verbosity_option,
13)
15from .binseg import save_sh_command
# Module-level logger named after this module, used by analyze() below to
# report pipeline progress
logger = logging.getLogger(__name__)
# NOTE(review): two pairs of options below declare the same short flag:
# --dataset and --device both use "-d", and --second-annotator and --steps
# both use "-S".  click registers short flags last-one-wins, so each alias
# most likely resolves to only one of the two options — confirm and assign
# unique short flags (the long option names are unaffected).
@click.command(
    entry_point_group="bob.ip.binseg.config",
    cls=ConfigCommand,
    epilog="""Examples:

\b
    1. Re-evaluates a pre-trained M2U-Net model with DRIVE (vessel
       segmentation), on the CPU, by running inference and evaluation on results
       from its test set:

       $ bob binseg analyze -vv m2unet drive --weight=model.path

""",
)
@click.option(
    "--output-folder",
    "-o",
    help="Path where to store experiment outputs (created if does not exist)",
    required=True,
    type=click.Path(),
    default="results",
    cls=ResourceOption,
)
@click.option(
    "--model",
    "-m",
    help="A torch.nn.Module instance implementing the network to be trained, and then evaluated",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--dataset",
    "-d",
    help="A dictionary mapping string keys to "
    "bob.ip.binseg.data.utils.SampleList2TorchDataset's. At least one key "
    "named 'train' must be available. This dataset will be used for training "
    "the network model. All other datasets will be used for prediction and "
    "evaluation. Dataset descriptions include all required pre-processing, "
    "including eventual data augmentation, which may be eventually excluded "
    "for prediction and evaluation purposes",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--second-annotator",
    "-S",
    help="A dataset or dictionary, like in --dataset, with the same "
    "sample keys, but with annotations from a different annotator that is "
    "going to be compared to the one in --dataset",
    required=False,
    default=None,
    cls=ResourceOption,
    show_default=True,
)
@click.option(
    "--batch-size",
    "-b",
    help="Number of samples in every batch (this parameter affects "
    "memory requirements for the network). If the number of samples in "
    "the batch is larger than the total number of samples available for "
    "training, this value is truncated. If this number is smaller, then "
    "batches of the specified size are created and fed to the network "
    "until there are no more new samples to feed (epoch is finished). "
    "If the total number of training samples is not a multiple of the "
    "batch-size, the last batch will be smaller than the first.",
    required=True,
    show_default=True,
    default=1,
    type=click.IntRange(min=1),
    cls=ResourceOption,
)
@click.option(
    "--device",
    "-d",
    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
    show_default=True,
    required=True,
    default="cpu",
    cls=ResourceOption,
)
@click.option(
    "--overlayed/--no-overlayed",
    "-O",
    help="Creates overlayed representations of the output probability maps, "
    "similar to --overlayed in prediction-mode, except it includes "
    "distinctive colours for true and false positives and false negatives. "
    "If not set, or empty then do **NOT** output overlayed images.",
    show_default=True,
    default=False,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--weight",
    "-w",
    help="Path or URL to pretrained model file (.pth extension)",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--steps",
    "-S",
    help="This number is used to define the number of threshold steps to "
    "consider when evaluating the highest possible F1-score on test data.",
    default=1000,
    show_default=True,
    required=True,
    cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
@click.pass_context
def analyze(
    ctx,
    model,
    output_folder,
    batch_size,
    dataset,
    second_annotator,
    device,
    overlayed,
    weight,
    steps,
    verbose,
    **kwargs,
):
    """Runs a complete evaluation from prediction to comparison

    This script is just a wrapper around the individual scripts for running
    prediction and evaluating FCN models.  It organises the output in a
    preset way::

\b
    └─ <output-folder>/
       ├── command.sh  #copy of the launching command-line (saved only once)
       ├── predictions/  #prediction outputs, one sub-folder per dataset split
       ├── overlayed/  #only created if --overlayed is set
       │  ├── predictions/  #output probability maps overlayed on the
       │  │                 #input images
       │  └── analysis/  #predictions overlayed on the input images,
       │                 #including analysis of false positives, negatives
       │                 #and true positives
       ├── analysis/  #evaluation outputs, one CSV per dataset split;
       │  │           #also includes second-annotator measures, if configured
       │  └── second-annotator/  #second-annotator CSV measures, if set
       ├── comparison.pdf  #figure comparing all analyzed splits
       └── comparison.rst  #table comparing all analyzed splits

    N.B.: The tool is designed to prevent analysis bias and allows one to
    provide separate subsets for training and evaluation. Instead of using
    simple datasets, datasets for full experiment running should be
    dictionaries with specific subset names:

    * ``__train__``: dataset used for training, prioritarily. It is typically
      the dataset containing data augmentation pipelines.
    * ``train`` (optional): a copy of the ``__train__`` dataset, without data
      augmentation, that will be evaluated alongside other sets available
    * ``*``: any other name, not starting with an underscore character (``_``),
      will be considered a test set for evaluation.

    N.B.2: The threshold used for calculating the F1-score on the test set, or
    overlay analysis (false positives, negatives and true positives overprinted
    on the original image) also follows the logic above.
    """

    # record the launching command-line once per experiment folder, so the
    # run can be reproduced later
    command_sh = os.path.join(output_folder, "command.sh")
    if not os.path.exists(command_sh):
        # only save if experiment has not saved yet something similar
        save_sh_command(command_sh)

    # Prediction
    logger.info("Started prediction")

    # imported locally, presumably to delay heavy imports until the command
    # actually runs — confirm before hoisting to module level
    from .predict import predict

    predictions_folder = os.path.join(output_folder, "predictions")
    # prediction overlays are only produced when --overlayed is set
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "predictions")
        if overlayed
        else None
    )

    # run the stand-alone "predict" command with this command's options
    ctx.invoke(
        predict,
        output_folder=predictions_folder,
        model=model,
        dataset=dataset,
        batch_size=batch_size,
        device=device,
        weight=weight,
        overlayed=overlayed_folder,
        verbose=verbose,
    )
    logger.info("Ended prediction")

    # Evaluation
    logger.info("Started evaluation")

    from .evaluate import evaluate

    # analysis overlays (TP/FP/FN colouring) go to a separate sub-folder
    # from the prediction overlays above
    overlayed_folder = (
        os.path.join(output_folder, "overlayed", "analysis")
        if overlayed
        else None
    )

    # choosing the overlayed_threshold: prefer a threshold derived from the
    # "validation" split, then from "train"; otherwise fall back to a fixed
    # 0.5 cut-off.  (The split-name string is interpreted by the "evaluate"
    # command — presumably it tunes the threshold on that split; confirm in
    # .evaluate.)
    if "validation" in dataset:
        threshold = "validation"
    elif "train" in dataset:
        threshold = "train"
    else:
        threshold = 0.5
    logger.info(f"Setting --threshold={threshold}...")

    analysis_folder = os.path.join(output_folder, "analysis")
    ctx.invoke(
        evaluate,
        output_folder=analysis_folder,
        predictions_folder=predictions_folder,
        dataset=dataset,
        second_annotator=second_annotator,
        overlayed=overlayed_folder,
        threshold=threshold,
        steps=steps,
        verbose=verbose,
    )

    logger.info("Ended evaluation")

    # Comparison
    logger.info("Started comparison")

    # compare performances on the various sets
    from .compare import compare

    # "systems" is a flat list of alternating label / CSV-path entries, the
    # shape expected by compare's label_path argument below
    systems = []
    for k, v in dataset.items():
        # split names starting with "_" are internal (e.g. "__train__") and
        # are excluded from the comparison
        if k.startswith("_"):
            logger.info(f"Skipping dataset '{k}' (not to be compared)")
            continue
        candidate = os.path.join(analysis_folder, f"{k}.csv")
        if not os.path.exists(candidate):
            logger.error(
                f"Skipping dataset '{k}' "
                f"(candidate CSV file `{candidate}` does not exist!)"
            )
            continue
        systems += [k, os.path.join(analysis_folder, f"{k}.csv")]
    if second_annotator is not None:
        for k, v in second_annotator.items():
            if k.startswith("_"):
                logger.info(
                    f"Skipping second-annotator '{k}' " f"(not to be compared)"
                )
                continue
            # a second-annotator split only makes sense when a split of the
            # same name exists in --dataset...
            if k not in dataset:
                logger.info(
                    f"Skipping second-annotator '{k}' "
                    f"(no equivalent `dataset[{k}]`)"
                )
                continue
            # ...and when both splits cover exactly the same sample keys
            if not dataset[k].all_keys_match(v):
                logger.warning(
                    f"Skipping second-annotator '{k}' "
                    f"(keys do not match `dataset[{k}]`?)"
                )
                continue
            candidate = os.path.join(
                analysis_folder, "second-annotator", f"{k}.csv"
            )
            if not os.path.exists(candidate):
                logger.error(
                    f"Skipping second-annotator '{k}' "
                    f"(candidate CSV file `{candidate}` does not exist!)"
                )
                continue
            systems += [f"{k} (2nd. annot.)", candidate]

    output_figure = os.path.join(output_folder, "comparison.pdf")
    output_table = os.path.join(output_folder, "comparison.rst")

    ctx.invoke(
        compare,
        label_path=systems,
        output_figure=output_figure,
        output_table=output_table,
        threshold=threshold,
        verbose=verbose,
    )

    logger.info("Ended comparison")