#!/usr/bin/env python
# coding=utf-8

import logging

import click

from bob.extension.scripts.click_helper import (
    ConfigCommand,
    ResourceOption,
    verbosity_option,
)

from ..engine.evaluator import compare_annotators, run

logger = logging.getLogger(__name__)


def _validate_threshold(t, dataset):
    """Validates the user threshold selection.  Returns the parsed threshold."""

    if t is None:
        return 0.5

    try:
        # we try to convert it to float first
        t = float(t)
    except ValueError:
        # it is a bit of text - check that a dataset with that name is available
        if not isinstance(dataset, dict):
            raise ValueError(
                "Threshold should be a floating-point number "
                "if you provide only a single dataset for evaluation"
            )
        if t not in dataset:
            raise ValueError(
                f"Text thresholds should match dataset names, "
                f"but {t} is not available among the datasets provided "
                f"({', '.join(dataset.keys())})"
            )
    else:
        # check the numeric range outside of the ``try`` block, so this error
        # is not mistaken for a text (dataset-name) threshold
        if t < 0.0 or t > 1.0:
            raise ValueError("Float thresholds must be within range [0.0, 1.0]")

    return t
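# Illustrative behaviour of ``_validate_threshold`` (examples only; the dataset
# names and variables below are hypothetical, not part of this module):
#
#     _validate_threshold(None, dataset)    # -> 0.5 (default threshold)
#     _validate_threshold("0.3", dataset)   # -> 0.3
#     _validate_threshold("2.0", dataset)   # -> ValueError (outside [0.0, 1.0])
#     _validate_threshold("validation", {"validation": val_set, "test": test_set})
#                                           # -> "validation"; ``evaluate`` will
#                                           #    later estimate the threshold on
#                                           #    that set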

@click.command(
    entry_point_group="bob.ip.binseg.config",
    cls=ConfigCommand,
    epilog="""Examples:

\b
    1. Runs evaluation on an existing dataset configuration:
\b
       $ bob binseg evaluate -vv drive --predictions-folder=path/to/predictions --output-folder=path/to/results
\b
    2. To run evaluation on a folder with your own images and annotations, you
       must first specify resizing, cropping, etc., so that the image can be
       correctly input to the model.  Failing to do so will likely result in
       poor performance.  To figure out such specifications, you must consult
       the dataset configuration used for **training** the provided model.
       Once you have figured this out, do the following:
\b
       $ bob binseg config copy csv-dataset-example mydataset.py
       # modify "mydataset.py" to your liking
       $ bob binseg evaluate -vv mydataset.py --predictions-folder=path/to/predictions --output-folder=path/to/results
""",
)
@click.option(
    "--output-folder",
    "-o",
    help="Path where to store the analysis result (created if it does not exist)",
    required=True,
    default="results",
    type=click.Path(),
    cls=ResourceOption,
)
@click.option(
    "--predictions-folder",
    "-p",
    help="Path where predictions are currently stored",
    required=True,
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
    cls=ResourceOption,
)
@click.option(
    "--dataset",
    "-d",
    help="A torch.utils.data.dataset.Dataset instance implementing a dataset "
    "to be used for evaluation purposes, possibly including all pre-processing "
    "pipelines required or, optionally, a dictionary mapping string keys to "
    "torch.utils.data.dataset.Dataset instances.  All keys that do not start "
    "with an underscore (_) will be processed.",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--second-annotator",
    "-S",
    help="A dataset or dictionary, like in --dataset, with the same "
    "sample keys, but with annotations from a different annotator that is "
    "going to be compared to the one in --dataset.  The same rules regarding "
    "dataset naming conventions apply.",
    required=False,
    default=None,
    cls=ResourceOption,
    show_default=True,
)
@click.option(
    "--overlayed",
    "-O",
    help="Creates overlayed representations of the output probability maps, "
    "similar to --overlayed in prediction-mode, except it includes "
    "distinctive colours for true and false positives and false negatives.  "
    "If not set, or empty, then do **NOT** output overlayed images.  "
    "Otherwise, the parameter represents the name of a folder where to "
    "store those images.",
    show_default=True,
    default=None,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--threshold",
    "-t",
    help="This number is used to define positives and negatives from "
    "probability maps, and to report F1-scores (a priori).  It "
    "should either come from the training set or a separate validation set "
    "to avoid biasing the analysis.  Optionally, if you provide a multi-set "
    "dataset as input, this may also be the name of an existing set from "
    "which the threshold will be estimated (highest F1-score) and then "
    "applied to the subsequent sets.  This number is also used to print "
    "the a priori F1-score performance on the test set.",
    default=None,
    show_default=False,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--steps",
    "-S",
    help="This number is used to define the number of threshold steps to "
    "consider when evaluating the highest possible F1-score on test data.",
    default=1000,
    show_default=True,
    required=True,
    cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
def evaluate(
    output_folder,
    predictions_folder,
    dataset,
    second_annotator,
    overlayed,
    threshold,
    steps,
    **kwargs,
):
    """Evaluates an FCN on a binary segmentation task."""

    threshold = _validate_threshold(threshold, dataset)

    if not isinstance(dataset, dict):
        dataset = {"test": dataset}

    if second_annotator is None:
        second_annotator = {}
    elif not isinstance(second_annotator, dict):
        second_annotator = {"test": second_annotator}
    # else, second_annotator must be a dict

    if isinstance(threshold, str):
        # first run evaluation for the reference dataset; do not save overlays
        logger.info(f"Evaluating threshold on '{threshold}' set")
        threshold = run(
            dataset[threshold], threshold, predictions_folder, steps=steps
        )
        logger.info(f"Set --threshold={threshold:.5f}")

    # clean-up the overlayed path
    if overlayed is not None:
        overlayed = overlayed.strip()

    # now run the evaluation on each remaining dataset, using the threshold
    # defined (or estimated) above
    for k, v in dataset.items():
        if k.startswith("_"):
            logger.info(f"Skipping dataset '{k}' (not to be evaluated)")
            continue
        logger.info(f"Analyzing '{k}' set...")
        run(
            v,
            k,
            predictions_folder,
            output_folder,
            overlayed,
            threshold,
            steps=steps,
        )
        second = second_annotator.get(k)
        if second is not None:
            if not second.all_keys_match(v):
                logger.warning(
                    f"Key mismatch between `dataset[{k}]` and "
                    f"`second_annotator[{k}]` - skipping "
                    f"second-annotator comparisons for {k} subset"
                )
            else:
                compare_annotators(v, second, k, output_folder, overlayed)
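# --------------------------------------------------------------------------
# Sketch (not part of the original script): the shape of inputs this command
# expects from a configuration file loaded through ConfigCommand.  All names
# below ("mydataset.py", train_set, valid_set, test_set, second_test_set,
# extra_set) are hypothetical placeholders.
#
#     # mydataset.py
#     dataset = {
#         "train": train_set,        # evaluated like any other set
#         "validation": valid_set,   # may anchor --threshold=validation
#         "test": test_set,          # evaluated
#         "_extra": extra_set,       # keys starting with "_" are skipped
#     }
#     second_annotator = {"test": second_test_set}  # optional, same sample keys
#
# With ``--threshold=validation``, the threshold maximizing the F1-score is
# first estimated on the "validation" set and then applied to all other sets.
# --------------------------------------------------------------------------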