#!/usr/bin/env python
# coding=utf-8

import logging

import click

from bob.extension.scripts.click_helper import (
    ConfigCommand,
    ResourceOption,
    verbosity_option,
)

from ..engine.evaluator import compare_annotators, run

logger = logging.getLogger(__name__)


def _validate_threshold(t, dataset):
    """Validates the user threshold selection.  Returns the parsed threshold."""

    if t is None:
        return 0.5

    try:
        # we try to convert it to float first
        t = float(t)
    except ValueError:
        # it is a bit of text - check that a dataset with that name is available
        if not isinstance(dataset, dict):
            raise ValueError(
                "Threshold should be a floating-point number "
                "if you provide only a single dataset for evaluation"
            )
        if t not in dataset:
            raise ValueError(
                f"Text thresholds should match dataset names, "
                f"but {t} is not available among the datasets provided "
                f"({', '.join(dataset.keys())})"
            )
    else:
        # check the numeric range outside of the ``try`` block, so this error
        # is not mistaken for a text (dataset-name) threshold
        if t < 0.0 or t > 1.0:
            raise ValueError("Float thresholds must be within range [0.0, 1.0]")

    return t
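# Illustrative behaviour of ``_validate_threshold`` (examples only; the dataset
# names and variables below are hypothetical, not part of this module):
#
#     _validate_threshold(None, dataset)    # -> 0.5 (default threshold)
#     _validate_threshold("0.3", dataset)   # -> 0.3
#     _validate_threshold("2.0", dataset)   # -> ValueError (outside [0.0, 1.0])
#     _validate_threshold("validation", {"validation": val_set, "test": test_set})
#                                           # -> "validation"; ``evaluate`` will
#                                           #    later estimate the threshold on
#                                           #    that set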

@click.command(
    entry_point_group="bob.ip.binseg.config",
    cls=ConfigCommand,
    epilog="""Examples:

\b
    1. Runs evaluation on an existing dataset configuration:
\b
       $ bob binseg evaluate -vv drive --predictions-folder=path/to/predictions --output-folder=path/to/results
\b
    2. To run evaluation on a folder with your own images and annotations, you
       must first specify resizing, cropping, etc., so that the image can be
       correctly input to the model.  Failing to do so will likely result in
       poor performance.  To figure out such specifications, you must consult
       the dataset configuration used for **training** the provided model.
       Once you have figured this out, do the following:
\b
       $ bob binseg config copy csv-dataset-example mydataset.py
       # modify "mydataset.py" to your liking
       $ bob binseg evaluate -vv mydataset.py --predictions-folder=path/to/predictions --output-folder=path/to/results
""",
)
@click.option(
    "--output-folder",
    "-o",
    help="Path where to store the analysis result (created if it does not exist)",
    required=True,
    default="results",
    type=click.Path(),
    cls=ResourceOption,
)
@click.option(
    "--predictions-folder",
    "-p",
    help="Path where predictions are currently stored",
    required=True,
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
    cls=ResourceOption,
)
@click.option(
    "--dataset",
    "-d",
    help="A torch.utils.data.dataset.Dataset instance implementing a dataset "
    "to be used for evaluation purposes, possibly including all pre-processing "
    "pipelines required or, optionally, a dictionary mapping string keys to "
    "torch.utils.data.dataset.Dataset instances.  All keys that do not start "
    "with an underscore (_) will be processed.",
    required=True,
    cls=ResourceOption,
)
@click.option(
    "--second-annotator",
    "-S",
    help="A dataset or dictionary, like in --dataset, with the same "
    "sample keys, but with annotations from a different annotator that is "
    "going to be compared to the one in --dataset.  The same rules regarding "
    "dataset naming conventions apply.",
    required=False,
    default=None,
    cls=ResourceOption,
    show_default=True,
)
@click.option(
    "--overlayed",
    "-O",
    help="Creates overlayed representations of the output probability maps, "
    "similar to --overlayed in prediction-mode, except it includes "
    "distinctive colours for true and false positives and false negatives.  "
    "If not set, or empty, then do **NOT** output overlayed images.  "
    "Otherwise, the parameter represents the name of a folder where to "
    "store those images.",
    show_default=True,
    default=None,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--threshold",
    "-t",
    help="This number is used to define positives and negatives from "
    "probability maps, and to report F1-scores (a priori).  It "
    "should either come from the training set or a separate validation set "
    "to avoid biasing the analysis.  Optionally, if you provide a multi-set "
    "dataset as input, this may also be the name of an existing set from "
    "which the threshold will be estimated (highest F1-score) and then "
    "applied to the subsequent sets.  This number is also used to print "
    "the a priori F1-score performance on the test set.",
    default=None,
    show_default=False,
    required=False,
    cls=ResourceOption,
)
@click.option(
    "--steps",
    "-S",
    help="This number is used to define the number of threshold steps to "
    "consider when evaluating the highest possible F1-score on test data.",
    default=1000,
    show_default=True,
    required=True,
    cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
def evaluate(
    output_folder,
    predictions_folder,
    dataset,
    second_annotator,
    overlayed,
    threshold,
    steps,
    **kwargs,
):
    """Evaluates an FCN on a binary segmentation task."""

    threshold = _validate_threshold(threshold, dataset)

    if not isinstance(dataset, dict):
        dataset = {"test": dataset}

    if second_annotator is None:
        second_annotator = {}
    elif not isinstance(second_annotator, dict):
        second_annotator = {"test": second_annotator}
    # else, second_annotator must be a dict

    if isinstance(threshold, str):
        # first run evaluation for the reference dataset; do not save overlays
        logger.info(f"Evaluating threshold on '{threshold}' set")
        threshold = run(
            dataset[threshold], threshold, predictions_folder, steps=steps
        )
        logger.info(f"Set --threshold={threshold:.5f}")

    # clean-up the overlayed path
    if overlayed is not None:
        overlayed = overlayed.strip()

    # now run the evaluation on each remaining dataset, using the threshold
    # defined (or estimated) above
    for k, v in dataset.items():
        if k.startswith("_"):
            logger.info(f"Skipping dataset '{k}' (not to be evaluated)")
            continue
        logger.info(f"Analyzing '{k}' set...")
        run(
            v,
            k,
            predictions_folder,
            output_folder,
            overlayed,
            threshold,
            steps=steps,
        )
        second = second_annotator.get(k)
        if second is not None:
            if not second.all_keys_match(v):
                logger.warning(
                    f"Key mismatch between `dataset[{k}]` and "
                    f"`second_annotator[{k}]` - skipping "
                    f"second-annotator comparisons for {k} subset"
                )
            else:
                compare_annotators(v, second, k, output_folder, overlayed)
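# --------------------------------------------------------------------------
# Sketch (not part of the original script): the shape of inputs this command
# expects from a configuration file loaded through ConfigCommand.  All names
# below ("mydataset.py", train_set, valid_set, test_set, second_test_set,
# extra_set) are hypothetical placeholders.
#
#     # mydataset.py
#     dataset = {
#         "train": train_set,        # evaluated like any other set
#         "validation": valid_set,   # may anchor --threshold=validation
#         "test": test_set,          # evaluated
#         "_extra": extra_set,       # keys starting with "_" are skipped
#     }
#     second_annotator = {"test": second_test_set}  # optional, same sample keys
#
# With ``--threshold=validation``, the threshold maximizing the F1-score is
# first estimated on the "validation" set and then applied to all other sets.
# --------------------------------------------------------------------------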