Coverage for src/bob/bio/base/script/annotate.py: 97%

63 statements  

« prev     ^ index     » next       coverage.py v7.6.5, created at 2024-11-14 21:41 +0100

1"""A script to help annotate databases. 

2""" 

3import functools 

4import json 

5import logging 

6 

7import click 

8 

9from clapper.click import ( 

10 ConfigCommand, 

11 ResourceOption, 

12 log_parameters, 

13 verbosity_option, 

14) 

15 

16from bob.pipelines import DelayedSample, ToDaskBag, wrap 

17 

18logger = logging.getLogger(__name__) 

19 

20 

def save_json(data, path):
    """
    Saves a dictionary ``data`` in a json file at ``path``.

    ``data`` is serialized in memory *before* ``path`` is opened. If the
    serialization fails (for example ``TypeError`` on a value that json cannot
    encode), the exception propagates and no file is created or truncated, so
    a failed save never leaves a broken ``.json`` file behind.

    The file is written as UTF-8 text regardless of the locale, and any
    existing file at ``path`` is overwritten.
    """
    # Serialize first: opening the file in "w" mode truncates it immediately,
    # which must not happen when ``data`` turns out not to be serializable.
    serialized = json.dumps(data)
    with open(path, "w", encoding="utf-8") as f:
        f.write(serialized)

27 

28 

def load_json(path):
    """
    Returns a dictionary from a json file at ``path``.

    The file is decoded as UTF-8 (the encoding mandated for JSON) instead of
    the platform's locale encoding, so non-ASCII content is read the same way
    on every machine.

    Raises whatever ``open`` raises when ``path`` cannot be read and
    ``json.JSONDecodeError`` when the content is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

35 

36 

def annotate_common_options(func):
    """Decorate ``func`` with the CLI options shared by the annotate commands.

    Three options are attached: ``--annotator``/``-a`` and
    ``--output-dir``/``-o`` (both required) and ``--dask-client``/``-l``
    (optional, passed to the command as ``dask_client``). The returned callable
    is a pass-through wrapper around ``func``.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwds):
        return func(*args, **kwds)

    with_annotator = click.option(
        "--annotator",
        "-a",
        required=True,
        cls=ResourceOption,
        entry_point_group="bob.bio.annotator",
        help="An annotator (instance of class inheriting from "
        "bob.bio.base.Annotator) or an annotator resource name.",
    )
    with_output_dir = click.option(
        "--output-dir",
        "-o",
        required=True,
        cls=ResourceOption,
        help="The directory to save the annotations.",
    )
    with_dask_client = click.option(
        "--dask-client",
        "-l",
        "dask_client",
        entry_point_group="dask.client",
        help="Dask client for the execution of the pipeline. If not specified, "
        "uses a single threaded, local Dask Client.",
        cls=ResourceOption,
    )

    # Stacked decorators are applied bottom-up, so the option written last
    # (--dask-client) is attached first and --annotator last.
    for attach in (with_dask_client, with_output_dir, with_annotator):
        wrapper = attach(wrapper)

    return wrapper

68 

69 

@click.command(
    entry_point_group="bob.bio.config",
    cls=ConfigCommand,
    epilog="""\b
Examples:

  $ bob bio annotate -vvv -d <database> -a <annotator> -o /tmp/annotations
""",
)
@click.option(
    "--database",
    "-d",
    required=True,
    cls=ResourceOption,
    entry_point_group="bob.bio.database",
    help="Biometric Database (class that implements the methods: "
    "`background_model_samples`, `references` and `probes`).",
)
@click.option(
    "--groups",
    "-g",
    multiple=True,
    # The trailing space matters: adjacent literals are concatenated as-is.
    help="Biometric Database group that will be annotated. Can be added multiple "
    "times for different groups. [Default: All groups]",
)
@annotate_common_options
@verbosity_option(logger=logger, expose_value=False)
def annotate(database, groups, annotator, output_dir, dask_client, **kwargs):
    """Annotates a database.

    The annotations are written in text file (json) format which can be read
    back using :any:`read_annotation_file` (annotation_type='json')
    """
    # NOTE: click uses the docstring above as the command's help text, so it
    # is kept user-oriented; implementation notes live in comments instead.
    #
    # Parameters (all filled in by the click options declared above):
    #   database    -- object providing ``all_samples(groups)``.
    #   groups      -- tuple of group names (``multiple=True``); passed
    #                  unchanged to ``database.all_samples``.
    #   annotator   -- the annotator to run; wrapped below.
    #   output_dir  -- directory given to the checkpoint wrapper.
    #   dask_client -- scheduler handed to ``compute``; ``None`` selects the
    #                  "single-threaded" scheduler.
    #   kwargs      -- extra values from the command machinery; unused here.
    log_parameters(logger)

    # Allows passing of Sample objects as parameters; the output is stored
    # under the ``annotations`` attribute.
    annotator = wrap(["sample"], annotator, output_attribute="annotations")

    # Checkpoints the ``annotations`` attribute of the samples as ``.json``
    # files in ``output_dir`` (written by save_json, read back by load_json).
    annotator = wrap(
        ["checkpoint"],
        annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    annotator = wrap(["dask"], annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    logger.debug("Retrieving samples from database.")
    samples = database.all_samples(groups)

    # Falls back to the local single-threaded scheduler when no dask_client
    # was specified.
    scheduler = dask_client if dask_client is not None else "single-threaded"

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples)

    logger.info("Saving annotations in %s.", output_dir)
    logger.info("Annotating %s samples...", len(samples))
    # ``compute`` triggers the actual (lazy) annotation work.
    annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")

142 

143 

@click.command(
    entry_point_group="bob.bio.config",
    cls=ConfigCommand,
    epilog="""\b
Examples:

  $ bob bio annotate-samples -vvv config.py -a <annotator> -o /tmp/annotations

You have to define ``samples``, ``reader``, and ``make_key`` in python files
(config.py) as in examples.
""",
)
@click.option(
    "--samples",
    entry_point_group="bob.bio.config",
    required=True,
    cls=ResourceOption,
    help="A list of all samples that you want to annotate. They will be passed "
    "as is to the ``reader`` and ``make_key`` functions.",
)
@click.option(
    "--reader",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``data = reader(sample)`` which "
    "takes a sample and returns the loaded data. The returned data is given to "
    "the annotator.",
)
@click.option(
    "--make-key",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``key = make_key(sample)`` which "
    "takes a sample and returns a unique str identifier for that sample that "
    "will be used to save it in output_dir. ``key`` generally is the relative "
    "path to a sample's file from the dataset's root directory.",
)
@annotate_common_options
@verbosity_option(logger=logger, expose_value=False)
def annotate_samples(
    samples, reader, make_key, annotator, output_dir, dask_client, **kwargs
):
    """Annotates a list of samples.

    This command is very similar to ``bob bio annotate`` except that it works
    without a database interface. You must provide a list of samples as well as
    two functions:

        def reader(sample):
            # Loads data from a sample.
            # for example:
            data = bob.io.base.load(sample)
            # data will be given to the annotator
            return data

        def make_key(sample):
            # Creates a unique str identifier for this sample.
            # for example:
            return str(sample)
    """
    # NOTE: click uses the docstring above as the command's help text, so it
    # is kept user-oriented; implementation notes live in comments instead.
    #
    # Parameters (all filled in by the click options declared above):
    #   samples     -- iterable of user-defined sample descriptors.
    #   reader      -- callable ``reader(sample)``; bound lazily as the
    #                  ``load`` of each DelayedSample.
    #   make_key    -- callable ``make_key(sample)``; called eagerly to build
    #                  each DelayedSample's ``key``.
    #   annotator   -- the annotator to run; wrapped below.
    #   output_dir  -- directory given to the checkpoint wrapper.
    #   dask_client -- scheduler handed to ``compute``; ``None`` selects the
    #                  "single-threaded" scheduler.
    #   kwargs      -- extra values from the command machinery; unused here.

    # ``samples`` is left out of the parameter log.
    log_parameters(logger, ignore=("samples",))

    # Allows passing of Sample objects as parameters; the output is stored
    # under the ``annotations`` attribute.
    annotator = wrap(["sample"], annotator, output_attribute="annotations")

    # Checkpoints the ``annotations`` attribute of the samples as ``.json``
    # files in ``output_dir`` (written by save_json, read back by load_json).
    annotator = wrap(
        bases=["checkpoint"],
        estimator=annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    annotator = wrap(["dask"], annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    # Falls back to the local single-threaded scheduler when no dask_client
    # was specified.
    scheduler = dask_client if dask_client is not None else "single-threaded"

    # Converts samples into a list of DelayedSample objects; the data itself
    # is only read when ``load`` is invoked.
    samples_obj = [
        DelayedSample(
            load=functools.partial(reader, s),
            key=make_key(s),
        )
        for s in samples
    ]

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples_obj)

    logger.info("Saving annotations in %s", output_dir)
    logger.info("Annotating %s samples...", len(samples_obj))
    # ``compute`` triggers the actual (lazy) annotation work.
    annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")