Coverage for src/bob/bio/base/script/annotate.py: 97%
63 statements
« prev ^ index » next coverage.py v7.6.5, created at 2024-11-14 21:41 +0100
« prev ^ index » next coverage.py v7.6.5, created at 2024-11-14 21:41 +0100
1"""A script to help annotate databases.
2"""
3import functools
4import json
5import logging
7import click
9from clapper.click import (
10 ConfigCommand,
11 ResourceOption,
12 log_parameters,
13 verbosity_option,
14)
16from bob.pipelines import DelayedSample, ToDaskBag, wrap
18logger = logging.getLogger(__name__)
def save_json(data, path):
    """
    Writes ``data`` (a JSON-serializable dictionary) to the file ``path``.

    The file is opened in text write mode, so existing content is replaced.
    """
    with open(path, mode="w") as json_file:
        json.dump(data, fp=json_file)
def load_json(path):
    """
    Reads the json file at ``path`` and returns its decoded content.
    """
    with open(path, mode="r") as json_file:
        content = json.load(json_file)
    return content
def annotate_common_options(func):
    """Decorator adding the options shared by the annotate commands.

    Attaches the ``--annotator``, ``--output-dir`` and ``--dask-client`` click
    options to a pass-through wrapper around ``func`` and returns that wrapper.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwds):
        return func(*args, **kwds)

    with_annotator = click.option(
        "--annotator",
        "-a",
        required=True,
        cls=ResourceOption,
        entry_point_group="bob.bio.annotator",
        help="An annotator (instance of class inheriting from "
        "bob.bio.base.Annotator) or an annotator resource name.",
    )
    with_output_dir = click.option(
        "--output-dir",
        "-o",
        required=True,
        cls=ResourceOption,
        help="The directory to save the annotations.",
    )
    with_dask_client = click.option(
        "--dask-client",
        "-l",
        "dask_client",
        entry_point_group="dask.client",
        help="Dask client for the execution of the pipeline. If not specified, "
        "uses a single threaded, local Dask Client.",
        cls=ResourceOption,
    )

    # Applied innermost-first, i.e. in the same order as a stack of decorators
    # listing annotator, output-dir, dask-client from top to bottom.
    return with_annotator(with_output_dir(with_dask_client(wrapper)))
@click.command(
    entry_point_group="bob.bio.config",
    cls=ConfigCommand,
    epilog="""\b
Examples:

  $ bob bio annotate -vvv -d <database> -a <annotator> -o /tmp/annotations
""",
)
@click.option(
    "--database",
    "-d",
    required=True,
    cls=ResourceOption,
    entry_point_group="bob.bio.database",
    help="Biometric Database (class that implements the methods: "
    "`background_model_samples`, `references` and `probes`).",
)
@click.option(
    "--groups",
    "-g",
    multiple=True,
    # The two literals are concatenated: the first must end with a space,
    # otherwise the help reads "multipletimes".
    help="Biometric Database group that will be annotated. Can be added "
    "multiple times for different groups. [Default: All groups]",
)
@annotate_common_options
@verbosity_option(logger=logger, expose_value=False)
def annotate(database, groups, annotator, output_dir, dask_client, **kwargs):
    """Annotates a database.

    The annotations are written in text file (json) format which can be read
    back using :any:`read_annotation_file` (annotation_type='json')
    """
    # NOTE: the docstring above doubles as the command's ``--help`` text, so
    # developer-facing notes are kept in comments instead.
    #
    # database:    object providing ``all_samples(groups)``.
    # groups:      values collected from the repeatable ``--groups`` option.
    # annotator:   transformer that gets wrapped below; its output is stored
    #              in the ``annotations`` attribute of each sample.
    # output_dir:  directory handed to the checkpoint wrapper as features_dir.
    # dask_client: scheduler passed to ``compute``; None selects the
    #              "single-threaded" scheduler.
    log_parameters(logger)

    # Allows passing of Sample objects as parameters
    annotator = wrap(["sample"], annotator, output_attribute="annotations")

    # Will save the annotations in the `data` fields to a json file
    annotator = wrap(
        ["checkpoint"],
        annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    annotator = wrap(["dask"], annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    logger.debug("Retrieving samples from database.")
    samples = database.all_samples(groups)

    # Sets the scheduler to local if no dask_client is specified
    scheduler = dask_client if dask_client is not None else "single-threaded"

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples)

    logger.info("Saving annotations in %s.", output_dir)
    logger.info("Annotating %s samples...", len(samples))
    annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")
@click.command(
    entry_point_group="bob.bio.config",
    cls=ConfigCommand,
    epilog="""\b
Examples:

  $ bob bio annotate-samples -vvv config.py -a <annotator> -o /tmp/annotations

You have to define ``samples``, ``reader``, and ``make_key`` in python files
(config.py) as in examples.
""",
)
@click.option(
    "--samples",
    entry_point_group="bob.bio.config",
    required=True,
    cls=ResourceOption,
    help="A list of all samples that you want to annotate. They will be passed "
    "as is to the ``reader`` and ``make-key`` functions.",
)
@click.option(
    "--reader",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``data = reader(sample)`` which "
    "takes a sample and returns the loaded data. The returned data is given to "
    "the annotator.",
)
@click.option(
    "--make-key",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``key = make_key(sample)`` which "
    "takes a sample and returns a unique str identifier for that sample that "
    "will be used to save it in output_dir. ``key`` generally is the relative "
    "path to a sample's file from the dataset's root directory.",
)
@annotate_common_options
@verbosity_option(logger=logger, expose_value=False)
def annotate_samples(
    samples, reader, make_key, annotator, output_dir, dask_client, **kwargs
):
    """Annotates a list of samples.

    This command is very similar to ``bob bio annotate`` except that it works
    without a database interface. You must provide a list of samples as well as
    two functions:

        def reader(sample):
            # Loads data from a sample.
            # for example:
            data = bob.io.base.load(sample)
            # data will be given to the annotator
            return data

        def make_key(sample):
            # Creates a unique str identifier for this sample.
            # for example:
            return str(sample)
    """
    # NOTE: the docstring above doubles as the command's ``--help`` text, so
    # developer-facing notes are kept in comments instead.
    #
    # samples:     iterable of raw sample descriptors; each item is passed
    #              unchanged to ``reader`` and ``make_key``.
    # reader:      callable bound lazily to each sample via functools.partial.
    # make_key:    callable evaluated eagerly here to build each sample's key.
    # dask_client: scheduler passed to ``compute``; None selects the
    #              "single-threaded" scheduler.

    # ``samples`` is excluded from the parameter log.
    log_parameters(logger, ignore=("samples",))

    # Allows passing of Sample objects as parameters
    annotator = wrap(["sample"], annotator, output_attribute="annotations")

    # Will save the annotations in the `data` fields to a json file
    annotator = wrap(
        bases=["checkpoint"],
        estimator=annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    annotator = wrap(["dask"], annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    # Sets the scheduler to local if no dask_client is specified
    scheduler = dask_client if dask_client is not None else "single-threaded"

    # Converts samples into a list of DelayedSample objects
    samples_obj = [
        DelayedSample(
            load=functools.partial(reader, s),
            key=make_key(s),
        )
        for s in samples
    ]

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples_obj)

    logger.info("Saving annotations in %s", output_dir)
    logger.info("Annotating %s samples...", len(samples_obj))
    annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")