Coverage for src/bob/bio/base/utils/resources.py: 25%
114 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 22:34 +0200
« prev ^ index » next coverage.py v7.6.0, created at 2024-07-12 22:34 +0200
1#!/usr/bin/env python
2# vim: set fileencoding=utf-8 :
3# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
4# @date: Thu Oct 25 10:05:55 CEST 2012
6from __future__ import print_function
8import importlib.metadata
9import importlib.resources
10import logging
12from pathlib import Path
14import clapper.config
logger = logging.getLogger("bob.bio.base")


# The tuple below is consulted by ``load_resource`` (which only warns on an
# unknown keyword) and by ``list_resources`` (which rejects unknown keywords).
valid_keywords: tuple[str, ...] = (
    "database",
    "preprocessor",
    "extractor",
    "algorithm",
    "grid",
    "client",
    "config",
    "annotator",
    "pipeline",
)
"""Keywords for which resources are defined."""
def _collect_config(paths):
    """Load an ordered list of configuration sources into one module.

    The actual work is delegated to ``clapper.config.load``, which is asked to
    resolve resource names in the ``bob.bio.config`` entry-point group.  The
    sources are handed over in the order given; as documented for this helper,
    each source is loaded on top of the previous ones, so symbols defined in a
    later source overwrite those of earlier sources and the returned module
    reflects the final state.


    Parameters:

      paths : [str]
        A list of resources, modules or files (in order) to collect resources
        from


    Returns: module

      A valid Python module you can use to configure your tool

    """

    group = "bob.bio.config"
    return clapper.config.load(paths, entry_point_group=group)
def read_config_file(filenames, keyword=None):
    """Read one or more configuration files (or resources) and return the result.

    All sources are merged by :py:func:`_collect_config`.  With a ``keyword``,
    only the variable of that name is extracted from the merged configuration;
    without one, the merged configuration itself is handed back.

    **Parameters:**

    filenames : [str]
      A non-empty list of configuration files or resources to read running
      options from

    keyword : str or ``None``
      If specified, only the contents of the variable with the given name is
      returned.  If ``None`` (or otherwise falsy), the whole configuration is
      returned (a local namespace)

    **Returns:**

    config : object or namespace
      If ``keyword`` is specified, the object inside the configuration with the
      given name is returned.  Otherwise, the whole configuration is returned
      (as a local namespace).

    **Raises:**

    RuntimeError
      If ``filenames`` is empty.

    ImportError
      If ``keyword`` is given but not defined by any of the sources.
    """

    if not filenames:
        raise RuntimeError(
            "At least one configuration file, resource or "
            "module name must be passed"
        )

    namespace = _collect_config(filenames)

    # no keyword requested: the caller gets the complete namespace
    if not keyword:
        return namespace

    if hasattr(namespace, keyword):
        return getattr(namespace, keyword)

    sources = ", ".join(filenames)
    raise ImportError(
        "The desired keyword '%s' does not exist in any of "
        "your configuration files: %s" % (keyword, sources)
    )
100def _get_entry_points(
101 keyword: str,
102 strip: list[str] | None = None,
103 package_prefix: str = "bob.bio.",
104) -> list[importlib.metadata.EntryPoint]:
105 """Returns the list of entry points for registered resources with the given keyword."""
106 if strip is None:
107 strip = []
108 return [
109 entry_point
110 for entry_point in importlib.metadata.entry_points().select(
111 group=package_prefix + keyword
112 )
113 if not entry_point.name.startswith(tuple(strip))
114 ]
def load_resource(
    resource,
    keyword,
    imports=("bob.bio.base",),
    package_prefix="bob.bio.",
    preferred_package=None,
):
    """Loads the given resource that is registered with the given keyword.
    The resource can be:

    1. a resource as defined in the setup.py
    2. a configuration file
    3. a string defining the construction of an object. If imports are required for the construction of this object, they can be given as list of strings.

    The three interpretations are tried in the order: existing file, registered
    entry point, Python expression.

    **Parameters:**

    resource : str
      Any string interpretable as a resource (see above).

    keyword : str
      A valid resource keyword, can be one of :any:`bob.bio.base.utils.resources.valid_keywords`.
      An unknown keyword only triggers a warning.

    imports : [str]
      A list of strings defining which modules to import, when constructing new objects (option 3).
      Defaults to ``bob.bio.base`` only.

    package_prefix : str
      Package namespace, in which we search for entry points, e.g., ``bob.bio``.

    preferred_package : str or ``None``
      When several resources with the same name are found in different packages (e.g., in different ``bob.bio`` or other packages), this specifies the preferred package to load the resource from.
      If not specified, the extension that is **not** from ``bob.bio`` is selected.

    **Returns:**

    resource : object
      The resulting resource object is returned, either read from file or resource, or created newly.

    **Raises:**

    ImportError
      If the resource is neither a file nor an entry point, and importing the
      ``imports`` or evaluating the string fails.
    """

    # first, look if the resource is a file name
    if Path(resource).is_file():
        return read_config_file([resource], keyword)

    if keyword not in valid_keywords:
        logger.warning(
            "The given keyword '%s' is not valid. Please use one of %s!",
            keyword,
            valid_keywords,
        )

    # now, we check if the resource is registered as an entry point in the resource files
    candidates = [
        entry_point
        for entry_point in _get_entry_points(
            keyword, package_prefix=package_prefix
        )
        if entry_point.name == resource
    ]

    if len(candidates) == 1:
        return candidates[0].load()

    if candidates:
        # TODO: extract current package name and use this one, if possible

        # Several packages register the same name: prefer the requested
        # package, then the first one that is not from ``package_prefix``.
        chosen = None
        if preferred_package is not None:
            chosen = next(
                (
                    p
                    for p in candidates
                    if p.dist.metadata["name"] == preferred_package
                ),
                None,
            )

        if chosen is None:
            # by default, use the first one that is not from bob.bio
            chosen = next(
                (
                    p
                    for p in candidates
                    if not p.dist.metadata["name"].startswith(package_prefix)
                ),
                None,
            )

        if chosen is None:
            # ``Logger.warn`` no longer exists on Python >= 3.13
            logger.warning(
                "Under the desired name '%s', there are multiple entry points defined, we return the first one: %s",
                resource,
                [entry_point.module for entry_point in candidates],
            )
            return candidates[0].load()

        # report every candidate that was passed over, not only "the other
        # one" of an assumed pair
        ignored = ", ".join(p.module for p in candidates if p is not chosen)
        logger.debug(
            "RESOURCES: Using the resource '%s' from '%s', and ignoring the one from '%s'",
            resource,
            chosen.module,
            ignored,
        )
        return chosen.load()

    # if the resource is neither a config file nor an entry point,
    # just execute it as a command
    #
    # NOTE(security): this evaluates arbitrary Python code taken from the
    # ``resource`` string; only pass trusted input.
    #
    # An explicit namespace (seeded with this module's globals) is shared
    # between ``exec`` and ``eval``.  Relying on the implicit function locals
    # does not work on Python >= 3.13, where each ``locals()`` snapshot is
    # independent and the imported names would be invisible to ``eval``.
    namespace = dict(globals())
    try:
        # first, execute all import commands that are required
        for module_name in imports:
            exec("import %s" % module_name, namespace)
        # now, evaluate the resource (re-evaluate if the resource is still a string)
        result = resource
        while isinstance(result, str):
            result = eval(result, namespace)
        return result

    except Exception as e:
        # ``resource`` is left untouched above, so the message shows what the
        # caller actually passed
        raise ImportError(
            "The given command line option '%s' is neither a resource for a '%s', nor an existing configuration file, nor could be interpreted as a command"
            % (resource, keyword)
        ) from e
def extensions(keywords=valid_keywords, package_prefix="bob.bio."):
    """extensions(keywords=valid_keywords, package_prefix='bob.bio.') -> extensions

    Returns the sorted, duplicate-free list of distribution names that register
    at least one entry point for any of the given keywords.

    **Parameters:**

    keywords : [str]
      A list of keywords to load entry points for.
      Defaults to all :any:`bob.bio.base.utils.resources.valid_keywords`.

    package_prefix : str
      Package namespace, in which we search for entry points, e.g., ``bob.bio``.
    """
    package_names = set()
    for keyword in keywords:
        registered = _get_entry_points(keyword, package_prefix=package_prefix)
        for entry_point in registered:
            package_names.add(entry_point.dist.metadata["name"])
    return sorted(package_names)
def resource_keys(
    keyword, exclude_packages=None, package_prefix="bob.bio.", strip=None
):
    """Returns the sorted names of all resources registered with the given keyword.

    Entry points whose distribution name is listed in ``exclude_packages`` are
    ignored, as are entry points whose name starts with one of the prefixes in
    ``strip`` (``["dummy"]`` when ``strip`` is ``None``)."""
    excluded = [] if exclude_packages is None else exclude_packages
    prefixes = ["dummy"] if strip is None else strip

    names = []
    for entry_point in _get_entry_points(
        keyword, strip=prefixes, package_prefix=package_prefix
    ):
        if entry_point.dist.metadata["name"] in excluded:
            continue
        names.append(entry_point.name)

    names.sort()
    return names
def list_resources(
    keyword,
    strip=None,
    package_prefix="bob.bio.",
    verbose=False,
    packages=None,
):
    """Returns a string containing a detailed list of resources that are registered with the given keyword.

    Resources are grouped by distribution (one header line with name, version
    and location each) and listed by name below it.  ``strip`` lists name
    prefixes to hide (``["dummy"]`` by default), ``packages`` optionally
    restricts the listing to the given distribution names, and ``verbose``
    additionally loads every resource and prints its string representation.

    Raises ``ValueError`` if ``keyword`` is not one of ``valid_keywords``.
    """
    if keyword not in valid_keywords:
        raise ValueError(
            "The given keyword '%s' is not valid. Please use one of %s!"
            % (str(keyword), str(valid_keywords))
        )
    prefixes = ["dummy"] if strip is None else strip

    candidates = _get_entry_points(
        keyword, prefixes, package_prefix=package_prefix
    )

    # The column width is taken over all candidates, i.e., before the
    # ``packages`` filter below is applied.
    width = max((len(ep.name) for ep in candidates), default=1)

    if packages is not None:
        candidates = [
            ep for ep in candidates if ep.dist.metadata["name"] in packages
        ]

    def _by_package_then_name(ep):
        return ep.dist.metadata["name"], ep.name

    chunks = []
    current_header = None
    for ep in sorted(candidates, key=_by_package_then_name):
        header = f"{ep.dist.name} {ep.dist.version}"
        if header != current_header:
            # first resource of a new distribution: emit its header line
            chunks.append(
                "\n- %s %s @ %s: \n"
                % (
                    ep.dist.name,
                    ep.dist.version,
                    str(ep.dist.locate_file("")),
                )
            )
            current_header = header

        padded_name = ep.name.ljust(width)
        if ep.attr is None:
            chunks.append(" + %s --> %s\n" % (padded_name, ep.module))
        else:
            chunks.append(
                " + %s --> %s: %s\n" % (padded_name, ep.module, ep.attr)
            )

        if verbose:
            chunks.append(" ==> " + str(ep.load()) + "\n\n")

    return "".join(chunks)
def database_directories(
    strip=None, replacements=None, package_prefix="bob.bio."
):
    """Returns a dictionary of original directories for all registered databases.

    **Parameters:**

    strip : [str] or ``None``
      Name prefixes of database entry points to skip; ``["dummy"]`` if ``None``.

    replacements : object
      Passed unchanged to ``replace_directories`` of every loaded database.

    package_prefix : str
      Package namespace, in which we search for entry points, e.g., ``bob.bio``.
      The same prefix is used both to list and to load the databases.

    **Returns:**

    dirs : {str : [str]}
      Maps the database name to a list containing its ``original_directory``
      and, if it is not ``None``, its ``annotation_directory``.  Databases that
      raise ``AttributeError``, ``ValueError`` or ``ImportError`` while being
      processed are skipped.
    """
    if strip is None:
        strip = ["dummy"]
    entry_points = _get_entry_points(
        "database", strip, package_prefix=package_prefix
    )

    dirs = {}
    for entry_point in sorted(entry_points, key=lambda ep: ep.name):
        try:
            # look the database up in the same namespace it was listed from;
            # otherwise a non-default ``package_prefix`` would never resolve
            db = load_resource(
                entry_point.name, "database", package_prefix=package_prefix
            )
            db.replace_directories(replacements)
            dirs[entry_point.name] = [db.original_directory]
            if db.annotation_directory is not None:
                dirs[entry_point.name].append(db.annotation_directory)
        except (AttributeError, ValueError, ImportError) as error:
            # best effort: databases without these attributes are left out,
            # but leave a trace for debugging
            logger.debug(
                "Skipping database '%s': %s", entry_point.name, error
            )

    return dirs
368def get_resource_filename(resource_name, group):
369 """
370 Get the file name of a resource.
373 Parameters
374 ----------
375 resource_name: str
376 Name of the resource to be searched
378 group: str
379 Entry point group
381 Return
382 ------
383 filename: str
384 The entrypoint file name
386 """
388 # Check if it's already a path
389 if Path(resource_name).exists():
390 return resource_name
392 # If it's a resource get the path of this resource
393 resources = [
394 r for r in importlib.metadata.entry_points().select(group=group)
395 ]
397 # if resource_name not in [r.name for r in resources]:
398 # raise ValueError(f"Resource not found: `{resource_name}`")
400 for r in resources:
401 if r.name == resource_name:
402 resource = r
403 break
404 else:
405 raise ValueError(f"Resource not found: `{resource_name}`")
407 # TODO: This get the root path only
408 # I don't know how to get the filename
409 containing, _dot, resource_filename = resource.module.rpartition(".")
410 return str(
411 importlib.resources.files(containing).joinpath(
412 resource_filename + ".py"
413 )
414 )