Coverage for src/bob/bio/base/utils/resources.py: 25%

1#!/usr/bin/env python

2# vim: set fileencoding=utf-8 :

3# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>

4# @date: Thu Oct 25 10:05:55 CEST 2012

6from __future__ import print_function

8import importlib.metadata

9import importlib.resources

10import logging

12from pathlib import Path

14import clapper.config

# Logger shared by the functions of this module.
logger = logging.getLogger("bob.bio.base")


valid_keywords = (
    "database", "preprocessor", "extractor",
    "algorithm", "grid", "client",
    "config", "annotator", "pipeline",
)
"""Keywords for which resources are defined."""

def _collect_config(paths):
    """Merge several configuration sources into a single module

    The actual loading is delegated to ``clapper.config.load``, which is
    asked to resolve resource names in the ``bob.bio.config`` entry point
    group. The sources are handled in the given order: the first one is
    loaded, its symbols are resolved, then the second one overwrites them,
    and so on, so that the returned module reflects the final state.


    Parameters:

    paths : [str]
      A list of resources, modules or files (in order) to collect resources
      from


    Returns: module

      A valid Python module you can use to configure your tool

    """

    entry_point_group = "bob.bio.config"
    return clapper.config.load(paths, entry_point_group=entry_point_group)

def read_config_file(filenames, keyword=None):
    """Use this function to read the given configuration file(s).

    If a keyword is specified, only the configuration according to this
    keyword is returned. Otherwise the whole configuration read from the
    configuration files is returned.

    **Parameters:**

    filenames : [str]
      A non-empty list of configuration files or resources to read running
      options from

    keyword : str or ``None``
      If specified, only the contents of the variable with the given name is
      returned. If ``None``, the whole configuration is returned (a local
      namespace)

    **Returns:**

    config : object or namespace
      If ``keyword`` is specified, the object inside the configuration with
      the given name is returned. Otherwise, the whole configuration is
      returned (as a local namespace).

    **Raises:**

    RuntimeError
      If ``filenames`` is empty (or ``None``).

    ImportError
      If ``keyword`` is given, but none of the configurations defines it.
    """

    if not filenames:
        raise RuntimeError(
            "At least one configuration file, resource or "
            "module name must be passed"
        )

    config = _collect_config(filenames)

    if not keyword:
        return config

    if not hasattr(config, keyword):
        # Entries are converted with str() so that path-like objects (e.g.
        # pathlib.Path) do not turn this ImportError into a TypeError.
        raise ImportError(
            "The desired keyword '%s' does not exist in any of "
            "your configuration files: %s"
            % (keyword, ", ".join(str(name) for name in filenames))
        )

    return getattr(config, keyword)

100def _get_entry_points(

101 keyword: str,

102 strip: list[str] | None = None,

103 package_prefix: str = "bob.bio.",

104) -> list[importlib.metadata.EntryPoint]:

105 """Returns the list of entry points for registered resources with the given keyword."""

106 if strip is None:

107 strip = []

108 return [

109 entry_point

110 for entry_point in importlib.metadata.entry_points().select(

111 group=package_prefix + keyword

112 )

113 if not entry_point.name.startswith(tuple(strip))

114 ]

115

116

def load_resource(
    resource,
    keyword,
    imports=["bob.bio.base"],
    package_prefix="bob.bio.",
    preferred_package=None,
):
    """Loads the given resource that is registered with the given keyword.
    The resource can be:

    1. a resource as defined in the setup.py
    2. a configuration file
    3. a string defining the construction of an object. If imports are required for the construction of this object, they can be given as list of strings.

    **Parameters:**

    resource : str
      Any string interpretable as a resource (see above).

    keyword : str
      A valid resource keyword, can be one of :any:`bob.bio.base.utils.resources.valid_keywords`.
      An unknown keyword only triggers a warning; the lookup is still attempted.

    imports : [str]
      A list of strings defining which modules to import, when constructing new objects (option 3).
      The list is only read, never modified.

    package_prefix : str
      Package namespace, in which we search for entry points, e.g., ``bob.bio``.

    preferred_package : str or ``None``
      When several resources with the same name are found in different packages (e.g., in different ``bob.bio`` or other packages), this specifies the preferred package to load the resource from.
      If not specified, the extension that is **not** from ``bob.bio`` is selected.

    **Returns:**

    resource : object
      The resulting resource object is returned, either read from file or resource, or created newly.

    **Raises:**

    ImportError
      If the resource is neither a file, nor a registered entry point, nor a
      string that can be evaluated (the original error is chained as cause).
    """

    # first, look if the resource is a file name
    if Path(resource).is_file():
        return read_config_file([resource], keyword)

    if keyword not in valid_keywords:
        logger.warning(
            "The given keyword '%s' is not valid. Please use one of %s!",
            keyword,
            valid_keywords,
        )

    # now, we check if the resource is registered as an entry point in the resource files
    entry_points = [
        entry_point
        for entry_point in _get_entry_points(
            keyword, package_prefix=package_prefix
        )
        if entry_point.name == resource
    ]

    if len(entry_points) == 1:
        return entry_points[0].load()

    if entry_points:
        # Several packages register the same name: pick one of them.
        # TODO: extract current package name and use this one, if possible
        selected = None
        if preferred_package is not None:
            selected = next(
                (
                    p
                    for p in entry_points
                    if p.dist.metadata["name"] == preferred_package
                ),
                None,
            )

        if selected is None:
            # by default, use the first one that is not from bob.bio
            selected = next(
                (
                    p
                    for p in entry_points
                    if not p.dist.metadata["name"].startswith(package_prefix)
                ),
                None,
            )

        if selected is None:
            logger.warning(
                "Under the desired name '%s', there are multiple entry points defined, we return the first one: %s",
                resource,
                [entry_point.module for entry_point in entry_points],
            )
            return entry_points[0].load()

        # Report every entry point that is skipped; with exactly two
        # candidates this is the single other module.
        ignored = ", ".join(
            p.module for p in entry_points if p is not selected
        )
        logger.debug(
            "RESOURCES: Using the resource '%s' from '%s', and ignoring the one from '%s'",
            resource,
            selected.module,
            ignored,
        )
        return selected.load()

    # if the resource is neither a config file nor an entry point,
    # just execute it as a command
    # SECURITY: ``resource`` and ``imports`` are executed as Python code; only
    # pass strings coming from a trusted source (e.g. the user's own command
    # line).
    # The imported names are collected in an explicit namespace that is shared
    # between exec() and eval(). Relying on the implicit locals() of this
    # function does not work on Python 3.13+, where every call gets an
    # independent snapshot (PEP 667).
    namespace = {}
    try:
        # first, execute all import commands that are required
        for module_name in imports:
            exec("import %s" % module_name, globals(), namespace)
        # now, evaluate the resource (re-evaluate if the resource is still a string)
        while isinstance(resource, str):
            resource = eval(resource, globals(), namespace)
        return resource

    except Exception as e:
        raise ImportError(
            "The given command line option '%s' is neither a resource for a '%s', nor an existing configuration file, nor could be interpreted as a command"
            % (resource, keyword)
        ) from e

228

229

def extensions(keywords=valid_keywords, package_prefix="bob.bio."):
    """extensions(keywords=valid_keywords, package_prefix='bob.bio.') -> extensions

    Returns the sorted names of all packages that register at least one
    entry point for any of the given keywords.

    **Parameters:**

    keywords : [str]
      A list of keywords to load entry points for.
      Defaults to all :any:`bob.bio.base.utils.resources.valid_keywords`.

    package_prefix : str
      Package namespace, in which we search for entry points, e.g., ``bob.bio``.
    """
    package_names = set()
    for keyword in keywords:
        for entry_point in _get_entry_points(
            keyword, package_prefix=package_prefix
        ):
            package_names.add(entry_point.dist.metadata["name"])
    return sorted(package_names)

259

260

def resource_keys(
    keyword, exclude_packages=None, package_prefix="bob.bio.", strip=None
):
    """Returns the sorted names of all resources registered with the given keyword.

    Entry points that belong to one of the ``exclude_packages`` are ignored,
    as are entry points whose name starts with one of the ``strip`` prefixes
    (``["dummy"]`` when ``strip`` is not given)."""
    excluded = [] if exclude_packages is None else exclude_packages
    prefixes = ["dummy"] if strip is None else strip

    names = []
    for candidate in _get_entry_points(
        keyword, strip=prefixes, package_prefix=package_prefix
    ):
        if candidate.dist.metadata["name"] in excluded:
            continue
        names.append(candidate.name)

    names.sort()
    return names

278

279

def list_resources(
    keyword,
    strip=None,
    package_prefix="bob.bio.",
    verbose=False,
    packages=None,
):
    """Returns a string containing a detailed list of resources that are registered with the given keyword.

    The resources are grouped by the distribution that registers them; each
    group starts with a header line showing the distribution name, version
    and location. With ``verbose`` set, every resource is additionally loaded
    and its string representation is appended. When ``packages`` is given,
    only resources from these packages are listed. A ``ValueError`` is raised
    for keywords that are not in ``valid_keywords``."""
    if keyword not in valid_keywords:
        raise ValueError(
            "The given keyword '%s' is not valid. Please use one of %s!"
            % (str(keyword), str(valid_keywords))
        )
    prefixes = ["dummy"] if strip is None else strip

    candidates = _get_entry_points(
        keyword, prefixes, package_prefix=package_prefix
    )

    # The column width is taken over all candidates, i.e., before the
    # ``packages`` filter below is applied.
    name_width = max((len(candidate.name) for candidate in candidates), default=1)

    if packages is not None:
        candidates = [
            candidate
            for candidate in candidates
            if candidate.dist.metadata["name"] in packages
        ]

    chunks = []
    current_header = None
    for candidate in sorted(
        candidates, key=lambda p: (p.dist.metadata["name"], p.name)
    ):
        # start a new section whenever the distribution changes
        header = f"{candidate.dist.name} {candidate.dist.version}"
        if header != current_header:
            chunks.append(
                "\n- %s %s @ %s: \n"
                % (
                    candidate.dist.name,
                    candidate.dist.version,
                    str(candidate.dist.locate_file("")),
                )
            )
            current_header = header

        padded_name = candidate.name.ljust(name_width)
        if candidate.attr is None:
            chunks.append(" + %s --> %s\n" % (padded_name, candidate.module))
        else:
            chunks.append(
                " + %s --> %s: %s\n"
                % (padded_name, candidate.module, candidate.attr)
            )

        if verbose:
            chunks.append(" ==> " + str(candidate.load()) + "\n\n")

    return "".join(chunks)

340

341

def database_directories(
    strip=None, replacements=None, package_prefix="bob.bio."
):
    """Returns a dictionary of original directories for all registered databases.

    Every entry point of the ``database`` group under ``package_prefix`` is
    loaded (names starting with one of the ``strip`` prefixes, ``["dummy"]``
    by default, are skipped) and ``replacements`` is handed to its
    ``replace_directories`` method. The result maps the entry point name to a
    list holding the ``original_directory`` and, if it is not ``None``, the
    ``annotation_directory`` of the database.

    This is a best-effort collection: databases that cannot be loaded or that
    do not provide the expected attributes are skipped (reported at debug
    level only)."""
    if strip is None:
        strip = ["dummy"]
    entry_points = _get_entry_points(
        "database", strip, package_prefix=package_prefix
    )

    dirs = {}
    for entry_point in sorted(
        entry_points, key=lambda entry_point: entry_point.name
    ):
        try:
            # Look the database up in the same namespace in which it was
            # discovered; otherwise a non-default ``package_prefix`` would
            # never find its own entry points.
            db = load_resource(
                entry_point.name, "database", package_prefix=package_prefix
            )
            db.replace_directories(replacements)
            dirs[entry_point.name] = [db.original_directory]
            if db.annotation_directory is not None:
                dirs[entry_point.name].append(db.annotation_directory)
        except (AttributeError, ValueError, ImportError) as e:
            # deliberately not fatal: the database is just skipped
            logger.debug(
                "Skipping database '%s' while collecting directories: %s",
                entry_point.name,
                e,
            )

    return dirs

366

367

368def get_resource_filename(resource_name, group):

369 """

370 Get the file name of a resource.

371

372

373 Parameters

374 ----------

375 resource_name: str

376 Name of the resource to be searched

377

378 group: str

379 Entry point group

380

381 Return

382 ------

383 filename: str

384 The entrypoint file name

385

386 """

387

388 # Check if it's already a path

389 if Path(resource_name).exists():

390 return resource_name

391

392 # If it's a resource get the path of this resource

393 resources = [

394 r for r in importlib.metadata.entry_points().select(group=group)

395 ]

396

397 # if resource_name not in [r.name for r in resources]:

398 # raise ValueError(f"Resource not found: `{resource_name}`")

399

400 for r in resources:

401 if r.name == resource_name:

402 resource = r

403 break

404 else:

405 raise ValueError(f"Resource not found: `{resource_name}`")

406

407 # TODO: This get the root path only

408 # I don't know how to get the filename

409 containing, _dot, resource_filename = resource.module.rpartition(".")

410 return str(

411 importlib.resources.files(containing).joinpath(

412 resource_filename + ".py"

413 )

414 )