Coverage for src/bob/bio/base/utils/resources.py: 25%

114 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-12 22:34 +0200

1#!/usr/bin/env python 

2# vim: set fileencoding=utf-8 : 

3# @author: Manuel Guenther <Manuel.Guenther@idiap.ch> 

4# @date: Thu Oct 25 10:05:55 CEST 2012 

5 

6from __future__ import print_function 

7 

8import importlib.metadata 

9import importlib.resources 

10import logging 

11 

12from pathlib import Path 

13 

14import clapper.config 

15 

# Module-wide logger; every message emitted from this file is reported under
# the "bob.bio.base" logger name.
logger = logging.getLogger("bob.bio.base")


# Each keyword is appended to a package prefix (``"bob.bio."`` by default) to
# form the entry point group that is searched for registered resources.
# ``load_resource`` only warns about keywords missing from this tuple, while
# ``list_resources`` rejects them with a ``ValueError``.
valid_keywords = (
    "database",
    "preprocessor",
    "extractor",
    "algorithm",
    "grid",
    "client",
    "config",
    "annotator",
    "pipeline",
)
"""Keywords for which resources are defined."""

31 

32 

def _collect_config(paths):
    """Merge several configuration resources into a single module.

    The resources are handed, in the given order, to
    :py:func:`clapper.config.load`, using the ``bob.bio.config`` entry point
    group to resolve resource names.  Each resource is loaded on top of the
    state left by the previous ones, so later entries overwrite symbols
    defined by earlier entries.


    Parameters:

      paths : [str]
        Resources, module names or file names to load, in order.


    Returns: module

      A temporary Python module holding all resolved variables.

    """
    entry_point_group = "bob.bio.config"
    return clapper.config.load(paths, entry_point_group=entry_point_group)

56 

57 

def read_config_file(filenames, keyword=None):
    """Load one or several configuration files and return their content.

    All given files/resources are merged (in order) into a single
    configuration namespace.  Either that whole namespace or one single
    variable of it is returned, depending on ``keyword``.

    **Parameters:**

    filenames : [str]
      A list of configuration files or resources to read running options
      from.  An empty list is rejected.

    keyword : str or ``None``
      If specified, only the content of the variable with this name is
      returned.  If ``None`` (or empty), the whole configuration is returned.

    **Returns:**

    config : object or namespace
      The object stored under ``keyword`` in the configuration, or the whole
      configuration namespace when no ``keyword`` is given.

    **Raises:**

    RuntimeError
      If ``filenames`` is empty.

    ImportError
      If ``keyword`` is given but not defined by any of the files.
    """
    if not filenames:
        raise RuntimeError(
            "At least one configuration file, resource or "
            "module name must be passed"
        )

    loaded = _collect_config(filenames)

    # Without a keyword the caller wants the complete namespace
    if not keyword:
        return loaded

    # A private sentinel tells "variable missing" apart from any stored value
    missing = object()
    value = getattr(loaded, keyword, missing)
    if value is missing:
        raise ImportError(
            "The desired keyword '%s' does not exist in any of "
            "your configuration files: %s" % (keyword, ", ".join(filenames))
        )

    return value

98 

99 

100def _get_entry_points( 

101 keyword: str, 

102 strip: list[str] | None = None, 

103 package_prefix: str = "bob.bio.", 

104) -> list[importlib.metadata.EntryPoint]: 

105 """Returns the list of entry points for registered resources with the given keyword.""" 

106 if strip is None: 

107 strip = [] 

108 return [ 

109 entry_point 

110 for entry_point in importlib.metadata.entry_points().select( 

111 group=package_prefix + keyword 

112 ) 

113 if not entry_point.name.startswith(tuple(strip)) 

114 ] 

115 

116 

def load_resource(
    resource,
    keyword,
    imports=("bob.bio.base",),
    package_prefix="bob.bio.",
    preferred_package=None,
):
    """Loads the given resource that is registered with the given keyword.
    The resource can be:

    1. a configuration file
    2. a resource registered as an entry point
    3. a string defining the construction of an object. If imports are required for the construction of this object, they can be given as list of strings.

    The three interpretations are tried in exactly this order.

    **Parameters:**

    resource : str
      Any string interpretable as a resource (see above).

    keyword : str
      A valid resource keyword, can be one of :any:`bob.bio.base.utils.resources.valid_keywords`.
      An unknown keyword only triggers a warning.

    imports : [str]
      A list of strings defining which modules to import, when constructing new objects (option 3).

    package_prefix : str
      Package namespace, in which we search for entry points, e.g., ``bob.bio``.

    preferred_package : str or ``None``
      When several resources with the same name are found in different packages (e.g., in different ``bob.bio`` or other packages), this specifies the preferred package to load the resource from.
      If not specified, the extension that is **not** from ``bob.bio`` is selected.

    **Returns:**

    resource : object
      The resulting resource object is returned, either read from file or resource, or created newly.

    **Raises:**

    ImportError
      If the resource is neither an existing file, nor a registered entry
      point, nor a string that can be evaluated.
    """

    # first, look if the resource is a file name
    if Path(resource).is_file():
        return read_config_file([resource], keyword)

    if keyword not in valid_keywords:
        logger.warning(
            "The given keyword '%s' is not valid. Please use one of %s!",
            keyword,
            valid_keywords,
        )

    # now, we check if the resource is registered as an entry point in the resource files
    entry_points = [
        entry_point
        for entry_point in _get_entry_points(
            keyword, package_prefix=package_prefix
        )
        if entry_point.name == resource
    ]

    if len(entry_points) == 1:
        return entry_points[0].load()

    if entry_points:
        # Several packages register the same name: pick one of them.
        # TODO: extract current package name and use this one, if possible
        index = -1
        if preferred_package is not None:
            for i, p in enumerate(entry_points):
                if p.dist.metadata["name"] == preferred_package:
                    index = i
                    break

        if index == -1:
            # by default, use the first one that is not from bob.bio
            for i, p in enumerate(entry_points):
                if not p.dist.metadata["name"].startswith(package_prefix):
                    index = i
                    break

        if index != -1:
            # Report every candidate that is not used; with exactly two
            # candidates this is just the other module's name.
            ignored = ", ".join(
                p.module for i, p in enumerate(entry_points) if i != index
            )
            logger.debug(
                "RESOURCES: Using the resource '%s' from '%s', and ignoring the one from '%s'",
                resource,
                entry_points[index].module,
                ignored,
            )
            return entry_points[index].load()

        logger.warning(
            "Under the desired name '%s', there are multiple entry points defined, we return the first one: %s",
            resource,
            [entry_point.module for entry_point in entry_points],
        )
        return entry_points[0].load()

    # if the resource is neither a config file nor an entry point,
    # just execute it as a command
    # SECURITY NOTE: the string is passed to exec()/eval() and can run
    # arbitrary code; only use this with trusted (e.g. command line) input.
    try:
        # An explicit namespace is required: names bound by exec() in the
        # implicit function locals are not guaranteed to be visible to a later
        # eval() (and never are since Python 3.13).  It starts as a copy of
        # the module globals so that the module itself is not modified.
        namespace = dict(globals())
        # first, execute all import commands that are required
        for module_name in imports:
            exec("import %s" % module_name, namespace)
        # now, evaluate the resource (re-evaluate if the resource is still a string)
        while isinstance(resource, str):
            resource = eval(resource, namespace)
        return resource

    except Exception as e:
        raise ImportError(
            "The given command line option '%s' is neither a resource for a '%s', nor an existing configuration file, nor could be interpreted as a command"
            % (resource, keyword)
        ) from e

228 

229 

def extensions(keywords=valid_keywords, package_prefix="bob.bio."):
    """Returns the sorted names of all packages that define extensions for the given keywords.

    **Parameters:**

    keywords : [str]
      A list of keywords to load entry points for.
      Defaults to all :any:`bob.bio.base.utils.resources.valid_keywords`.

    package_prefix : str
      Package namespace, in which we search for entry points, e.g., ``bob.bio``.

    **Returns:**

    extensions : [str]
      The distribution names providing at least one matching entry point,
      without duplicates, in sorted order.
    """
    package_names = set()
    for keyword in keywords:
        registered = _get_entry_points(keyword, package_prefix=package_prefix)
        for entry_point in registered:
            package_names.add(entry_point.dist.metadata["name"])
    return sorted(package_names)

259 

260 

def resource_keys(
    keyword, exclude_packages=None, package_prefix="bob.bio.", strip=None
):
    """Returns the sorted names of all resources registered with the given keyword.

    Entry points whose name starts with one of the ``strip`` prefixes
    (``["dummy"]`` when not given) are skipped, as are entry points that
    belong to one of the ``exclude_packages``."""
    skipped_packages = [] if exclude_packages is None else exclude_packages
    name_prefixes = ["dummy"] if strip is None else strip

    names = []
    for entry_point in _get_entry_points(
        keyword, strip=name_prefixes, package_prefix=package_prefix
    ):
        if entry_point.dist.metadata["name"] in skipped_packages:
            continue
        names.append(entry_point.name)

    names.sort()
    return names

278 

279 

def list_resources(
    keyword,
    strip=None,
    package_prefix="bob.bio.",
    verbose=False,
    packages=None,
):
    """Builds a human-readable listing of the resources registered with the given keyword.

    Resources are grouped by the distribution that provides them; each group
    starts with a header line naming the distribution, its version and its
    location.  With ``verbose``, every resource is additionally loaded and its
    string representation is appended.  When ``packages`` is given, only
    resources from these distributions are listed.

    Raises a ``ValueError`` if ``keyword`` is not one of ``valid_keywords``.
    """
    if keyword not in valid_keywords:
        raise ValueError(
            "The given keyword '%s' is not valid. Please use one of %s!"
            % (str(keyword), str(valid_keywords))
        )

    name_prefixes = ["dummy"] if strip is None else strip
    candidates = _get_entry_points(
        keyword, name_prefixes, package_prefix=package_prefix
    )

    # The name column width is taken over all candidates, i.e., before the
    # ``packages`` filter below is applied.
    width = max((len(candidate.name) for candidate in candidates), default=1)

    if packages is not None:
        candidates = [
            candidate
            for candidate in candidates
            if candidate.dist.metadata["name"] in packages
        ]

    def by_package_then_name(candidate):
        return (candidate.dist.metadata["name"], candidate.name)

    chunks = []
    current_header = None
    for candidate in sorted(candidates, key=by_package_then_name):
        header = f"{candidate.dist.name} {candidate.dist.version}"
        if header != current_header:
            # first resource of a new distribution: emit its header line
            chunks.append(
                "\n- %s %s @ %s: \n"
                % (
                    candidate.dist.name,
                    candidate.dist.version,
                    str(candidate.dist.locate_file("")),
                )
            )
            current_header = header

        padded_name = candidate.name.ljust(width)
        if candidate.attr is None:
            chunks.append(" + %s --> %s\n" % (padded_name, candidate.module))
        else:
            chunks.append(
                " + %s --> %s: %s\n"
                % (padded_name, candidate.module, candidate.attr)
            )

        if verbose:
            chunks.append(" ==> " + str(candidate.load()) + "\n\n")

    return "".join(chunks)

340 

341 

def database_directories(
    strip=None, replacements=None, package_prefix="bob.bio."
):
    """Returns a dictionary of original directories for all registered databases.

    **Parameters:**

    strip : [str] or ``None``
      Name prefixes of database resources to skip; defaults to ``["dummy"]``.

    replacements : object
      Passed unchanged to ``replace_directories`` of each loaded database.

    package_prefix : str
      Package namespace, in which we search for entry points, e.g., ``bob.bio``.
      It is used both for listing the databases and for loading them.

    **Returns:**

    dirs : {str : [str]}
      Maps each database resource name to a list holding its
      ``original_directory`` and, if it is not ``None``, its
      ``annotation_directory``.  Databases that cannot be loaded or that do
      not provide this interface are left out.
    """
    if strip is None:
        strip = ["dummy"]
    entry_points = _get_entry_points(
        "database", strip, package_prefix=package_prefix
    )

    dirs = {}
    for entry_point in sorted(
        entry_points, key=lambda entry_point: entry_point.name
    ):
        try:
            # Load from the same namespace the entry point was listed in;
            # otherwise a non-default ``package_prefix`` would look the
            # resource up in the wrong entry point group.
            db = load_resource(
                entry_point.name, "database", package_prefix=package_prefix
            )
            db.replace_directories(replacements)
            dirs[entry_point.name] = [db.original_directory]
            if db.annotation_directory is not None:
                dirs[entry_point.name].append(db.annotation_directory)
        except (AttributeError, ValueError, ImportError) as e:
            # Best effort: such databases are skipped on purpose, but the
            # reason is kept traceable in the debug log.
            logger.debug(
                "Could not determine the directories of database '%s': %s",
                entry_point.name,
                e,
            )

    return dirs

366 

367 

368def get_resource_filename(resource_name, group): 

369 """ 

370 Get the file name of a resource. 

371 

372 

373 Parameters 

374 ---------- 

375 resource_name: str 

376 Name of the resource to be searched 

377 

378 group: str 

379 Entry point group 

380 

381 Return 

382 ------ 

383 filename: str 

384 The entrypoint file name 

385 

386 """ 

387 

388 # Check if it's already a path 

389 if Path(resource_name).exists(): 

390 return resource_name 

391 

392 # If it's a resource get the path of this resource 

393 resources = [ 

394 r for r in importlib.metadata.entry_points().select(group=group) 

395 ] 

396 

397 # if resource_name not in [r.name for r in resources]: 

398 # raise ValueError(f"Resource not found: `{resource_name}`") 

399 

400 for r in resources: 

401 if r.name == resource_name: 

402 resource = r 

403 break 

404 else: 

405 raise ValueError(f"Resource not found: `{resource_name}`") 

406 

407 # TODO: This get the root path only 

408 # I don't know how to get the filename 

409 containing, _dot, resource_filename = resource.module.rpartition(".") 

410 return str( 

411 importlib.resources.files(containing).joinpath( 

412 resource_filename + ".py" 

413 ) 

414 )