Coverage for src/bob/bio/base/database/filelist/query.py: 93%

248 statements  

« prev     ^ index     » next       coverage.py v7.6.5, created at 2024-11-14 21:41 +0100

1#!/usr/bin/env python 

2# vim: set fileencoding=utf-8 : 

3import logging 

4import os 

5 

6from bob.bio.base.database.legacy import check_parameters_for_validity 

7from bob.bio.base.utils.annotations import read_annotation_file 

8 

9from .. import BioFile, ZTBioDatabase 

10from .models import ListReader 

11 

12logger = logging.getLogger("bob.bio.base") 

13 

14 

15class FileListBioDatabase(ZTBioDatabase): 

16 """This class provides a user-friendly interface to databases that are given as file lists. 

17 

18 Parameters 

19 ---------- 

20 

21 filelists_directory : str 

22 The directory that contains the filelists defining the protocol(s). If you use the protocol 

23 attribute when querying the database, it will be appended to the base directory, such that 

24 several protocols are supported by the same class instance of `bob.bio.base`. 

25 

26 name : str 

27 The name of the database 

28 

29 protocol : str 

30 The protocol of the database. This should be a folder inside ``filelists_directory``. 

31 

32 bio_file_class : ``class`` 

33 The class that should be used to return the files. 

34 This can be :py:class:`bob.bio.base.database.BioFile`, :py:class:`bob.bio.spear.database.AudioBioFile`, :py:class:`bob.bio.face.database.FaceBioFile`, or anything similar. 

35 

36 original_directory : str or ``None`` 

37 The directory, where the original data can be found. 

38 

39 original_extension : str or [str] or ``None`` 

40 The filename extension of the original data, or multiple extensions. 

41 

42 annotation_directory : str or ``None`` 

43 The directory, where additional annotation files can be found. 

44 

45 annotation_extension : str or ``None`` 

46 The filename extension of the annotation files. 

47 

48 annotation_type : str or ``None`` 

49 The type of annotation that can be read. 

50 Currently, options are ``'eyecenter', 'named', 'idiap'``. 

51 See :py:func:`read_annotation_file` for details. 

52 

53 dev_sub_directory : str or ``None`` 

54 Specify a custom subdirectory for the filelists of the development set (default is ``'dev'``) 

55 

56 eval_sub_directory : str or ``None`` 

57 Specify a custom subdirectory for the filelists of the evaluation set (default is ``'eval'``) 

58 

59 world_filename : str or ``None`` 

60 Specify a custom filename for the training filelist (default is ``'norm/train_world.lst'``) 

61 

62 optional_world_1_filename : str or ``None`` 

63 Specify a custom filename for the (first optional) training filelist 

64 (default is ``'norm/train_optional_world_1.lst'``) 

65 

66 optional_world_2_filename : str or ``None`` 

67 Specify a custom filename for the (second optional) training filelist 

68 (default is ``'norm/train_optional_world_2.lst'``) 

69 

70 models_filename : str or ``None`` 

71 Specify a custom filename for the model filelists (default is ``'for_models.lst'``) 

72 

73 probes_filename : str or ``None`` 

74 Specify a custom filename for the probes filelists (default is ``'for_probes.lst'``) 

75 

76 scores_filename : str or ``None`` 

77 Specify a custom filename for the scores filelists (default is ``'for_scores.lst'``) 

78 

79 tnorm_filename : str or ``None`` 

80 Specify a custom filename for the T-norm scores filelists (default is ``'for_tnorm.lst'``) 

81 

82 znorm_filename : str or ``None`` 

83 Specify a custom filename for the Z-norm scores filelists (default is ``'for_znorm.lst'``) 

84 

85 use_dense_probe_file_list : bool or None 

86 Specify which list to use among ``probes_filename`` (dense) or ``scores_filename``. 

87 If ``None`` it is tried to be estimated based on the given parameters. 

88 

89 keep_read_lists_in_memory : bool 

90 If set to ``True`` (the default), the lists are read only once and stored in memory. 

91 Otherwise the lists will be re-read for every query (not recommended). 

92 """ 

93 

def __init__(
    self,
    filelists_directory,
    name,
    protocol=None,
    bio_file_class=BioFile,
    original_directory=None,
    original_extension=None,
    annotation_directory=None,
    annotation_extension=".json",
    annotation_type="json",
    dev_sub_directory=None,
    eval_sub_directory=None,
    world_filename=None,
    optional_world_1_filename=None,
    optional_world_2_filename=None,
    models_filename=None,
    # Probing uses ONE of the following two lists:
    probes_filename=None,  # probe files only -> dense model/probe score matrix
    scores_filename=None,  # model/probe pairs -> sparse model/probe score matrix
    # Lists for ZT-norm:
    tnorm_filename=None,
    znorm_filename=None,
    # Which list to use when both probes_filename and scores_filename are given.
    use_dense_probe_file_list=None,
    # True (the recommended default): lists are read once and kept in memory.
    keep_read_lists_in_memory=True,
    **kwargs,
):
    """Set up the database from the file lists below ``filelists_directory``.

    The generic arguments (name, protocol, original/annotation settings and
    any extra ``kwargs``) are forwarded to the base class.  Every
    sub-directory or list file name left at ``None`` falls back to its
    conventional default (``'dev'``, ``'eval'``, ``'norm/train_world.lst'``,
    ``'for_models.lst'``, ...).

    Raises
    ------

    RuntimeError
      If ``filelists_directory`` is not an existing directory.
    """

    def _fallback(value, default):
        # Only ``None`` selects the default; other falsy values are kept.
        return default if value is None else value

    super(FileListBioDatabase, self).__init__(
        name=name,
        protocol=protocol,
        original_directory=original_directory,
        original_extension=original_extension,
        annotation_directory=annotation_directory,
        annotation_extension=annotation_extension,
        annotation_type=annotation_type,
        **kwargs,
    )

    # Record the file-list specific arguments as well, for pretty printing.
    self._kwargs.update(
        {
            "filelists_directory": filelists_directory,
            "dev_sub_directory": dev_sub_directory,
            "eval_sub_directory": eval_sub_directory,
            "world_filename": world_filename,
            "optional_world_1_filename": optional_world_1_filename,
            "optional_world_2_filename": optional_world_2_filename,
            "models_filename": models_filename,
            "probes_filename": probes_filename,
            "scores_filename": scores_filename,
            "tnorm_filename": tnorm_filename,
            "znorm_filename": znorm_filename,
            "use_dense_probe_file_list": use_dense_probe_file_list,
            "keep_read_lists_in_memory": keep_read_lists_in_memory,
        }
    )

    self.bio_file_class = bio_file_class
    self.keep_read_lists_in_memory = keep_read_lists_in_memory
    # One list reader per protocol, created on demand by ``_list_reader``.
    self.list_readers = {}

    base_dir = os.path.abspath(filelists_directory)
    self.m_base_dir = base_dir
    if not os.path.isdir(base_dir):
        raise RuntimeError("Invalid directory specified %s." % (base_dir))

    # Sub-directories holding the lists of the dev and eval sets.
    self.m_dev_subdir = _fallback(dev_sub_directory, "dev")
    self.m_eval_subdir = _fallback(eval_sub_directory, "eval")

    # Training lists; format: filename client_id
    self.m_world_filename = _fallback(
        world_filename, os.path.join("norm", "train_world.lst")
    )
    self.m_optional_world_1_filename = _fallback(
        optional_world_1_filename,
        os.path.join("norm", "train_optional_world_1.lst"),
    )
    self.m_optional_world_2_filename = _fallback(
        optional_world_2_filename,
        os.path.join("norm", "train_optional_world_2.lst"),
    )

    # Model list; format: filename model_id client_id
    self.m_models_filename = _fallback(models_filename, "for_models.lst")
    # Scores list; format: filename model_id claimed_client_id client_id
    self.m_scores_filename = _fallback(scores_filename, "for_scores.lst")
    # Probe list; format: filename client_id
    self.m_probes_filename = _fallback(probes_filename, "for_probes.lst")
    # T-norm models; format: filename model_id client_id
    self.m_tnorm_filename = _fallback(tnorm_filename, "for_tnorm.lst")
    # Z-norm files; format: filename client_id
    self.m_znorm_filename = _fallback(znorm_filename, "for_znorm.lst")

    self.m_use_dense_probe_file_list = use_dense_probe_file_list

217 

def _list_reader(self, protocol):
    """Return the list reader registered for ``protocol``, creating it on first use.

    Parameters
    ----------

    protocol : str or ``None``
      The protocol whose reader is requested.

    Returns
    -------

    ListReader
      The reader stored in ``self.list_readers`` for this protocol.

    Raises
    ------

    ValueError
      If a reader has to be created and the protocol directory does not exist.
    """
    readers = self.list_readers
    if protocol in readers:
        return readers[protocol]

    # A named protocol must map to a directory below the base directory.
    if protocol is not None:
        protocol_dir = os.path.join(self.get_base_directory(), protocol)
        if not os.path.isdir(protocol_dir):
            raise ValueError(
                "The directory %s for the given protocol '%s' does not exist"
                % (protocol_dir, protocol)
            )

    reader = ListReader(self.keep_read_lists_in_memory)
    readers[protocol] = reader
    return reader

232 

def _make_bio(self, files):
    """Wrap each entry of ``files`` into an instance of ``self.bio_file_class``.

    Each entry must provide ``client_id``, ``path`` and ``id``; the latter is
    passed on as ``file_id``.
    """
    make = self.bio_file_class
    wrapped = []
    for entry in files:
        wrapped.append(
            make(client_id=entry.client_id, path=entry.path, file_id=entry.id)
        )
    return wrapped

240 

def all_files(self, groups=["dev"], add_zt_files=True):
    """Return all files of the given groups, using the internally stored protocol.

    Parameters
    ----------

    groups : [str]
      The groups to retrieve the files for.  The default list is only
      iterated, never modified.

    add_zt_files : bool
      If set, the T-norm and Z-norm files of every group other than
      ``'world'`` are added as well.  A warning is logged for groups that do
      not provide these lists, see :py:meth:`implements_zt`.

    Returns
    -------

    [BioFile]
      The sorted list of all files that fulfill the query.
    """
    collected = self.objects(groups, self.protocol, **self.all_files_options)

    for group in groups:
        # The world set has no ZT-norm lists.
        if group == "world" or not add_zt_files:
            continue
        if not self.implements_zt(self.protocol, group):
            logger.warning(
                "ZT score files are requested, but no such files are defined in group %s for protocol %s",
                group,
                self.protocol,
            )
            continue
        collected += self.tobjects(group, self.protocol)
        collected += self.zobjects(group, self.protocol, **self.z_probe_options)

    return self.sort(self._make_bio(collected))

279 

def groups(self, protocol=None, add_world=True, add_subworld=True):
    """Return the groups for which file lists exist on disk.

    Parameters
    ----------

    protocol : str or ``None``
      The protocol for which the groups should be retrieved.
      If ``None``, the internally stored protocol is used.

    add_world : bool
      Add the ``'world'`` group if its training list exists?

    add_subworld : bool
      Add the optional world groups?  Only considered when ``add_world=True``.

    Returns
    -------

    [str]
      The available groups, in the order ``'dev'``, ``'eval'``, ``'world'``,
      ``'optional_world_1'``, ``'optional_world_2'``.
    """
    protocol = protocol or self.protocol

    # All lists live directly below the base directory, or below the protocol
    # directory when a protocol is in use.
    root = self.get_base_directory()
    if protocol is not None:
        root = os.path.join(root, protocol)

    # dev/eval exist when their sub-directory exists.
    found = [
        name
        for name, subdir in (
            ("dev", self.m_dev_subdir),
            ("eval", self.m_eval_subdir),
        )
        if os.path.isdir(os.path.join(root, subdir))
    ]

    # The world groups exist when their list file exists.
    if add_world:
        candidates = [("world", self.m_world_filename)]
        if add_subworld:
            candidates.append(
                ("optional_world_1", self.m_optional_world_1_filename)
            )
            candidates.append(
                ("optional_world_2", self.m_optional_world_2_filename)
            )
        for name, filename in candidates:
            if os.path.isfile(os.path.join(root, filename)):
                found.append(name)

    return found

375 

def implements_zt(self, protocol=None, groups=None):
    """Check whether the file lists for ZT score normalization are available.

    Parameters
    ----------

    protocol : str or ``None``
      The protocol to check.  If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
      The groups for which the ZT-norm lists should be checked ``('dev', 'eval')``.

    Returns
    -------

    bool
      ``True`` if both the T-norm and the Z-norm list exist for every
      requested group, otherwise ``False``.
    """
    protocol = protocol or self.protocol
    selected = check_parameters_for_validity(
        groups, "group", self.groups(protocol, add_world=False)
    )

    # Stops at the first missing list.
    return all(
        os.path.exists(self._get_list_file(group, list_type, protocol))
        for group in selected
        for list_type in ("for_tnorm", "for_znorm")
    )

405 

def uses_dense_probe_file(self, protocol):
    """Decide whether the dense probe list (``for_probes``) is used.

    The value given to the constructor wins when it is not ``None``.
    Otherwise the decision is derived from which list files exist in all
    non-world groups of ``protocol``.

    Returns
    -------

    bool
      ``True`` for dense probing (probe list), ``False`` for sparse probing
      (scores list).

    Raises
    ------

    ValueError
      If both or neither kind of list is present in all groups, so the
      choice is ambiguous.
    """
    configured = self.m_use_dense_probe_file_list
    if configured is not None:
        return configured

    groups = self.groups(protocol, add_world=False)
    has_probes = all(
        os.path.exists(
            self._get_list_file(group, type="for_probes", protocol=protocol)
        )
        for group in groups
    )
    has_scores = all(
        os.path.exists(
            self._get_list_file(group, type="for_scores", protocol=protocol)
        )
        for group in groups
    )

    # Exactly one of the two kinds must be available to decide.
    if has_probes != has_scores:
        return has_probes
    raise ValueError(
        "Unable to determine, which way of probing should be used. Please specify."
    )

430 

def get_base_directory(self):
    """Return the directory that holds the file lists defining the database."""
    base_dir = self.m_base_dir
    return base_dir

435 

def set_base_directory(self, filelists_directory):
    """Reset the directory that holds the file lists defining the database.

    Parameters
    ----------

    filelists_directory : str
      The new base directory; it must exist.

    Raises
    ------

    RuntimeError
      If ``filelists_directory`` is not an existing directory.  The
      previously stored base directory is kept in that case.
    """
    # Validate the argument itself (not an attribute of ``self``) before
    # storing it, so a failed call leaves the current base directory intact.
    if not os.path.isdir(filelists_directory):
        raise RuntimeError(
            "Invalid directory specified %s." % (filelists_directory)
        )
    self.m_base_dir = filelists_directory

444 

def _get_list_file(self, group, type=None, protocol=None):
    """Build the path of the list file for ``group`` and list ``type``.

    Parameters
    ----------

    group : str
      ``'world'``, ``'optional_world_1'`` and ``'optional_world_2'`` select
      the corresponding training list; ``'dev'`` selects the dev
      sub-directory and any other value the eval sub-directory.

    type : str or ``None``
      One of ``'for_models'``, ``'for_probes'``, ``'for_scores'``,
      ``'for_tnorm'``, ``'for_znorm'``.  Ignored for the world groups.

    protocol : str or ``None``
      If non-empty, the path is built below this protocol's directory.

    Returns
    -------

    str
      The path of the requested list file (its existence is not checked).

    Raises
    ------

    KeyError
      If ``type`` is not a known list type for a dev/eval group.
    """
    root = self.get_base_directory()
    if protocol:
        root = os.path.join(root, protocol)

    # Training lists are addressed by group alone.
    for world_group, world_list in (
        ("world", self.m_world_filename),
        ("optional_world_1", self.m_optional_world_1_filename),
        ("optional_world_2", self.m_optional_world_2_filename),
    ):
        if group == world_group:
            return os.path.join(root, world_list)

    sub_directory = self.m_eval_subdir
    if group == "dev":
        sub_directory = self.m_dev_subdir

    names_by_type = {
        "for_models": self.m_models_filename,
        "for_probes": self.m_probes_filename,
        "for_scores": self.m_scores_filename,
        "for_tnorm": self.m_tnorm_filename,
        "for_znorm": self.m_znorm_filename,
    }
    return os.path.join(root, sub_directory, names_by_type[type])

472 

def client_id_from_model_id(self, model_id, group="dev"):
    """Return the client id that is connected to the given model id.

    The internally stored protocol is used.

    Parameters
    ----------

    model_id : str
      The model id for which the client id should be returned.

    group : str or [str] or ``None``
      The group(s) to search, one or more of
      ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.
      ``None`` selects the available groups without the optional world groups.

    Returns
    -------

    str
      The client id for the given model id.

    Raises
    ------

    ValueError
      If the model id is not found in any of the searched groups.
    """
    protocol = self.protocol
    valid_groups = self.groups(protocol)
    default_groups = self.groups(protocol, add_subworld=False)
    groups = check_parameters_for_validity(
        group, "group", valid_groups, default_parameters=default_groups
    )

    # The first group whose model list knows the id wins.
    for candidate in groups:
        models = self._list_reader(protocol).read_models(
            self._get_list_file(candidate, "for_models", protocol),
            candidate,
            "for_models",
        )
        if model_id in models:
            return models[model_id]

    raise ValueError(
        "The given model id '%s' cannot be found in one of the groups '%s'"
        % (model_id, groups)
    )

517 

def client_id_from_t_model_id(self, t_model_id, group="dev"):
    """Return the client id that is connected to the given T-Norm model id.

    The internally stored protocol is used.

    Parameters
    ----------

    t_model_id : str
      The T-Norm model id for which the client id should be returned.

    group : str or [str] or ``None``
      The group(s) to search, one or more of ``('dev', 'eval')``.

    Returns
    -------

    str
      The client id for the given T-Norm model id.

    Raises
    ------

    ValueError
      If the T-Norm model id is not found in any of the searched groups.
    """
    protocol = self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    groups = check_parameters_for_validity(group, "group", valid_groups)

    # The first group whose T-norm list knows the id wins.
    for candidate in groups:
        models = self._list_reader(protocol).read_models(
            self._get_list_file(candidate, "for_tnorm", protocol),
            candidate,
            "for_tnorm",
        )
        if t_model_id in models:
            return models[t_model_id]

    raise ValueError(
        "The given T-norm model id '%s' cannot be found in one of the groups '%s'"
        % (t_model_id, groups)
    )

556 

def __client_id_list__(self, groups, type, protocol=None):
    """Collect the client ids found in the ``type`` lists of all ``groups``.

    Returns a set, so every client id appears once.
    """
    protocol = protocol or self.protocol
    client_ids = set()
    for group in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, type, protocol)
        client_ids.update(
            entry.client_id for entry in reader.read_list(list_file, group, type)
        )
    return client_ids

568 

def client_ids(self, protocol=None, groups=None):
    """Return the client ids for the specific query by the user.

    Parameters
    ----------

    protocol : str or ``None``
      The protocol to consider.  If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
      The groups to which the clients belong
      ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.
      ``None`` selects the available groups without the optional world groups.

    Returns
    -------

    set of str
      All client ids that have the given properties.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol)
    default_groups = self.groups(protocol, add_subworld=False)
    selected = check_parameters_for_validity(
        groups, "group", valid_groups, default_parameters=default_groups
    )
    return self.__client_id_list__(selected, "for_models", protocol)

597 

def tclient_ids(self, protocol=None, groups=None):
    """Return the T-Norm client ids for the specific query by the user.

    Parameters
    ----------

    protocol : str or ``None``
      The protocol to consider.  If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
      The groups to which the clients belong ("dev", "eval").

    Returns
    -------

    set of str
      All T-Norm client ids that have the given properties.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    selected = check_parameters_for_validity(groups, "group", valid_groups)
    return self.__client_id_list__(selected, "for_tnorm", protocol)

623 

def zclient_ids(self, protocol=None, groups=None):
    """Return the Z-Norm client ids for the specific query by the user.

    Parameters
    ----------

    protocol : str or ``None``
      The protocol to consider.  If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
      The groups to which the clients belong ("dev", "eval").

    Returns
    -------

    set of str
      All Z-Norm client ids that have the given properties.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    selected = check_parameters_for_validity(groups, "group", valid_groups)
    return self.__client_id_list__(selected, "for_znorm", protocol)

649 

def __model_id_list__(self, groups, type, protocol=None):
    """Collect the model ids found in the ``type`` lists of all ``groups``.

    Returns a list without duplicates; its order is not defined.
    """
    protocol = protocol or self.protocol
    model_ids = set()
    for group in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, type, protocol)
        models = reader.read_models(list_file, group, type)
        # The keys of the returned mapping are the model ids.
        model_ids.update(models.keys())
    return list(model_ids)

660 

def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
    """Return the model ids for the specific query by the user.

    Parameters
    ----------

    groups : str or [str] or ``None``
      The groups to which the models belong
      ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.

    protocol : str or ``None``
      The protocol to consider.  If ``None``, the internally stored protocol is used.

    Returns
    -------

    [str]
      All model ids that have the given properties.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol=protocol)
    selected = check_parameters_for_validity(groups, "group", valid_groups)
    return self.__model_id_list__(selected, "for_models", protocol)

685 

def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs):
    """Return the T-Norm model ids for the specific query by the user.

    Parameters
    ----------

    protocol : str or ``None``
      The protocol to consider.  If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
      The groups to which the models belong ``('dev', 'eval')``.

    Returns
    -------

    [str]
      All T-Norm model ids belonging to the given groups.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    selected = check_parameters_for_validity(groups, "group", valid_groups)
    return self.__model_id_list__(selected, "for_tnorm", protocol)

710 

def objects(
    self,
    groups=None,
    protocol=None,
    purposes=None,
    model_ids=None,
    classes=None,
    **kwargs,
):
    """Returns a list of :py:class:`bob.bio.base.database.BioFile` objects for the specific query by the user.

    Parameters
    ----------

    groups : str or [str] or ``None``
      One of the groups ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')`` or a tuple with several of them.
      If ``None`` is given (this is the default), it is considered to be the existing subset of ``('world', 'dev', 'eval')``.

    protocol : str or ``None``
      The protocol to consider.  If ``None``, the internally stored protocol is used.

    purposes : str or [str] or ``None``
      The purposes required to be retrieved ``('enroll', 'probe')`` or a tuple
      with several of them. If ``None`` is given (this is the default), it is
      considered the same as a tuple with all possible values. This field is
      ignored for the data from the ``'world', 'optional_world_1', 'optional_world_2'`` groups.

    model_ids : str or [str] or ``None``
      Only retrieves the files for the provided list of model ids (claimed
      client id). If ``None`` is given (this is the default), no filter over
      the model_ids is performed.  Probe files of a dense probe list are
      never filtered by model id.

    classes : str or [str] or ``None``
      The classes (types of accesses) to be retrieved ``('client', 'impostor')``
      or a tuple with several of them. If ``None`` is given (this is the
      default), it is considered the same as a tuple with all possible values.

      .. note::
         Classes are not allowed to be specified when 'probes_filename' is used in the constructor.

    Returns
    -------

    [BioFile]
      A list of :py:class:`BioFile` objects considering all the filtering
      criteria; each file id appears at most once.

    Raises
    ------

    ValueError
      If ``classes`` is given while dense probing is in use, or if the way of
      probing cannot be determined.
    """
    protocol = protocol or self.protocol

    # Resolve the probing mode once: determining it may touch the file system
    # for every group, and the answer is the same for the whole query.
    dense_probing = self.uses_dense_probe_file(protocol)
    if dense_probing and classes is not None:
        raise ValueError(
            "To be able to use the 'classes' keyword, please use the 'for_scores.lst' list file."
        )

    purposes = check_parameters_for_validity(
        purposes, "purpose", ("enroll", "probe")
    )
    groups = check_parameters_for_validity(
        groups,
        "group",
        self.groups(protocol),
        default_parameters=self.groups(protocol, add_subworld=False),
    )
    classes = check_parameters_for_validity(
        classes, "class", ("client", "impostor")
    )

    if isinstance(model_ids, str):
        model_ids = (model_ids,)

    def read(group, *list_type):
        # Read one list; world lists are addressed without a list type.
        return self._list_reader(protocol).read_list(
            self._get_list_file(group, *list_type, protocol=protocol),
            group,
            *list_type,
        )

    def matches_model(entry):
        return model_ids is None or entry._model_id in model_ids

    # First, collect all the lists that have to be processed.
    plain_lists = [
        read(world_group)
        for world_group in ("world", "optional_world_1", "optional_world_2")
        if world_group in groups
    ]
    probe_lists = []
    probe_list_type = "for_probes" if dense_probing else "for_scores"
    for group in ("dev", "eval"):
        if group not in groups:
            continue
        if "enroll" in purposes:
            plain_lists.append(read(group, "for_models"))
        if "probe" in purposes:
            probe_lists.append(read(group, probe_list_type))

    # Now filter the entries, remembering the file ids already selected so
    # that no file is returned twice.
    seen_ids = set()
    selected = []

    # Non-probe files: filter by model id only.
    for entries in plain_lists:
        for entry in entries:
            if entry.id not in seen_ids and matches_model(entry):
                seen_ids.add(entry.id)
                selected.append(entry)

    # Probe files: dense lists are taken as they are, sparse lists are
    # filtered by model id and by class.
    for entries in probe_lists:
        for entry in entries:
            if entry.id in seen_ids:
                continue
            if not dense_probing:
                if not matches_model(entry):
                    continue
                is_client = (
                    "client" in classes
                    and entry.client_id == entry.claimed_id
                )
                is_impostor = (
                    "impostor" in classes
                    and entry.client_id != entry.claimed_id
                )
                if not (is_client or is_impostor):
                    continue
            seen_ids.add(entry.id)
            selected.append(entry)

    return self._make_bio(selected)

882 

def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs):
    """Return the :py:class:`bob.bio.base.database.BioFile` objects for enrolling T-norm models.

    Parameters
    ----------

    groups : str or [str] or ``None``
      The groups to which the models belong ``('dev', 'eval')``.

    protocol : str or ``None``
      The protocol to consider.  If ``None``, the internally stored protocol is used.

    model_ids : str or [str] or ``None``
      Only retrieve the files of the given T-norm model ids.  ``None`` (the
      default) disables this filter.

    Returns
    -------

    [BioFile]
      A list of :py:class:`BioFile` objects considering all the filtering criteria.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    groups = check_parameters_for_validity(groups, "group", valid_groups)

    if isinstance(model_ids, str):
        model_ids = (model_ids,)

    # The lists are assumed to contain no duplicate files.
    selected = []
    for group in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, "for_tnorm", protocol)
        selected.extend(
            entry
            for entry in reader.read_list(list_file, group, "for_tnorm")
            if model_ids is None or entry._model_id in model_ids
        )

    return self._make_bio(selected)

927 

def zobjects(self, groups=None, protocol=None, **kwargs):
    """Return the :py:class:`BioFile` objects used for Z-norm score normalization.

    Parameters
    ----------

    groups : str or [str] or ``None``
      The groups to which the clients belong ``('dev', 'eval')``.

    protocol : str or ``None``
      The protocol to consider.  If ``None``, the internally stored protocol is used.

    Returns
    -------

    [BioFile]
      The files of the Z-norm lists of all requested groups.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    groups = check_parameters_for_validity(groups, "group", valid_groups)

    # The lists are assumed to contain no duplicate files.
    gathered = []
    for group in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, "for_znorm", protocol)
        for entry in reader.read_list(list_file, group, "for_znorm"):
            gathered.append(entry)

    return self._make_bio(gathered)

968 

def annotations(self, file):
    """Read the annotations of the given file and return them as a dictionary.

    Parameters
    ----------

    file : BioFile
      The BioFile object for which the annotations should be read.

    Returns
    -------

    dict or ``None``
      The annotations as read by :py:func:`read_annotation_file`, e.g.:
      ``{'reye':(re_y,re_x), 'leye':(le_y,le_x)}``; ``None`` if no
      annotation directory is configured.
    """
    directory = self.annotation_directory
    if directory is None:
        return None

    # The file id doubles as the relative file name of the annotation file.
    relative_name = file.id + self.annotation_extension
    return read_annotation_file(
        os.path.join(directory, relative_name), self.annotation_type
    )

994 

def original_file_name(self, file, check_existence=True):
    """Returns the original file name of the given file.

    This interface supports several original extensions, so that file lists can contain images
    of different data types.

    When multiple original extensions are specified, this function will check the existence of any of
    these file names, and return the first one that actually exists.
    In this case, the ``check_existence`` flag is ignored.

    Parameters
    ----------

    file : BioFile
      The BioFile object for which the file name should be returned.

    check_existence : bool
      Should the existence of the original file be checked?
      (Ignored when multiple original extensions were specified in the constructor.)

    Returns
    -------

    str
      The full path of the original data file.

    Raises
    ------

    IOError
      If the existence is checked and the file exists with none of the
      configured extensions (including the case that no extension is
      configured at all).
    """
    extensions = self.original_extension

    if isinstance(extensions, str):
        file_name = file.make_path(self.original_directory, extensions)
        if not check_existence or os.path.exists(file_name):
            return file_name
        # A single extension leaves nothing else to try.  In particular, the
        # string must not be iterated character by character as if each
        # character were an extension of its own.
        extensions = ()
    elif extensions is None:
        # No extension configured: report it through the IOError below.
        extensions = ()

    # Check all registered extensions and take the first existing file.
    for extension in extensions:
        file_name = file.make_path(self.original_directory, extension)
        if os.path.exists(file_name):
            return file_name

    # None of the extensions matched.
    raise IOError(
        "File '%s' does not exist with any of the extensions '%s'"
        % (
            file.make_path(self.original_directory, None),
            self.original_extension,
        )
    )