Coverage for src/bob/bio/base/database/filelist/query.py: 93%

1#!/usr/bin/env python

2# vim: set fileencoding=utf-8 :

3import logging

4import os

6from bob.bio.base.database.legacy import check_parameters_for_validity

7from bob.bio.base.utils.annotations import read_annotation_file

9from .. import BioFile, ZTBioDatabase

10from .models import ListReader

12logger = logging.getLogger("bob.bio.base")

15class FileListBioDatabase(ZTBioDatabase):

16 """This class provides a user-friendly interface to databases that are given as file lists.

18 Parameters

19 ----------

21 filelists_directory : str

22 The directory that contains the filelists defining the protocol(s). If you use the protocol

23 attribute when querying the database, it will be appended to the base directory, such that

24 several protocols are supported by the same class instance of `bob.bio.base`.

26 name : str

27 The name of the database

29 protocol : str

30 The protocol of the database. This should be a folder inside ``filelists_directory``.

32 bio_file_class : ``class``

33 The class that should be used to return the files.

34 This can be :py:class:`bob.bio.base.database.BioFile`, :py:class:`bob.bio.spear.database.AudioBioFile`, :py:class:`bob.bio.face.database.FaceBioFile`, or anything similar.

36 original_directory : str or ``None``

37 The directory, where the original data can be found.

39 original_extension : str or [str] or ``None``

40 The filename extension of the original data, or multiple extensions.

42 annotation_directory : str or ``None``

43 The directory, where additional annotation files can be found.

45 annotation_extension : str or ``None``

46 The filename extension of the annotation files.

48 annotation_type : str or ``None``

49 The type of annotation that can be read.

50 Currently, options are ``'eyecenter', 'named', 'idiap'``.

51 See :py:func:`read_annotation_file` for details.

53 dev_sub_directory : str or ``None``

54 Specify a custom subdirectory for the filelists of the development set (default is ``'dev'``)

56 eval_sub_directory : str or ``None``

57 Specify a custom subdirectory for the filelists of the evaluation set (default is ``'eval'``)

59 world_filename : str or ``None``

60 Specify a custom filename for the training filelist (default is ``'norm/train_world.lst'``)

62 optional_world_1_filename : str or ``None``

63 Specify a custom filename for the (first optional) training filelist

64 (default is ``'norm/train_optional_world_1.lst'``)

66 optional_world_2_filename : str or ``None``

67 Specify a custom filename for the (second optional) training filelist

68 (default is ``'norm/train_optional_world_2.lst'``)

70 models_filename : str or ``None``

71 Specify a custom filename for the model filelists (default is ``'for_models.lst'``)

73 probes_filename : str or ``None``

74 Specify a custom filename for the probes filelists (default is ``'for_probes.lst'``)

76 scores_filename : str or ``None``

77 Specify a custom filename for the scores filelists (default is ``'for_scores.lst'``)

79 tnorm_filename : str or ``None``

80 Specify a custom filename for the T-norm scores filelists (default is ``'for_tnorm.lst'``)

82 znorm_filename : str or ``None``

83 Specify a custom filename for the Z-norm scores filelists (default is ``'for_znorm.lst'``)

85 use_dense_probe_file_list : bool or None

86 Specify which list to use among ``probes_filename`` (dense) or ``scores_filename``.

87 If ``None``, the list to use is inferred from which of the two file lists exist.

89 keep_read_lists_in_memory : bool

90 If set to ``True`` (the default), the lists are read only once and stored in memory.

91 Otherwise the lists will be re-read for every query (not recommended).

92 """

def __init__(
    self,
    filelists_directory,
    name,
    protocol=None,
    bio_file_class=BioFile,
    original_directory=None,
    original_extension=None,
    annotation_directory=None,
    annotation_extension=".json",
    annotation_type="json",
    dev_sub_directory=None,
    eval_sub_directory=None,
    world_filename=None,
    optional_world_1_filename=None,
    optional_world_2_filename=None,
    models_filename=None,
    # For probing, use ONE of the two score file lists:
    probes_filename=None,  # probe files -> dense model/probe score matrix
    scores_filename=None,  # model and probe files -> sparse model/probe score matrix
    # For ZT-Norm:
    tnorm_filename=None,
    znorm_filename=None,
    # Which list to use when both probes_filename and scores_filename exist.
    use_dense_probe_file_list=None,
    # True (the RECOMMENDED default): lists are read once and kept in memory.
    keep_read_lists_in_memory=True,
    **kwargs,
):
    """Set up the database from the file lists below ``filelists_directory``.

    Every sub-directory or file name that is left as ``None`` is replaced by
    its conventional default (``'dev'``, ``'eval'``,
    ``'norm/train_world.lst'``, ``'for_models.lst'``, ...).

    Raises
    ------

    RuntimeError
        If ``filelists_directory`` is not an existing directory.
    """

    def _or_default(value, default):
        # ``None`` means "not specified"; any other value is taken as given.
        return default if value is None else value

    super(FileListBioDatabase, self).__init__(
        name=name,
        protocol=protocol,
        original_directory=original_directory,
        original_extension=original_extension,
        annotation_directory=annotation_directory,
        annotation_extension=annotation_extension,
        annotation_type=annotation_type,
        **kwargs,
    )

    # Remember the file-list specific arguments for pretty printing.
    self._kwargs.update(
        {
            "filelists_directory": filelists_directory,
            "dev_sub_directory": dev_sub_directory,
            "eval_sub_directory": eval_sub_directory,
            "world_filename": world_filename,
            "optional_world_1_filename": optional_world_1_filename,
            "optional_world_2_filename": optional_world_2_filename,
            "models_filename": models_filename,
            "probes_filename": probes_filename,
            "scores_filename": scores_filename,
            "tnorm_filename": tnorm_filename,
            "znorm_filename": znorm_filename,
            "use_dense_probe_file_list": use_dense_probe_file_list,
            "keep_read_lists_in_memory": keep_read_lists_in_memory,
        }
    )

    self.bio_file_class = bio_file_class
    self.keep_read_lists_in_memory = keep_read_lists_in_memory
    # One list reader per protocol, created lazily by ``_list_reader``.
    self.list_readers = {}

    base_dir = os.path.abspath(filelists_directory)
    self.m_base_dir = base_dir
    if not os.path.isdir(base_dir):
        raise RuntimeError("Invalid directory specified %s." % (base_dir))

    # Sub-directories holding the dev and eval file lists.
    self.m_dev_subdir = _or_default(dev_sub_directory, "dev")
    self.m_eval_subdir = _or_default(eval_sub_directory, "eval")

    # Training lists; format: filename client_id
    self.m_world_filename = _or_default(
        world_filename, os.path.join("norm", "train_world.lst")
    )
    self.m_optional_world_1_filename = _or_default(
        optional_world_1_filename,
        os.path.join("norm", "train_optional_world_1.lst"),
    )
    self.m_optional_world_2_filename = _or_default(
        optional_world_2_filename,
        os.path.join("norm", "train_optional_world_2.lst"),
    )

    # Model list; format: filename model_id client_id
    self.m_models_filename = _or_default(models_filename, "for_models.lst")
    # Scores list; format: filename model_id claimed_client_id client_id
    self.m_scores_filename = _or_default(scores_filename, "for_scores.lst")
    # Probe list; format: filename client_id
    self.m_probes_filename = _or_default(probes_filename, "for_probes.lst")
    # T-Norm models; format: filename model_id client_id
    self.m_tnorm_filename = _or_default(tnorm_filename, "for_tnorm.lst")
    # Z-Norm files; format: filename client_id
    self.m_znorm_filename = _or_default(znorm_filename, "for_znorm.lst")

    self.m_use_dense_probe_file_list = use_dense_probe_file_list

217

def _list_reader(self, protocol):
    """Return the list reader for ``protocol``, creating it on first use.

    Readers are cached in ``self.list_readers``, keyed by protocol.

    Raises
    ------

    ValueError
        If ``protocol`` is not ``None`` and has no directory below the base
        directory.
    """
    readers = self.list_readers
    if protocol in readers:
        return readers[protocol]

    if protocol is not None:
        protocol_dir = os.path.join(self.get_base_directory(), protocol)
        if not os.path.isdir(protocol_dir):
            raise ValueError(
                "The directory %s for the given protocol '%s' does not exist"
                % (protocol_dir, protocol)
            )

    reader = ListReader(self.keep_read_lists_in_memory)
    readers[protocol] = reader
    return reader

232

def _make_bio(self, files):
    """Wrap each list entry in a ``self.bio_file_class`` instance.

    The entry's ``client_id``, ``path`` and ``id`` are passed on as
    ``client_id``, ``path`` and ``file_id``.
    """
    bio_files = []
    for entry in files:
        bio_files.append(
            self.bio_file_class(
                client_id=entry.client_id,
                path=entry.path,
                file_id=entry.id,
            )
        )
    return bio_files

240

def all_files(self, groups=("dev",), add_zt_files=True):
    """Returns all files for the given groups, using the internally stored protocol.

    Parameters
    ----------

    groups : str or [str]
        The group(s) to retrieve the files for. A single group name is
        accepted as well as a sequence of names.

    add_zt_files : bool
        If selected, also files for ZT-norm scoring will be added.
        Please select this option only if this dataset provides ZT-norm files, see :py:meth:`implements_zt`.

    Returns
    -------

    [BioFile]
        A sorted list of all files that fulfill your query.
    """
    # A bare string would otherwise be iterated character by character below.
    if isinstance(groups, str):
        groups = (groups,)

    # Copy, so that extending the result never mutates a list owned by ``objects``.
    files = list(self.objects(groups, self.protocol, **self.all_files_options))

    if add_zt_files:
        for group in groups:
            # The ZT-norm lists only exist per evaluation group, not for training data.
            if group == "world":
                continue
            if self.implements_zt(self.protocol, group):
                files += self.tobjects(group, self.protocol)
                files += self.zobjects(
                    group, self.protocol, **self.z_probe_options
                )
            else:
                logger.warning(
                    "ZT score files are requested, but no such files are defined in group %s for protocol %s",
                    group,
                    self.protocol,
                )

    return self.sort(self._make_bio(files))

279

def groups(self, protocol=None, add_world=True, add_subworld=True):
    """List the groups for which file lists exist on disk.

    Parameters
    ----------

    protocol : str or ``None``
        The protocol for which the groups should be retrieved.
        If ``None``, the internally stored protocol is used.

    add_world : bool
        Also report the ``'world'`` group if its training list exists?

    add_subworld : bool
        Also report ``'optional_world_1'`` / ``'optional_world_2'``?
        Only has an effect when ``add_world=True``.

    Returns
    -------

    [str]
        The available groups, in the order ``'dev'``, ``'eval'``,
        ``'world'``, ``'optional_world_1'``, ``'optional_world_2'``.
    """
    protocol = protocol or self.protocol

    # All lists live below the protocol directory when a protocol is in use.
    root = self.get_base_directory()
    if protocol is not None:
        root = os.path.join(root, protocol)

    available = []

    # dev / eval are detected by the presence of their sub-directory.
    for label, sub_directory in (
        ("dev", self.m_dev_subdir),
        ("eval", self.m_eval_subdir),
    ):
        if os.path.isdir(os.path.join(root, sub_directory)):
            available.append(label)

    # The training groups are detected by the presence of their list file.
    if add_world:
        training_lists = [("world", self.m_world_filename)]
        if add_subworld:
            training_lists.append(
                ("optional_world_1", self.m_optional_world_1_filename)
            )
            training_lists.append(
                ("optional_world_2", self.m_optional_world_2_filename)
            )
        for label, list_file in training_lists:
            if os.path.isfile(os.path.join(root, list_file)):
                available.append(label)

    return available

375

def implements_zt(self, protocol=None, groups=None):
    """Tell whether the T-norm and Z-norm file lists exist.

    Parameters
    ----------

    protocol : str or ``None``
        The protocol to check. If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
        The groups for which the ZT score normalization file lists should be checked ``('dev', 'eval')``.

    Returns
    -------

    bool
        ``True`` if both the T-norm and the Z-norm list exist for every
        requested group, otherwise ``False``.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    groups = check_parameters_for_validity(groups, "group", valid_groups)

    # ``all`` stops at the first missing list file.
    return all(
        os.path.exists(self._get_list_file(group, list_type, protocol))
        for group in groups
        for list_type in ("for_tnorm", "for_znorm")
    )

405

def uses_dense_probe_file(self, protocol):
    """Decide whether the dense probe list (``for_probes``) is used.

    An explicit ``use_dense_probe_file_list`` given to the constructor always
    wins. Otherwise the answer is derived from which list exists in all
    non-world groups of ``protocol``.

    Returns
    -------

    bool
        ``True`` for the dense probe list, ``False`` for the sparse scores list.

    Raises
    ------

    ValueError
        If the existing files do not single out one of the two lists.
    """
    configured = self.m_use_dense_probe_file_list
    if configured is not None:
        return configured

    # Each flag stays True only while its list exists in every group seen so far.
    has_probes = True
    has_scores = True
    for group in self.groups(protocol, add_world=False):
        if has_probes:
            has_probes = os.path.exists(
                self._get_list_file(group, type="for_probes", protocol=protocol)
            )
        if has_scores:
            has_scores = os.path.exists(
                self._get_list_file(group, type="for_scores", protocol=protocol)
            )

    # Exactly one kind of list available: that one decides.
    if has_probes != has_scores:
        return has_probes

    raise ValueError(
        "Unable to determine, which way of probing should be used. Please specify."
    )

430

def get_base_directory(self):
    """Return the directory that holds the file lists defining the database."""
    base_directory = self.m_base_dir
    return base_directory

435

def set_base_directory(self, filelists_directory):
    """Reset the directory that holds the file lists defining the database.

    The directory is validated before it is stored, so a failed call leaves
    the previous base directory in place. As in the constructor, the
    absolute path is stored.

    Parameters
    ----------

    filelists_directory : str
        The new directory containing the file lists.

    Raises
    ------

    RuntimeError
        If ``filelists_directory`` is not an existing directory.
    """
    base_dir = os.path.abspath(filelists_directory)
    if not os.path.isdir(base_dir):
        raise RuntimeError("Invalid directory specified %s." % (base_dir))
    # NOTE(review): readers already cached in ``self.list_readers`` are kept;
    # confirm whether they must be dropped when the base directory changes.
    self.m_base_dir = base_dir

444

def _get_list_file(self, group, type=None, protocol=None):
    """Build the path of the list file for ``group`` (and list ``type``).

    For the three training groups the list ``type`` is ignored. Any other
    group is looked up in the dev sub-directory if it is ``'dev'`` and in
    the eval sub-directory otherwise; ``type`` must then be one of
    ``'for_models'``, ``'for_probes'``, ``'for_scores'``, ``'for_tnorm'``,
    ``'for_znorm'`` (anything else raises ``KeyError``).
    """
    base_directory = self.get_base_directory()
    if protocol:
        base_directory = os.path.join(base_directory, protocol)

    # Training lists sit directly below the (protocol) base directory.
    if group == "world":
        return os.path.join(base_directory, self.m_world_filename)
    if group == "optional_world_1":
        return os.path.join(base_directory, self.m_optional_world_1_filename)
    if group == "optional_world_2":
        return os.path.join(base_directory, self.m_optional_world_2_filename)

    if group == "dev":
        group_dir = self.m_dev_subdir
    else:
        group_dir = self.m_eval_subdir

    file_names = {
        "for_models": self.m_models_filename,
        "for_probes": self.m_probes_filename,
        "for_scores": self.m_scores_filename,
        "for_tnorm": self.m_tnorm_filename,
        "for_znorm": self.m_znorm_filename,
    }
    return os.path.join(base_directory, group_dir, file_names[type])

472

def client_id_from_model_id(self, model_id, group="dev"):
    """Look up the client id that belongs to the given model id.

    The internally stored protocol is always used.

    Parameters
    ----------

    model_id : str
        The model id for which the client id should be returned.

    group : str or [str] or ``None``
        The group(s) to search. Might be one or more of
        ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.

    Returns
    -------

    str
        The client id for the given model id.

    Raises
    ------

    ValueError
        If the model id is not found in any of the searched groups.
    """
    protocol = self.protocol
    all_groups = self.groups(protocol)
    default_groups = self.groups(protocol, add_subworld=False)
    groups = check_parameters_for_validity(
        group, "group", all_groups, default_parameters=default_groups
    )

    # The first group whose model list knows the id wins.
    for candidate in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(candidate, "for_models", protocol)
        clients_by_model = reader.read_models(list_file, candidate, "for_models")
        if model_id in clients_by_model:
            return clients_by_model[model_id]

    raise ValueError(
        "The given model id '%s' cannot be found in one of the groups '%s'"
        % (model_id, groups)
    )

517

def client_id_from_t_model_id(self, t_model_id, group="dev"):
    """Look up the client id that belongs to the given T-Norm model id.

    The internally stored protocol is always used.

    Parameters
    ----------

    t_model_id : str
        The T-Norm model id for which the client id should be returned.

    group : str or [str] or ``None``
        The group(s) to search. Might be one or more of ``('dev', 'eval')``.

    Returns
    -------

    str
        The client id for the given T-Norm model id.

    Raises
    ------

    ValueError
        If the T-Norm model id is not found in any of the searched groups.
    """
    protocol = self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    groups = check_parameters_for_validity(group, "group", valid_groups)

    # The first group whose T-norm list knows the id wins.
    for candidate in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(candidate, "for_tnorm", protocol)
        clients_by_model = reader.read_models(list_file, candidate, "for_tnorm")
        if t_model_id in clients_by_model:
            return clients_by_model[t_model_id]

    raise ValueError(
        "The given T-norm model id '%s' cannot be found in one of the groups '%s'"
        % (t_model_id, groups)
    )

556

def __client_id_list__(self, groups, type, protocol=None):
    """Collect the client ids found in the ``type`` lists of all ``groups``.

    Returns a set, so every client id appears once.
    """
    protocol = protocol or self.protocol
    client_ids = set()
    for group in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, type, protocol)
        entries = reader.read_list(list_file, group, type)
        client_ids.update(entry.client_id for entry in entries)
    return client_ids

568

def client_ids(self, protocol=None, groups=None):
    """Return the client ids found in the model lists of the given groups.

    Parameters
    ----------

    protocol : str or ``None``
        The protocol to consider. If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
        The groups to which the clients belong ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.
        If ``None``, all existing groups except the optional world groups are used.

    Returns
    -------

    set of str
        All client ids which have the given properties.
    """
    protocol = protocol or self.protocol
    all_groups = self.groups(protocol)
    default_groups = self.groups(protocol, add_subworld=False)
    selected_groups = check_parameters_for_validity(
        groups, "group", all_groups, default_parameters=default_groups
    )
    return self.__client_id_list__(selected_groups, "for_models", protocol)

597

def tclient_ids(self, protocol=None, groups=None):
    """Return the client ids found in the T-Norm lists of the given groups.

    Parameters
    ----------

    protocol : str or ``None``
        The protocol to consider. If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
        The groups to which the clients belong ("dev", "eval").

    Returns
    -------

    set of str
        All T-Norm client ids which have the given properties.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    selected_groups = check_parameters_for_validity(groups, "group", valid_groups)
    return self.__client_id_list__(selected_groups, "for_tnorm", protocol)

623

def zclient_ids(self, protocol=None, groups=None):
    """Return the client ids found in the Z-Norm lists of the given groups.

    Parameters
    ----------

    protocol : str or ``None``
        The protocol to consider. If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
        The groups to which the clients belong ("dev", "eval").

    Returns
    -------

    set of str
        All Z-Norm client ids which have the given properties.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    selected_groups = check_parameters_for_validity(groups, "group", valid_groups)
    return self.__client_id_list__(selected_groups, "for_znorm", protocol)

649

def __model_id_list__(self, groups, type, protocol=None):
    """Collect the model ids found in the ``type`` lists of all ``groups``.

    Returns a list without duplicates; its order is not defined.
    """
    protocol = protocol or self.protocol
    model_ids = set()
    for group in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, type, protocol)
        clients_by_model = reader.read_models(list_file, group, type)
        # The keys of the mapping returned by the reader are the model ids.
        model_ids.update(clients_by_model.keys())
    return list(model_ids)

660

def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
    """Return the model ids found in the model lists of the given groups.

    Parameters
    ----------

    groups : str or [str] or ``None``
        The groups to which the models belong ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')``.

    protocol : str or ``None``
        The protocol to consider. If ``None``, the internally stored protocol is used.

    Returns
    -------

    [str]
        A list containing all the model ids which have the given properties.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol=protocol)
    selected_groups = check_parameters_for_validity(groups, "group", valid_groups)
    return self.__model_id_list__(selected_groups, "for_models", protocol)

685

def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs):
    """Return the T-Norm model ids found in the T-Norm lists of the given groups.

    Parameters
    ----------

    protocol : str or ``None``
        The protocol to consider. If ``None``, the internally stored protocol is used.

    groups : str or [str] or ``None``
        The groups to which the models belong ``('dev', 'eval')``.

    Returns
    -------

    [str]
        A list containing all the T-Norm model ids belonging to the given group.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    selected_groups = check_parameters_for_validity(groups, "group", valid_groups)
    return self.__model_id_list__(selected_groups, "for_tnorm", protocol)

710

def objects(
    self,
    groups=None,
    protocol=None,
    purposes=None,
    model_ids=None,
    classes=None,
    **kwargs,
):
    """Returns a list of :py:class:`bob.bio.base.database.BioFile` objects for the specific query by the user.

    Parameters
    ----------

    groups : str or [str] or ``None``
        One of the groups ``('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')`` or a tuple with several of them.
        If ``None`` is given (this is the default), it is considered to be the existing subset of ``('world', 'dev', 'eval')``.

    protocol : str or ``None``
        The protocol to consider. If ``None``, the internally stored protocol is used.

    purposes : str or [str] or ``None``
        The purposes required to be retrieved ``('enroll', 'probe')`` or a tuple
        with several of them. If ``None`` is given (this is the default), it is
        considered the same as a tuple with all possible values. This field is
        ignored for the data from the ``'world', 'optional_world_1', 'optional_world_2'`` groups.

    model_ids : str or [str] or ``None``
        Only retrieves the files for the provided list of model ids (claimed
        client id). If ``None`` is given (this is the default), no filter over
        the model_ids is performed. Dense probe lists are never filtered by
        model id.

    classes : str or [str] or ``None``
        The classes (types of accesses) to be retrieved ``('client', 'impostor')``
        or a tuple with several of them. If ``None`` is given (this is the
        default), it is considered the same as a tuple with all possible values.

        .. note::
           Classes are not allowed to be specified when 'probes_filename' is used in the constructor.

    Returns
    -------

    [BioFile]
        A list of ``self.bio_file_class`` objects considering all the
        filtering criteria; each file id appears at most once.

    Raises
    ------

    ValueError
        If ``classes`` is given although the dense probe list is in use.
    """
    protocol = protocol or self.protocol

    # Evaluate once: the answer cannot change during this call, and deriving
    # it may require checking the file system.
    dense = self.uses_dense_probe_file(protocol)
    if dense and classes is not None:
        raise ValueError(
            "To be able to use the 'classes' keyword, please use the 'for_scores.lst' list file."
        )

    purposes = check_parameters_for_validity(
        purposes, "purpose", ("enroll", "probe")
    )
    groups = check_parameters_for_validity(
        groups,
        "group",
        self.groups(protocol),
        default_parameters=self.groups(protocol, add_subworld=False),
    )
    classes = check_parameters_for_validity(
        classes, "class", ("client", "impostor")
    )

    if isinstance(model_ids, str):
        model_ids = (model_ids,)

    def read(group, list_type=None):
        # Training lists are read without a list type, all others with one.
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, list_type, protocol=protocol)
        if list_type is None:
            return reader.read_list(list_file, group)
        return reader.read_list(list_file, group, list_type)

    # First, collect all the lists that have to be processed.
    plain_lists = [
        read(group)
        for group in ("world", "optional_world_1", "optional_world_2")
        if group in groups
    ]
    probe_lists = []
    probe_type = "for_probes" if dense else "for_scores"
    for group in ("dev", "eval"):
        if group not in groups:
            continue
        if "enroll" in purposes:
            plain_lists.append(read(group, "for_models"))
        if "probe" in purposes:
            probe_lists.append(read(group, probe_type))

    # Now filter the entries, remembering the file ids already selected.
    seen_ids = set()
    selected = []

    # Non-probe files: filter by model id only.
    for entries in plain_lists:
        for entry in entries:
            if entry.id in seen_ids:
                continue
            if model_ids is None or entry._model_id in model_ids:
                seen_ids.add(entry.id)
                selected.append(entry)

    # Probe files: dense lists are taken as they are, sparse lists are
    # filtered by model id and by class.
    for entries in probe_lists:
        for entry in entries:
            if not dense:
                if model_ids is not None and entry._model_id not in model_ids:
                    continue
                is_client = entry.client_id == entry.claimed_id
                wanted = ("client" in classes and is_client) or (
                    "impostor" in classes and not is_client
                )
                if not wanted:
                    continue
            if entry.id not in seen_ids:
                seen_ids.add(entry.id)
                selected.append(entry)

    return self._make_bio(selected)

882

def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs):
    """Return the files used to enroll T-norm models for score normalization.

    Parameters
    ----------

    groups : str or [str] or ``None``
        The groups to which the models belong ``('dev', 'eval')``.

    protocol : str or ``None``
        The protocol to consider. If ``None``, the internally stored protocol is used.

    model_ids : str or [str] or ``None``
        Only retrieves the files for the provided list of model ids (claimed
        client id). If ``None`` is given (this is the default), no filter over
        the model_ids is performed.

    Returns
    -------

    [BioFile]
        A list of ``self.bio_file_class`` objects considering all the filtering criteria.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    groups = check_parameters_for_validity(groups, "group", valid_groups)

    if isinstance(model_ids, str):
        model_ids = (model_ids,)

    # The lists are assumed to contain no duplicate files.
    selected = []
    for group in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, "for_tnorm", protocol)
        entries = reader.read_list(list_file, group, "for_tnorm")
        selected.extend(
            entry
            for entry in entries
            if model_ids is None or entry._model_id in model_ids
        )

    return self._make_bio(selected)

927

def zobjects(self, groups=None, protocol=None, **kwargs):
    """Return the files used to perform Z-norm score normalization.

    Parameters
    ----------

    groups : str or [str] or ``None``
        The groups to which the clients belong ``('dev', 'eval')``.

    protocol : str or ``None``
        The protocol to consider. If ``None``, the internally stored protocol is used.

    Returns
    -------

    [BioFile]
        A list of ``self.bio_file_class`` objects, one per entry of the Z-norm lists.
    """
    protocol = protocol or self.protocol
    valid_groups = self.groups(protocol, add_world=False)
    groups = check_parameters_for_validity(groups, "group", valid_groups)

    # The lists are assumed to contain no duplicate files.
    selected = []
    for group in groups:
        reader = self._list_reader(protocol)
        list_file = self._get_list_file(group, "for_znorm", protocol)
        for entry in reader.read_list(list_file, group, "for_znorm"):
            selected.append(entry)

    return self._make_bio(selected)

968

def annotations(self, file):
    """Read the annotations that belong to the given file.

    Parameters
    ----------

    file : BioFile
        The BioFile object for which the annotations should be read.

    Returns
    -------

    dict or ``None``
        The annotations as a dictionary, e.g.: ``{'reye':(re_y,re_x), 'leye':(le_y,le_x)}``,
        or ``None`` when no annotation directory is configured.
    """
    directory = self.annotation_directory
    if directory is None:
        return None

    # The file id doubles as the annotation file name (without extension).
    annotation_name = file.id + self.annotation_extension
    annotation_file = os.path.join(directory, annotation_name)
    return read_annotation_file(annotation_file, self.annotation_type)

994

def original_file_name(self, file, check_existence=True):
    """Returns the original file name of the given file.

    This interface supports several original extensions, so that file lists can contain images
    of different data types.

    When multiple original extensions are specified, this function will check the existence of any of
    these file names, and return the first one that actually exists.
    In this case, the ``check_existence`` flag is ignored.

    A single extension (a string, or ``None`` for "no extension") is only
    ever tried as a whole; it is never split into characters.

    Parameters
    ----------

    file : BioFile
        The BioFile object for which the file name should be returned.

    check_existence : bool
        Should the existence of the original file be checked?
        (Ignored when multiple original extensions were specified in the constructor.)

    Returns
    -------

    str
        The full path of the original data file.

    Raises
    ------

    IOError
        If the existence check is active and no candidate file exists.
    """
    extensions = self.original_extension

    if extensions is None or isinstance(extensions, str):
        # Exactly one candidate name.
        file_name = file.make_path(self.original_directory, extensions)
        if not check_existence or os.path.exists(file_name):
            return file_name
    else:
        # Several registered extensions: return the first one that exists.
        for extension in extensions:
            file_name = file.make_path(self.original_directory, extension)
            if os.path.exists(file_name):
                return file_name

    # None of the extensions matched
    raise IOError(
        "File '%s' does not exist with any of the extensions '%s'"
        % (
            file.make_path(self.original_directory, None),
            self.original_extension,
        )
    )