Coverage for src/bob/bio/base/score/load.py: 78%

189 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-12 22:34 +0200

1#!/usr/bin/env python 

2# vim: set fileencoding=utf-8 : 

3# Mon 23 May 2011 16:23:05 CEST 

4 

5"""A set of utilities to load score files with different formats.""" 

6 

7import csv 

8import logging 

9import os 

10import tarfile 

11 

12from collections import defaultdict 

13from pathlib import Path 

14 

15import dask.dataframe 

16import numpy 

17 

18logger = logging.getLogger(__name__) 

19 

20 

def iscsv(filename):
    """Tell whether ``filename`` designates a CSV file.

    Every suffix of the path is inspected, so compound extensions such as
    ``scores.csv.gz`` are also recognised.

    Parameters:

    filename (:py:class:`str`, path-like, ``file-like``): A path, or an open
      file object. For objects that cannot be turned into a path, the ``name``
      attribute is used when it is a string or path-like.

    Returns:

    bool: ``True`` if one of the suffixes is ``.csv``; ``False`` otherwise,
    including when no usable name can be determined.

    """
    try:
        path = Path(filename)
    except TypeError:
        # Not a str / os.PathLike: most likely an open file object. Fall back
        # to its name, if it has a usable one, instead of crashing.
        name = getattr(filename, "name", None)
        if not isinstance(name, (str, os.PathLike)):
            return False
        path = Path(name)
    return ".csv" in path.suffixes

23 

24 

def open_file(filename, mode="rt"):
    """Return a readable file object for the given score file.

    A score file is either a plain file or a tar archive; for an archive, the
    first regular file found inside is opened.


    Parameters:

    filename (:py:class:`str`, ``file-like``): Path of the score file, or an
      object that is not a string and has a ``read`` attribute. Such an object
      is returned unchanged.

    mode (:py:class:`str`): Mode handed to :py:func:`open` for plain files.
      It is not used for tar archives.


    Returns:

    ``file-like``: The object given as input, the result of :py:func:`open`,
    or the result of :py:meth:`tarfile.TarFile.extractfile`.


    Raises:

    IOError: If the path is not an existing file, or if the tar archive holds
      no regular file.

    """
    # Anything that is not a string but can be read is taken as already open
    is_path_string = isinstance(filename, str)
    if not is_path_string and hasattr(filename, "read"):
        return filename

    if not os.path.isfile(filename):
        raise IOError("Score file '%s' does not exist." % filename)

    if tarfile.is_tarfile(filename):
        archive = tarfile.open(filename, "r")
        # hand back the first regular file stored in the archive
        for member in archive:
            if member.isfile():
                return archive.extractfile(member)
        raise IOError(
            "The given file is a .tar file, but it does not contain any file."
        )

    return open(filename, mode)

71 

72 

def four_column(filename):
    """Iterate over the lines of a four column score file.

    The lines are produced one at a time, so the score file is not loaded
    into memory at once. Each line of the file is expected to read:

    .. code-block:: text

       claimed_id real_id test_label score


    Parameters:

    filename (:py:class:`str`, ``file-like``): The score file, which will be
      opened with :py:func:`open_file`.


    Yields:

    str: The claimed identity -- the client name of the model that was used in
    the comparison

    str: The real identity -- the client name of the probe that was used in
    the comparison

    str: A label of the probe -- usually the probe file name, or the probe id

    float: The result of the comparison of the model and the probe

    """
    line_iterator = _iterate_score_file(filename)
    return line_iterator

106 

107 

def split_four_column(filename):
    """Read a four column score file and separate negatives from positives.

    The file must follow the format described in :py:func:`four_column`.
    Only the score values are kept; the string fields are discarded while
    iterating.


    Parameters:

    filename (:py:class:`str`, ``file-like``): The score file, which will be
      opened with :py:func:`open_file`.


    Returns:

    array: negatives, the scores of the lines whose ``claimed_id`` and
    ``real_id`` differ (see :py:func:`four_column`)

    array: positives, the scores of the lines whose ``claimed_id`` and
    ``real_id`` are equal (see :py:func:`four_column`)

    """
    # in the four column layout the real id sits in column 1
    return _split_scores(four_column(filename), 1)

139 

140 

def get_split_dataframe(filename):
    """Loads a score set that was written with :any:`bob.bio.base.pipelines.CSVScoreWriter`

    The file is read with :py:func:`dask.dataframe.read_csv` and divided into
    two dataframes, one for negatives and one for positives.

    Parameters
    ----------

    filename (:py:class:`str`, ``file-like``): The CSV score file, handed to
      :py:func:`dask.dataframe.read_csv`.

    Returns
    -------

    dataframe: negatives, the rows (scores and metadata) whose
    ``bio_ref_subject_id`` and ``probe_subject_id`` fields differ. (see
    :ref:`bob.bio.base.pipeline_simple_advanced_features`)

    dataframe: positives, the rows (scores and metadata) whose
    ``bio_ref_subject_id`` and ``probe_subject_id`` fields are equal. (see
    :ref:`bob.bio.base.pipeline_simple_advanced_features`)

    """
    # the "score" column is parsed as float, every other column as str
    column_types = defaultdict(lambda: str, {"score": float})
    table = dask.dataframe.read_csv(filename, dtype=column_types)

    positives = table[table.probe_subject_id == table.bio_ref_subject_id]
    negatives = table[table.probe_subject_id != table.bio_ref_subject_id]

    return negatives, positives

174 

175 

def split_csv_scores(filename, score_column: str = "score"):
    """Loads a score set that was written with :any:`bob.bio.base.pipelines.CSVScoreWriter`

    The column named by ``score_column`` is parsed as float and every other
    column as str.

    Parameters
    ----------

    filename (:py:class:`str`, ``file-like``): The CSV score file, handed to
      :py:func:`dask.dataframe.read_csv`.

    score_column: The CSV column that contains the score values.

    Returns
    -------

    array: negatives, 1D float array containing the list of scores, for which
      the fields of the ``bio_ref_subject_id`` and ``probe_subject_id``
      columns are different. (see
      :ref:`bob.bio.base.pipeline_simple_advanced_features`)

    array: positives, 1D float array containing the list of scores, for which
      the fields of the ``bio_ref_subject_id`` and ``probe_subject_id``
      columns are identical. (see
      :ref:`bob.bio.base.pipeline_simple_advanced_features`)

    """
    # Key the float dtype on the requested column: with a hard-coded "score"
    # key, a custom score column would be read as str.
    df = dask.dataframe.read_csv(
        filename, dtype=defaultdict(lambda: str, {score_column: float})
    )

    genuines = df[df.probe_subject_id == df.bio_ref_subject_id]
    impostors = df[df.probe_subject_id != df.bio_ref_subject_id]

    return (
        impostors[score_column].to_dask_array().compute(),
        genuines[score_column].to_dask_array().compute(),
    )

212 

213 

def cmc_four_column(filename):
    """Loads scores to compute CMC curves from a file in four column format.

    The file has to follow the format described in :py:func:`four_column`,
    with the ``test_label`` (column 3) holding the test/probe file name or a
    probe id.

    The scores are grouped per probe. For each probe, a tuple with the
    negative scores and the positive scores is produced. Usually there is
    only one positive score per probe, but more are allowed. The result can
    directly be passed to, e.g., the :py:func:`bob.measure.cmc` function.


    Parameters:

    filename (:py:class:`str`, ``file-like``): The score file, which will be
      opened with :py:func:`open_file`.


    Returns:

    :any:`list`: A list of tuples, where each tuple contains the
    ``negative`` and ``positive`` scores for one probe of the database. Both
    ``negatives`` and ``positives`` can be either an 1D
    :py:class:`numpy.ndarray` of type ``float``, or ``None``.

    """
    # in the four column layout the real id sits in column 1
    return _split_cmc_scores(four_column(filename), 1)

245 

246 

def five_column(filename):
    """Iterate over the lines of a five column score file.

    The lines are produced one at a time, so the score file is not loaded
    into memory at once. Each line of the file is expected to read:

    .. code-block:: text

       claimed_id model_label real_id test_label score


    Parameters:

    filename (:py:class:`str`, ``file-like``): The score file, which will be
      opened with :py:func:`open_file`.


    Yields:

    str: The claimed identity -- the client name of the model that was used in
    the comparison

    str: A label for the model -- usually the model file name, or the model id

    str: The real identity -- the client name of the probe that was used in
    the comparison

    str: A label of the probe -- usually the probe file name, or the probe id

    float: The result of the comparison of the model and the probe

    """
    line_iterator = _iterate_score_file(filename)
    return line_iterator

283 

284 

def split_five_column(filename):
    """Loads a score set from a single file and splits the scores

    Loads a score set from a single file in five column format and splits the
    scores between negatives and positives. The score file has to respect the 5
    column format as defined in the method :py:func:`five_column`.

    This method avoids loading and allocating memory for the strings present in
    the file. We only keep the scores.


    Parameters:

    filename (:py:class:`str`, ``file-like``): The file object that will be
      opened with :py:func:`open_file` containing the scores.


    Returns:

    array: negatives, 1D float array containing the list of scores, for which
      the ``claimed_id`` and the ``real_id`` are different
      (see :py:func:`five_column`)

    array: positives, 1D float array containing the list of scores, for which
      the ``claimed_id`` and the ``real_id`` are identical
      (see :py:func:`five_column`)

    """

    # Read through the five column loader, matching the documented format
    score_lines = five_column(filename)
    # in the five column layout the real id sits in column 2
    return _split_scores(score_lines, 2)

316 

317 

def cmc_five_column(filename):
    """Loads scores to compute CMC curves from a file in five column format.

    The five column file needs to be in the same format as described in
    :py:func:`five_column`, and the ``test_label`` (column 4) has to contain the
    test/probe file name or a probe id.

    This function returns a list of tuples. For each probe file, the tuple
    consists of a list of negative scores and a list of positive scores.
    Usually, the list of positive scores should contain only one element, but
    more are allowed. The result of this function can directly be passed to,
    e.g., the :py:func:`bob.measure.cmc` function.


    Parameters:

    filename (:py:class:`str`, ``file-like``): The file object that will be
      opened with :py:func:`open_file` containing the scores.


    Returns:

    :any:`list`: A list of tuples, where each tuple contains the
    ``negative`` and ``positive`` scores for one probe of the database. Both
    can be either an 1D :py:class:`numpy.ndarray` of type ``float``, or
    ``None``.

    """
    # Read through the five column loader, matching the documented format
    score_lines = five_column(filename)
    # real id in column 2; the probe label is taken from the column after it
    return _split_cmc_scores(score_lines, 2)

346 

347 

def scores(filename, ncolumns=None):
    """Iterate over the lines of the given score file.

    Four or five elements are produced per line, depending on the format of
    the score file; see :py:func:`bob.bio.base.score.load.four_column` and
    :py:func:`bob.bio.base.score.load.five_column` for details.

    Parameters:

    filename: :py:class:`str`, ``file-like``:
      The score file, which will be opened with :py:func:`open_file`.

    ncolumns: any
      ignored

    Yields:

    tuple:
      see :py:func:`bob.bio.base.score.load.four_column` or
      :py:func:`bob.bio.base.score.load.five_column`
    """
    line_iterator = _iterate_score_file(filename)
    return line_iterator

370 

371 

def split(filename, ncolumns=None, sort=False, csv_score_column: str = "score"):
    """Load a score file and separate its scores into negatives and positives.

    CSV files are read with :py:func:`split_csv_scores`. Other files are read
    with :py:func:`bob.bio.base.score.load.split_four_column` or
    :py:func:`bob.bio.base.score.load.split_five_column`, depending on the
    score file format.

    Parameters
    ----------

    filename : str
        The path to the score file.
    ncolumns : int or ``None``
        If ``4`` or ``5``, the score file is assumed to be in that format.
        Otherwise the format is estimated from the file.
    sort : :obj:`bool`, optional
        If ``True``, negatives and positives are sorted before being returned
    csv_score_column :
        When loading a CSV file, specifies the column that holds scores.

    Returns
    -------

    negatives : 1D :py:class:`numpy.ndarray` of type float
        The scores for which the ``claimed_id`` and the ``real_id`` are
        different (see :py:func:`four_column`)
    positives : 1D :py:class:`numpy.ndarray` of type float
        The scores for which the ``claimed_id`` and the ``real_id`` are
        identical (see :py:func:`four_column`)
    """
    if iscsv(filename):
        negatives, positives = split_csv_scores(
            filename, score_column=csv_score_column
        )
    else:
        detected = _estimate_score_file_format(filename, ncolumns)
        if detected == 4:
            loader = split_four_column
        else:
            assert detected == 5
            loader = split_five_column
        negatives, positives = loader(filename)

    if sort:
        # in-place sort of both arrays
        negatives.sort()
        positives.sort()

    return negatives, positives

418 

419 

def cmc(filename, ncolumns=None, csv_score_column: str = "score"):
    """Loads scores to compute CMC curves.

    CSV files are read as four column data, with the scores taken from
    ``csv_score_column``. For other files, see
    :py:func:`bob.bio.base.score.load.cmc_four_column` and
    :py:func:`bob.bio.base.score.load.cmc_five_column` for details.

    Parameters:

    filename (:py:class:`str` or ``file-like``): The file object that will be
      opened with :py:func:`open_file` containing the scores.

    ncolumns: (:py:class:`int`, Optional): If specified to be ``4`` or ``5``,
      the score file will be assumed to be in the given format. If not
      specified, the score file format will be estimated automatically.
      Ignored for CSV files.

    csv_score_column: When loading a CSV file, specifies the column that holds
      scores.

    Returns:

    :any:`list`: [(neg,pos)] A list of tuples, where each tuple contains the
    ``negative`` and ``positive`` scores for one probe of the database.

    """
    if iscsv(filename):
        # Iterate the CSV directly so that the requested score column is
        # honoured; going through cmc_four_column would always read "score".
        score_lines = _iterate_score_file(
            filename, csv_score_column=csv_score_column
        )
        return _split_cmc_scores(score_lines, 1)

    ncolumns = _estimate_score_file_format(filename, ncolumns)

    if ncolumns == 4:
        return cmc_four_column(filename)
    else:
        assert ncolumns == 5
        return cmc_five_column(filename)

459 

460 

def load_score(filename, ncolumns=None, minimal=False, **kwargs):
    """Load scores using numpy.genfromtxt and return the data as a numpy array.

    Parameters:

    filename (:py:class:`str`, ``file-like``): The file object that will be
      opened with :py:func:`open_file` containing the scores.

    ncolumns (:py:class:`int`, optional): 4, 5 or None (the default),
      specifying the number of columns in the score file. If None is provided,
      the number of columns will be guessed.

    minimal (:py:class:`bool`, optional): If True, only loads ``claimed_id``,
      ``real_id``, and ``scores``. This overrides any ``usecols`` keyword.

    **kwargs: Keyword arguments passed to :py:func:`numpy.genfromtxt`


    Returns:

    array: An array which contains not only the actual ``score`` but also the
    ``claimed_id``, ``real_id``, ``test_label`` and ``['model_label']``


    Raises:

    ValueError: If the number of columns is neither 4 nor 5.

    """

    def convertfunc(x):
        # identity converter: the text columns are kept as read
        return x

    ncolumns = _estimate_score_file_format(filename, ncolumns)

    usecols = kwargs.pop("usecols", None)
    if ncolumns == 4:
        names = ("claimed_id", "real_id", "test_label", "score")
        converters = {0: convertfunc, 1: convertfunc, 2: convertfunc, 3: float}
        if minimal:
            usecols = (0, 1, 3)

    elif ncolumns == 5:
        names = ("claimed_id", "model_label", "real_id", "test_label", "score")
        converters = {
            0: convertfunc,
            1: convertfunc,
            2: convertfunc,
            3: convertfunc,
            4: float,
        }
        if minimal:
            usecols = (0, 2, 4)
    else:
        raise ValueError("ncolumns of 4 and 5 are supported only.")

    opened = open_file(filename, mode="rb")
    try:
        score_lines = numpy.genfromtxt(
            opened,
            dtype=None,
            names=names,
            converters=converters,
            invalid_raise=True,
            usecols=usecols,
            **kwargs,
        )
    finally:
        # open_file returns the caller's own object when given a file-like;
        # only close handles that were created here.
        if opened is not filename:
            opened.close()

    # Rebuild the dtype: byte-string fields ("S") become unicode ("U") and
    # the last field is forced to float under the name "score".
    new_dtype = []
    for name in score_lines.dtype.names[:-1]:
        new_dtype.append((name, str(score_lines.dtype[name]).replace("S", "U")))
    new_dtype.append(("score", float))
    score_lines = numpy.array(score_lines, new_dtype)
    return score_lines

527 

528 

def load_files(filenames, func_load):
    """Apply a loading function to every file of a list.

    Parameters
    ----------

    filenames : :any:`list`
        list of file paths; may be ``None``
    func_load :
        function that can read files in the list

    Returns
    -------

    :any:`list`: The value returned by ``func_load`` for each file path, in
    the same order. ``None`` if ``filenames`` is ``None``.

    """
    if filenames is None:
        return None
    return [func_load(path) for path in filenames]

553 

554 

def get_negatives_positives(score_lines, score_column: str = "score"):
    """Separate the output of load_score into negatives and positives.

    An entry is positive when its ``claimed_id`` equals its ``real_id`` and
    negative otherwise. This is an alternative to split_four_column and
    split_five_column that works on already loaded scores.
    """
    is_genuine = score_lines["claimed_id"] == score_lines["real_id"]
    is_impostor = numpy.logical_not(is_genuine)

    all_scores = score_lines[score_column]
    return (all_scores[is_impostor], all_scores[is_genuine])

565 

566 

def get_negatives_positives_from_file(filename, **kwargs):
    """Load only the columns needed from a score file, then split the scores
    with get_negatives_positives. ``kwargs`` are forwarded to load_score."""
    minimal_lines = load_score(filename, minimal=True, **kwargs)
    negatives, positives = get_negatives_positives(
        minimal_lines, score_column="score"
    )
    return (negatives, positives)

572 

573 

def get_negatives_positives_all(score_lines_list, score_column: str = "score"):
    """Split each output of load_score in a list into negatives and positives,
    and stack the results.

    The negatives (and positives) of all entries are stacked with
    :py:func:`numpy.vstack` and transposed.
    """
    pairs = [
        get_negatives_positives(score_lines, score_column=score_column)
        for score_lines in score_lines_list
    ]
    stacked_negatives = numpy.vstack([pair[0] for pair in pairs]).T
    stacked_positives = numpy.vstack([pair[1] for pair in pairs]).T
    return (stacked_negatives, stacked_positives)

589 

590 

def get_all_scores(score_lines_list, score_column: str = "score"):
    """Stack the score column of each output of load_score in a list.

    The columns are stacked with :py:func:`numpy.vstack` and the result is
    transposed.
    """
    columns = []
    for score_lines in score_lines_list:
        columns.append(score_lines[score_column])
    stacked = numpy.vstack(columns)
    return stacked.T

597 

598 

def dump_score(filename, score_lines):
    """Write scores that were loaded using :py:func:`load_score`

    The number of columns is taken from the dtype of ``score_lines``. The
    score is written with nine decimals.

    Raises:

    ValueError: If ``score_lines`` has neither 4 nor 5 columns.
    """
    column_count = len(score_lines.dtype)
    if column_count not in (4, 5):
        raise ValueError("Only scores with 4 and 5 columns are supported.")

    fmt = "%s %s %s %s %.9f" if column_count == 5 else "%s %s %s %.9f"
    numpy.savetxt(filename, score_lines, fmt=fmt)

611 

612 

def _estimate_score_file_format(filename, ncolumns=None):
    """Estimates the score file format from the given score file.

    If ``ncolumns`` is in ``(4,5)``, then ``ncolumns`` is returned instead and
    the file is not opened. Otherwise the number of whitespace-separated
    fields on the first line of the file is returned. If reading that line
    fails, a warning is logged and ``4`` is returned.

    NOTE: the handle obtained from :py:func:`open_file` is always closed
    here. When ``filename`` is an already open file object, that object is
    closed too.
    """
    if ncolumns in (4, 5):
        return ncolumns

    f = open_file(filename, "rb")
    try:
        line = f.readline()
        ncolumns = len(line.split())
    except Exception:
        # Logger.warn is deprecated (and removed in Python 3.13); use
        # Logger.warning, which produces the same message.
        logger.warning(
            "Could not guess the number of columns in file: %s. "
            "Assuming 4 column format.",
            filename,
        )
        ncolumns = 4
    finally:
        f.close()
    return ncolumns

633 

634 

def _iterate_score_file(filename, csv_score_column: str = "score"):
    """Opens the score file and yields the score file lines in a tuple/list.

    The last element of the line (which is the score) will be transformed to
    float, the other elements will be str.

    For CSV files, each yielded list holds the ``bio_ref_subject_id``,
    ``probe_subject_id``, ``probe_template_id`` and ``csv_score_column``
    fields of a row. Other files are split on single spaces; empty lines are
    skipped.

    A file handle opened here is closed once the iteration ends.
    """
    if iscsv(filename):
        for row in _iterate_csv_score_file(
            filename, score_column=csv_score_column
        ):
            yield [
                row["bio_ref_subject_id"],
                row["probe_subject_id"],
                row["probe_template_id"],
                row[csv_score_column],
            ]
        return

    import io

    opened = open_file(filename, "rb")
    # open_file returns the caller's own object when given a file-like; such
    # a handle is not closed here.
    owns_handle = opened is not filename

    # csv.reader needs text: wrap anything that is not already a text stream
    if not isinstance(opened, io.TextIOBase):
        opened = io.TextIOWrapper(opened, newline="")

    try:
        reader = csv.reader(opened, delimiter=" ")
        for splits in reader:
            if not splits:
                # csv.reader yields [] for an empty line; there is no score
                # to convert, so skip it instead of failing on splits[-1]
                continue
            splits[-1] = float(splits[-1])
            yield splits
    finally:
        if owns_handle:
            opened.close()

662 

663 

def _iterate_csv_score_file(filename, score_column: str = "score"):
    """Opens a CSV score file for reading and yields each line in a dict.

    The ``score_column`` field of the line will be cast to float, the other
    elements will be str.

    A file handle opened here is closed once the iteration ends.
    """
    opened = open_file(filename)
    try:
        reader = csv.DictReader(opened)
        for row in reader:
            row[score_column] = float(row[score_column])
            yield row
    finally:
        # open_file returns the caller's own object when given a file-like;
        # only close handles that were created here.
        if opened is not filename:
            opened.close()

675 

676 

def _split_scores(
    score_lines, real_id_index, claimed_id_index=0, score_index=-1
):
    """Separate the lines produced by :py:func:`four_column` or
    :py:func:`five_column` into negative and positive scores.

    A line is positive when its claimed id equals its real id. Returns the
    tuple ``(negatives, positives)`` of numpy arrays.
    """
    genuine_scores, impostor_scores = [], []
    for fields in score_lines:
        if fields[claimed_id_index] == fields[real_id_index]:
            genuine_scores.append(fields[score_index])
        else:
            impostor_scores.append(fields[score_index])

    return (numpy.array(impostor_scores), numpy.array(genuine_scores))

693 

694 

def _split_cmc_scores(
    score_lines,
    real_id_index,
    probe_name_index=None,
    claimed_id_index=0,
    score_index=-1,
):
    """Group the lines produced by :py:func:`four_column` or
    :py:func:`five_column` per probe, for CMC computation.

    Returns a list with one ``(negatives, positives)`` tuple per probe name,
    ordered by sorted probe name. Each element of the tuple is a float64
    array, or ``None`` when the probe has no score of that kind. When
    ``probe_name_index`` is ``None``, the column following the real id is
    used.
    """
    if probe_name_index is None:
        probe_name_index = real_id_index + 1

    genuine_by_probe = {}
    impostor_by_probe = {}
    for fields in score_lines:
        if fields[claimed_id_index] == fields[real_id_index]:
            bucket = genuine_by_probe
        else:
            bucket = impostor_by_probe
        probe = fields[probe_name_index]
        # create the per-probe list on first use, then add the score
        bucket.setdefault(probe, []).append(fields[score_index])

    # every probe that has at least one score, in sorted order
    all_probes = sorted(set(impostor_by_probe) | set(genuine_by_probe))

    result = []
    for probe in all_probes:
        negatives = None
        if probe in impostor_by_probe:
            negatives = numpy.array(impostor_by_probe[probe], numpy.float64)
        positives = None
        if probe in genuine_by_probe:
            positives = numpy.array(genuine_by_probe[probe], numpy.float64)
        result.append((negatives, positives))
    return result

742 

743 

def split_csv_vuln(filename, score_column: str = "score"):
    """Loads vulnerability scores from a CSV score file.

    The scores are divided into three groups: licit negatives, licit
    positives, and presentation attacks (spoof).

    A row with a non-empty ``probe_attack_type`` field counts as spoof. Other
    rows are licit, and are positive when ``probe_subject_id`` equals
    ``bio_ref_subject_id`` and negative otherwise.

    Parameters
    ----------

    filename: str
        The path to a CSV file containing all the scores

    score_column: str
        The CSV column that contains the score values.

    Returns
    -------

    split_scores: dict of str: numpy.ndarray
        The float64 scores under the keys ``licit_neg``, ``licit_pos`` and
        ``spoof``.
    """
    logger.debug(f"Loading CSV score file: '{filename}'")
    split_scores = {"licit_neg": [], "licit_pos": [], "spoof": []}
    for row in _iterate_csv_score_file(filename, score_column=score_column):
        if row["probe_attack_type"]:
            bucket = "spoof"
        elif row["probe_subject_id"] == row["bio_ref_subject_id"]:
            bucket = "licit_pos"
        else:
            bucket = "licit_neg"
        split_scores[bucket].append(row[score_column])
    logger.debug(
        f"Found {len(split_scores['licit_neg'])} negative (ZEI), "
        f"{len(split_scores['licit_pos'])} positive (licit), and "
        f"{len(split_scores['spoof'])} PA (spoof) scores."
    )
    # turn every list of scores into a float64 array
    return {
        key: numpy.array(values, dtype=numpy.float64)
        for key, values in split_scores.items()
    }