#!/usr/bin/env python
# @author: Tiago de Freitas Pereira


import functools
import logging
import operator

import dask
import dask.array  # needed for dask.array.core.Array used in initialize_XYZ
import dask.bag
import numpy as np

from dask.delayed import Delayed
from sklearn.base import BaseEstimator
from sklearn.utils import check_consistent_length
from sklearn.utils.multiclass import unique_labels

from .gmm import GMMMachine, GMMStats
from .linear_scoring import linear_scoring
from .utils import check_and_persist_dask_input

logger = logging.getLogger(__name__)


def is_input_dask_nested(X):
    """Check if the input is a dask delayed or a (nested) list of dask delayed."""
    if isinstance(X, (list, tuple)):
        return is_input_dask_nested(X[0])

    if isinstance(X, Delayed):
        return True
    else:
        return False


def check_dask_input_samples_per_class(X, y):
    input_is_dask = is_input_dask_nested(X)

    if input_is_dask:
        y = functools.reduce(lambda x1, x2: list(x1) + list(x2), y)
    unique_labels_y = unique_labels(y)
    n_classes = len(unique_labels_y)
    n_samples_per_class = [sum(y == class_id) for class_id in unique_labels_y]
    return input_is_dask, n_classes, n_samples_per_class


def reduce_iadd(*args):
    """Reduces one or several lists by adding all elements into the first element"""
    ret = []
    for a in args:
        ret.append(functools.reduce(operator.iadd, a))

    if len(ret) == 1:
        return ret[0]
    return ret

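# A quick illustration of reduce_iadd (not part of the public API): each
# positional argument is collapsed with in-place addition, so
#
#   reduce_iadd([1, 2, 3])         # -> 6
#   reduce_iadd([1, 2], [10, 20])  # -> [3, 30]
#
# This is what lets the dask branches below sum per-chunk accumulator lists
# into a single accumulator.
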

def mult_along_axis(A, B, axis):
    """
    Magic function to multiply two arrays along a given axis.
    Taken from https://stackoverflow.com/questions/30031828/multiply-numpy-ndarray-with-1d-array-along-a-given-axis
    """

    # shape check
    if axis >= A.ndim:
        raise np.AxisError(axis, A.ndim)
    if A.shape[axis] != B.size:
        raise ValueError(
            "Length of 'A' along the given axis must be the same as B.size"
        )

    # np.broadcast_to puts the new axis as the last axis, so
    # we swap the given axis with the last one, to determine the
    # corresponding array shape. np.swapaxes only returns a view
    # of the supplied array, so no data is copied unnecessarily.
    shape = np.swapaxes(A, A.ndim - 1, axis).shape

    # Broadcast to an array with the shape as above. Again,
    # no data is copied, we only get a new look at the existing data.
    B_brc = np.broadcast_to(B, shape)

    # Swap back the axes. As before, this only changes our "point of view".
    B_brc = np.swapaxes(B_brc, A.ndim - 1, axis)

    return A * B_brc

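# Illustrative usage of mult_along_axis (values chosen for this sketch):
#
#   >>> A = np.arange(6.0).reshape(2, 3)
#   >>> mult_along_axis(A, np.array([10.0, 100.0]), axis=0)
#   array([[  0.,  10.,  20.],
#          [300., 400., 500.]])
#
# i.e. row 0 of A is scaled by 10 and row 1 by 100, without materializing a
# full-size copy of B thanks to the broadcast views.
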

class FactorAnalysisBase(BaseEstimator):
    """
    Factor Analysis base class.
    This class is not intended to be used directly, but rather to be inherited from.
    For more information check [McCool2013]_.


    Parameters
    ----------

    r_U: int
        Dimension of the subspace U

    r_V: int
        Dimension of the subspace V

    em_iterations: int
        Number of EM iterations

    relevance_factor: float
        Factor analysis relevance factor

    random_state: int
        random_state for the random number generator

    ubm: :py:class:`bob.learn.em.GMMMachine`
        A trained UBM (Universal Background Model) or a parametrized
        :py:class:`bob.learn.em.GMMMachine` to train the UBM with. If None,
        `ubm_kwargs` are passed as parameters of a new
        :py:class:`bob.learn.em.GMMMachine`.
    """

    def __init__(
        self,
        r_U,
        r_V=None,
        relevance_factor=4.0,
        em_iterations=10,
        random_state=0,
        enroll_iterations=1,
        ubm=None,
        ubm_kwargs=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.ubm = ubm
        self.ubm_kwargs = ubm_kwargs
        self.em_iterations = em_iterations
        self.random_state = random_state
        self.enroll_iterations = enroll_iterations

        # axis 1 dimensions of U and V
        self.r_U = r_U
        self.r_V = r_V

        self.relevance_factor = relevance_factor

        if ubm is not None and ubm._means is not None:
            self.create_UVD()

    @property
    def feature_dimension(self):
        """Get the UBM Dimension"""
        # TODO: Add this on the GMMMachine class
        return self.ubm.means.shape[1]

    @property
    def supervector_dimension(self):
        """Get the supervector dimension"""
        return self.ubm.n_gaussians * self.feature_dimension

    @property
    def mean_supervector(self):
        """Returns the mean supervector"""
        return self.ubm.means.flatten()

    @property
    def variance_supervector(self):
        """Returns the variance supervector"""
        return self.ubm.variances.flatten()

    @property
    def U(self):
        """An alias for `_U`."""
        return self._U

    @U.setter
    def U(self, value):
        self._U = np.array(value)

    @property
    def D(self):
        """An alias for `_D`."""
        return self._D

    @D.setter
    def D(self, value):
        self._D = np.array(value)

    @property
    def V(self):
        """An alias for `_V`."""
        return self._V

    @V.setter
    def V(self, value):
        self._V = np.array(value)

    def estimate_number_of_classes(self, y):
        """Estimates the number of classes given the labels"""
        return len(unique_labels(y))

    def initialize_using_array(self, X):
        """
        Trains the UBM (if needed) so that 0th and 1st order statistics
        can be accumulated later on.

        Parameters
        ----------
        X: list of numpy arrays
            List of data used to train the UBM

        """

        if self.ubm is None:
            logger.info("Creating a new GMMMachine and training it.")
            self.ubm = GMMMachine(**self.ubm_kwargs)
            self.ubm.fit(X)

        if self.ubm._means is None:
            logger.info("UBM means are None, training the UBM.")
            self.ubm.fit(X)

    def initialize(self, ubm_projected_X, y, n_classes):
        # Accumulating 0th and 1st order statistics
        # https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/ISVTrainer.cpp#L68

        logger.debug("Initializing the ISV/JFA using the UBM statistics.")

        # Initializing the state matrix
        if not hasattr(self, "_U") or not hasattr(self, "_D"):
            self.create_UVD()

        if is_input_dask_nested(ubm_projected_X):
            n_acc = [
                dask.delayed(self._sum_n_statistics)(xx, yy, n_classes)
                for xx, yy in zip(ubm_projected_X, y)
            ]

            f_acc = [
                dask.delayed(self._sum_f_statistics)(xx, yy, n_classes)
                for xx, yy in zip(ubm_projected_X, y)
            ]
            n_acc, f_acc = reduce_iadd(n_acc, f_acc)
        else:
            # 0th order stats
            n_acc = self._sum_n_statistics(ubm_projected_X, y, n_classes)
            # 1st order stats
            f_acc = self._sum_f_statistics(ubm_projected_X, y, n_classes)

        n_acc, f_acc = dask.compute(n_acc, f_acc)
        return n_acc, f_acc

    def create_UVD(self):
        """
        Create the state matrices U, V and D

        Returns
        -------

        U: (n_gaussians*feature_dimension, r_U) represents the session variability matrix (within-class variability)

        V: (n_gaussians*feature_dimension, r_V) represents the client variability matrix (between-class variability)

        D: (n_gaussians*feature_dimension) represents the client offset vector

        """

        if self.random_state is not None:
            np.random.seed(self.random_state)

        U_shape = (self.supervector_dimension, self.r_U)

        # U matrix is initialized using a normal distribution
        self._U = np.random.normal(scale=1.0, loc=0.0, size=U_shape)

        # D matrix is initialized as `D = sqrt(variance(UBM) / relevance_factor)`
        self._D = np.sqrt(self.variance_supervector / self.relevance_factor)

        # V matrix (or between-class variation matrix)
        # TODO: so far not doing JFA
        if self.r_V is not None:
            V_shape = (self.supervector_dimension, self.r_V)
            self._V = np.random.normal(scale=1.0, loc=0.0, size=V_shape)
        else:
            self._V = 0
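    # Shape sketch (illustrative numbers, not defaults): with a UBM of
    # n_gaussians=256 and feature_dimension=60 the supervector dimension is
    # 15360, so r_U=50 and r_V=10 would create
    #   U: (15360, 50), V: (15360, 10), D: (15360,)
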

    def _sum_n_statistics(self, X, y, n_classes):
        """
        Accumulates the 0th order statistics for each client

        Parameters
        ----------
        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics of each sample

        y: list of ints
            List of corresponding labels

        n_classes: int
            Number of classes

        Returns
        -------
        n_acc: array
            (n_classes, n_gaussians) representing the accumulated 0th order statistics

        """

        # 0th order stats
        n_acc = np.zeros((n_classes, self.ubm.n_gaussians), like=X[0].n)

        # Iterate for each client
        for x_i, y_i in zip(X, y):
            # Accumulate the 0th order statistics for each class
            n_acc[y_i, :] += x_i.n

        return n_acc

    def _sum_f_statistics(self, X, y, n_classes):
        """
        Accumulates the 1st order statistics for each client

        Parameters
        ----------
        X: list of :py:class:`bob.learn.em.GMMStats`

        y: list of ints
            List of corresponding labels

        n_classes: int
            Number of classes

        Returns
        -------
        f_acc: array
            (n_classes, n_gaussians, feature_dimension) representing the accumulated 1st order statistics

        """

        # 1st order stats
        f_acc = np.zeros(
            (
                n_classes,
                self.ubm.n_gaussians,
                self.feature_dimension,
            ),
            like=X[0].sum_px,
        )
        # Iterate for each client
        for x_i, y_i in zip(X, y):
            # Accumulate the 1st order statistics
            f_acc[y_i, :, :] += x_i.sum_px

        return f_acc

    def _get_statistics_by_class_id(self, X, y, i):
        """
        Returns the statistics for a given class

        Parameters
        ----------
        X: list of :py:class:`bob.learn.em.GMMStats`

        y: list of ints
            List of corresponding labels

        i: int
            Class id to return the statistics for
        """

        indices = np.where(np.array(y) == i)[0]
        return [X[i] for i in indices]

    # Estimating U and x #

    def _compute_id_plus_u_prod_ih(self, x_i, UProd):
        """
        Computes (I + U^T.diag(sigma)^-1.N_i.U)^-1
        See equation (29) in [McCool2013]_

        Parameters
        ----------
        x_i: :py:class:`bob.learn.em.GMMStats`
            Statistics of a single sample

        UProd: array
            Matrix containing U_c.T @ inv(Sigma_c) @ U_c

        Returns
        -------
        id_plus_u_prod_ih: array
            (I + U^T.diag(sigma)^-1.N_i.U)^-1

        """

        n_i = x_i.n
        I = np.eye(self.r_U, self.r_U)  # noqa: E741

        # TODO: make the inversion matrix function as a parameter
        return np.linalg.inv(I + (UProd * n_i[:, None, None]).sum(axis=0))
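    # Worked note on the term above (a sketch with made-up sizes): for r_U=2
    # and a UBM with a single Gaussian seeing n_i=[3.0] frames, the expression
    # reduces to
    #   inv(I_2 + 3.0 * (U_c.T / sigma_c) @ U_c)
    # so sessions with more frames shrink the posterior covariance of x more.
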

    def _compute_fn_x_ih(self, x_i, latent_z_i=None, latent_y_i=None):
        """
        Computes Fn_x_ih = N_{i,h}*(o_{i,h} - m - D*z_{i} - V*y_{i})
        Check equation (29) in [McCool2013]_

        Parameters
        ----------
        x_i: :py:class:`bob.learn.em.GMMStats`
            Statistics of a single sample

        latent_z_i: array
            E[z_i] for class `i`

        latent_y_i: array
            E[y_i] for class `i`

        """

        f_i = x_i.sum_px
        n_i = x_i.n
        n_ic = np.repeat(n_i, self.feature_dimension)
        V = self._V

        # N_ih*(m + D*z)
        # z is zero when the computation flow comes from update_X
        if latent_z_i is None:
            # Fn_x_ih = N_{i,h}*(o_{i,h} - m)
            fn_x_ih = f_i.flatten() - n_ic * (self.mean_supervector)
        else:
            # code goes here when the computation flow comes from compute_accumulators
            # Fn_x_ih = N_{i,h}*(o_{i,h} - m - D*z_{i})
            fn_x_ih = f_i.flatten() - n_ic * (
                self.mean_supervector + self._D * latent_z_i
            )

        # JFA Part (eq 29) #

        V_dot_v = V @ latent_y_i if latent_y_i is not None else 0
        fn_x_ih -= n_ic * V_dot_v if latent_y_i is not None else 0

        return fn_x_ih

    def compute_latent_x(
        self, *, X, y, n_classes, UProd, latent_y=None, latent_z=None
    ):
        """
        Computes a new :math:`E[x]`. See equation (29) in [McCool2013]_


        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of ints
            List of corresponding labels

        UProd: array
            Matrix containing U_c.T @ inv(Sigma_c) @ U_c

        latent_y: array
            E(y) latent variable

        latent_z: array
            E(z) latent variable

        Returns
        -------
        Returns the new latent_x
        """

        # U.T @ inv(Sigma) - See Eq(37)
        UTinvSigma = self._U.T / self.variance_supervector

        if is_input_dask_nested(X):
            latent_x = [
                dask.delayed(self._compute_latent_x_per_class)(
                    X_i=X_i,
                    UProd=UProd,
                    UTinvSigma=UTinvSigma,
                    latent_y_i=latent_y[y_i] if latent_y is not None else None,
                    latent_z_i=latent_z[y_i] if latent_z is not None else None,
                )
                for y_i, X_i in enumerate(X)
            ]
            latent_x = dask.compute(*latent_x)
        else:
            latent_x = [None] * n_classes
            for y_i in unique_labels(y):
                latent_x[y_i] = self._compute_latent_x_per_class(
                    X_i=self._get_statistics_by_class_id(X, y, y_i),
                    UProd=UProd,
                    UTinvSigma=UTinvSigma,
                    latent_y_i=latent_y[y_i] if latent_y is not None else None,
                    latent_z_i=latent_z[y_i] if latent_z is not None else None,
                )

        return latent_x

    def _compute_latent_x_per_class(
        self, *, X_i, UProd, UTinvSigma, latent_y_i, latent_z_i
    ):
        # For each sample
        latent_x_i = []
        for x_i in X_i:
            id_plus_prod_ih = self._compute_id_plus_u_prod_ih(x_i, UProd)

            fn_x_ih = self._compute_fn_x_ih(
                x_i, latent_z_i=latent_z_i, latent_y_i=latent_y_i
            )
            latent_x_i.append(id_plus_prod_ih @ (UTinvSigma @ fn_x_ih))
        # make sure latent_x_i stays a dask array by converting the list to a
        # dask array explicitly
        latent_x_i = np.vstack(latent_x_i)
        latent_x_i = np.swapaxes(latent_x_i, 0, 1)
        return latent_x_i

    def update_U(self, acc_U_A1, acc_U_A2):
        """
        Update rule for U

        Parameters
        ----------

        acc_U_A1: array
            Accumulated statistics for U_A1 (n_gaussians, r_U, r_U)

        acc_U_A2: array
            Accumulated statistics for U_A2 (n_gaussians*feature_dimension, r_U)

        """

        # Inverting A1 over the zero axis
        # https://stackoverflow.com/questions/11972102/is-there-a-way-to-efficiently-invert-an-array-of-matrices-with-numpy
        inv_A1 = np.linalg.inv(acc_U_A1)

        # Iterating over the gaussians to update U
        U_c = (
            acc_U_A2.reshape(
                self.ubm.n_gaussians, self.feature_dimension, self.r_U
            )
            @ inv_A1
        )
        self._U = U_c.reshape(
            self.ubm.n_gaussians * self.feature_dimension, self.r_U
        )
        return self._U
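    # The update above is the per-Gaussian closed-form solve
    #   U_c = A2_c @ inv(A1_c)
    # done as one batched (n_gaussians, feature_dimension, r_U) x
    # (n_gaussians, r_U, r_U) matmul instead of a Python loop over Gaussians.
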

    def _compute_uprod(self):
        """
        Computes U_c.T @ inv(Sigma_c) @ U_c


        https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/FABaseTrainer.cpp#L325
        """

        # UProd = (self.ubm.n_gaussians, self.r_U, self.r_U)

        Uc = self._U.reshape(
            (self.ubm.n_gaussians, self.feature_dimension, self.r_U)
        )
        UcT = Uc.transpose(0, 2, 1)

        sigma_c = self.ubm.variances[:, np.newaxis]
        UProd = (UcT / sigma_c) @ Uc

        return UProd

    def compute_accumulators_U(self, X, y, UProd, latent_x, latent_y, latent_z):
        """
        Computes the accumulators (A1 and A2) for the U matrix.
        This is useful for parallelization purposes.

        The accumulators are defined as

        :math:`A_1 = \\sum\\limits_{i=1}^{I}\\sum\\limits_{h=1}^{H}N_{i,h,c}E(x_{i,h,c} x^{\\top}_{i,h,c})`


        :math:`A_2 = \\sum\\limits_{i=1}^{I}\\sum\\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \\mu_c -D_{c}z_{i,c} -V_{c}y_{i,c} )E[x_{i,h}]^{\\top}`


        For more information, please check the attached technical notes.

        Parameters
        ----------
        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of ints
            List of corresponding labels

        UProd: array
            Matrix containing U_c.T @ inv(Sigma_c) @ U_c

        latent_x: array
            E(x) latent variable

        latent_y: array
            E(y) latent variable

        latent_z: array
            E(z) latent variable

        Returns
        -------
        acc_U_A1:
            (n_gaussians, r_U, r_U) A1 accumulator

        acc_U_A2:
            (n_gaussians*feature_dimension, r_U) A2 accumulator


        """

        # U accumulators
        acc_U_A1 = np.zeros(
            (self.ubm.n_gaussians, self.r_U, self.r_U), like=X[0].n
        )
        acc_U_A2 = np.zeros((self.supervector_dimension, self.r_U), like=X[0].n)

        # Loops over all people
        for y_i in set(y):
            # For each session
            for session_index, x_i in enumerate(
                self._get_statistics_by_class_id(X, y, y_i)
            ):
                id_plus_prod_ih = self._compute_id_plus_u_prod_ih(x_i, UProd)
                latent_z_i = latent_z[y_i] if latent_z is not None else None
                latent_y_i = latent_y[y_i] if latent_y is not None else None
                fn_x_ih = self._compute_fn_x_ih(
                    x_i, latent_y_i=latent_y_i, latent_z_i=latent_z_i
                )

                latent_x_i = latent_x[y_i][:, session_index]
                id_plus_prod_ih += (
                    latent_x_i[:, np.newaxis] @ latent_x_i[:, np.newaxis].T
                )

                acc_U_A1 += mult_along_axis(
                    id_plus_prod_ih[np.newaxis].repeat(
                        self.ubm.n_gaussians, axis=0
                    ),
                    x_i.n,
                    axis=0,
                )

                acc_U_A2 += fn_x_ih[np.newaxis].T @ latent_x_i[:, np.newaxis].T

        return acc_U_A1, acc_U_A2

    # Estimating D and z #

    def update_z(self, X, y, latent_x, latent_y, latent_z, n_acc, f_acc):
        """
        Computes a new :math:`E[z]`. See equation (30) in [McCool2013]_

        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of ints
            List of corresponding labels

        latent_x: array
            E(x) latent variable

        latent_y: array
            E(y) latent variable

        latent_z: array
            E(z) latent variable

        n_acc: array
            Accumulated 0th order statistics for each class (:math:`N_{i}`)

        f_acc: array
            Accumulated 1st order statistics for each class (:math:`F_{i}`)

        Returns
        -------
        Returns the new latent_z

        """

        # Precomputing
        # self._D.T / sigma
        dt_inv_sigma = self._D / self.variance_supervector
        # self._D.T / sigma * self._D
        dt_inv_sigma_d = dt_inv_sigma * self._D

        # for each class
        for y_i in set(y):

            id_plus_d_prod = self._compute_id_plus_d_prod_i(
                dt_inv_sigma_d, n_acc[y_i]
            )
            X_i = self._get_statistics_by_class_id(X, y, y_i)
            latent_x_i = latent_x[y_i]

            latent_y_i = latent_y[y_i] if latent_y is not None else None

            fn_z_i = self._compute_fn_z_i(
                X_i, latent_x_i, latent_y_i, n_acc[y_i], f_acc[y_i]
            )
            latent_z[y_i] = id_plus_d_prod * dt_inv_sigma * fn_z_i

        return latent_z

    def _compute_id_plus_d_prod_i(self, dt_inv_sigma_d, n_acc_i):
        """
        Computes: (I+Dt*diag(sigma)^-1*Ni*D)^-1
        See equation (31) in [McCool2013]_

        Parameters
        ----------

        dt_inv_sigma_d: array
            Vector representing `D.T / sigma * D`

        n_acc_i: array
            Accumulated 0th order statistics for class `i` (:math:`N_{i}`)

        """

        tmp_CD = np.repeat(n_acc_i, self.feature_dimension)
        id_plus_d_prod = np.ones(tmp_CD.shape) + dt_inv_sigma_d * tmp_CD
        return 1 / id_plus_d_prod

    def _compute_fn_z_i(self, X_i, latent_x_i, latent_y_i, n_acc_i, f_acc_i):
        """
        Computes Fn_z_i = sum_{sessions h}(N_{i,h}*(o_{i,h} - m - V*y_{i} - U*x_{i,h})) (normalized first order statistics)

        Parameters
        ----------
        X_i: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics for class `i`

        """

        U = self._U
        V = self._V

        m = self.mean_supervector

        tmp_CD = np.repeat(n_acc_i, self.feature_dimension)

        # JFA session part
        V_dot_v = V @ latent_y_i if latent_y_i is not None else 0

        # Fn_z_i = sum_{sessions h}(N_{i,h}*(o_{i,h} - m - V*y_{i}))
        fn_z_i = f_acc_i.flatten() - tmp_CD * (m + V_dot_v)
        # convert fn_z_i to dask array here if required to make sure fn_z_i -= ... works
        fn_z_i = np.array(fn_z_i, like=X_i[0].n)

        # Looping over the sessions
        for session_id, x_i_s in enumerate(X_i):
            n_i = x_i_s.n
            tmp_CD = np.repeat(n_i, self.feature_dimension)
            x_i_h = latent_x_i[:, session_id]

            fn_z_i -= tmp_CD * (U @ x_i_h)

        return fn_z_i

    def compute_accumulators_D(
        self, X, y, latent_x, latent_y, latent_z, n_acc, f_acc
    ):
        """
        Computes the accumulators for the D matrix

        The accumulators are defined as

        :math:`A_1 = \\sum\\limits_{i=1}^{I}E[z_{i,c}z^{\\top}_{i,c}]`


        :math:`A_2 = \\sum\\limits_{i=1}^{I} \\Bigg[\\sum\\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \\mu_c -U_{c}x_{i,h,c} -V_{c}y_{i,c} )\\Bigg]E[z_{i}]^{\\top}`


        For more information, please check the attached technical notes.


        Parameters
        ----------

        X: array
            Input data

        y: array
            Class labels

        latent_x: array
            E(x) latent variable

        latent_y: array
            E(y) latent variable

        latent_z: array
            E(z) latent variable

        n_acc: array
            Accumulated 0th order statistics for each class (:math:`N_{i}`)

        f_acc: array
            Accumulated 1st order statistics for each class (:math:`F_{i}`)

        Returns
        -------
        acc_D_A1:
            (n_gaussians*feature_dimension) A1 accumulator

        acc_D_A2:
            (n_gaussians*feature_dimension) A2 accumulator

        """

        acc_D_A1 = np.zeros((self.supervector_dimension,), like=X[0].n)
        acc_D_A2 = np.zeros((self.supervector_dimension,), like=X[0].n)

        # Precomputing
        # self._D.T / sigma
        dt_inv_sigma = self._D / self.variance_supervector
        # self._D.T / sigma * self._D
        dt_inv_sigma_d = dt_inv_sigma * self._D

        # Loops over all people
        for y_i in set(y):

            id_plus_d_prod = self._compute_id_plus_d_prod_i(
                dt_inv_sigma_d, n_acc[y_i]
            )
            X_i = self._get_statistics_by_class_id(X, y, y_i)
            latent_x_i = latent_x[y_i]

            latent_y_i = latent_y[y_i] if latent_y is not None else None

            fn_z_i = self._compute_fn_z_i(
                X_i, latent_x_i, latent_y_i, n_acc[y_i], f_acc[y_i]
            )

            tmp_CD = np.repeat(n_acc[y_i], self.feature_dimension)
            acc_D_A1 += (
                id_plus_d_prod + latent_z[y_i] * latent_z[y_i]
            ) * tmp_CD
            acc_D_A2 += fn_z_i * latent_z[y_i]

        return acc_D_A1, acc_D_A2

    def initialize_XYZ(self, n_samples_per_class, like=None):
        """
        Initialize the E[x], E[y], E[z] state variables

        Eq. (38)
        latent_z = (n_classes, supervector_dimension)


        Eq. (37)
        latent_y = (n_classes, r_V) or None

        Eq. (36)
        latent_x = (n_classes, r_U, n_sessions)

        """

        kw = dict(like=like) if isinstance(like, dask.array.core.Array) else {}

        # x (Eq. 36)
        # (n_classes, r_U, n_samples)
        latent_x = []
        for n_s in n_samples_per_class:
            latent_x.append(np.zeros((self.r_U, n_s), **kw))

        n_classes = len(n_samples_per_class)
        latent_y = (
            np.zeros((n_classes, self.r_V), **kw)
            if self.r_V and self.r_V > 0
            else None
        )

        latent_z = np.zeros((n_classes, self.supervector_dimension), **kw)

        return latent_x, latent_y, latent_z
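    # Illustrative shapes (assumed toy sizes): for 2 classes with [3, 5]
    # samples each, r_U=2, r_V=2 and supervector_dimension=8 this returns
    #   latent_x -> [np.zeros((2, 3)), np.zeros((2, 5))]
    #   latent_y -> np.zeros((2, 2))
    #   latent_z -> np.zeros((2, 8))
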

    # Estimating V and y #

    def update_y(
        self,
        *,
        X,
        y,
        n_classes,
        VProd,
        latent_x,
        latent_y,
        latent_z,
        n_acc,
        f_acc,
    ):
        """
        Computes a new :math:`E[y]`. See equation (30) in [McCool2013]_

        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of ints
            List of corresponding labels

        n_classes: int
            Number of classes

        VProd: array
            Matrix representing V_c.T @ inv(Sigma_c) @ V_c

        latent_x: array
            E(x) latent variable

        latent_y: array
            E(y) latent variable

        latent_z: array
            E(z) latent variable

        n_acc: array
            Accumulated 0th order statistics for each class (:math:`N_{i}`)

        f_acc: array
            Accumulated 1st order statistics for each class (:math:`F_{i}`)

        """

        # V.T / sigma
        VTinvSigma = self._V.T / self.variance_supervector

        if is_input_dask_nested(X):
            latent_y = [
                dask.delayed(self._latent_y_per_class)(
                    X_i=X_i,
                    n_acc_i=n_acc[label],
                    f_acc_i=f_acc[label],
                    VProd=VProd,
                    VTinvSigma=VTinvSigma,
                    latent_x_i=latent_x[label],
                    latent_z_i=latent_z[label],
                )
                for label, X_i in enumerate(X)
            ]
            latent_y = dask.compute(*latent_y)
        else:
            # Loops over the labels
            for label in range(n_classes):
                X_i = self._get_statistics_by_class_id(X, y, label)
                latent_y[label] = self._latent_y_per_class(
                    X_i=X_i,
                    n_acc_i=n_acc[label],
                    f_acc_i=f_acc[label],
                    VProd=VProd,
                    VTinvSigma=VTinvSigma,
                    latent_x_i=latent_x[label],
                    latent_z_i=latent_z[label],
                )
        return latent_y

    def _latent_y_per_class(
        self,
        *,
        X_i,
        n_acc_i,
        f_acc_i,
        VProd,
        VTinvSigma,
        latent_x_i,
        latent_z_i,
    ):
        id_plus_v_prod_i = self._compute_id_plus_vprod_i(n_acc_i, VProd)
        fn_y_i = self._compute_fn_y_i(
            X_i,
            latent_x_i,
            latent_z_i,
            n_acc_i,
            f_acc_i,
        )
        latent_y = (VTinvSigma @ fn_y_i) @ id_plus_v_prod_i
        return latent_y

    def _compute_id_plus_vprod_i(self, n_acc_i, VProd):
        """
        Computes: (I+Vt*diag(sigma)^-1*Ni*V)^-1 (see Eq. (30) in [McCool2013]_)

        Parameters
        ----------

        n_acc_i: array
            Accumulated 0th order statistics for each class (:math:`N_{i}`)

        VProd: array
            Matrix representing V_c.T @ inv(Sigma_c) @ V_c

        """

        I = np.eye(self.r_V, self.r_V)  # noqa: E741

        # TODO: make the inversion matrix function as a parameter
        return np.linalg.inv(I + (VProd * n_acc_i[:, None, None]).sum(axis=0))

    def _compute_vprod(self):
        """
        Computes V_c.T @ inv(Sigma_c) @ V_c


        https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/FABaseTrainer.cpp#L193
        """

        Vc = self._V.reshape(
            (self.ubm.n_gaussians, self.feature_dimension, self.r_V)
        )
        VcT = Vc.transpose(0, 2, 1)

        sigma_c = self.ubm.variances[:, np.newaxis]
        VProd = (VcT / sigma_c) @ Vc

        return VProd

    def compute_accumulators_V(
        self, X, y, VProd, n_acc, f_acc, latent_x, latent_y, latent_z
    ):
        """
        Computes the accumulators for the update of the V matrix.
        The accumulators are defined as

        :math:`A_1 = \\sum\\limits_{i=1}^{I}E[y_{i,c}y^{\\top}_{i,c}]`


        :math:`A_2 = \\sum\\limits_{i=1}^{I} \\Bigg[\\sum\\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \\mu_c -U_{c}x_{i,h,c} -D_{c}z_{i,c} )\\Bigg]E[y_{i}]^{\\top}`


        For more information, please check the attached technical notes.

        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of ints
            List of corresponding labels

        VProd: array
            Matrix representing V_c.T @ inv(Sigma_c) @ V_c

        n_acc: array
            Accumulated 0th order statistics for each class (:math:`N_{i}`)

        f_acc: array
            Accumulated 1st order statistics for each class (:math:`F_{i}`)

        latent_x: array
            E(x) latent variable

        latent_y: array
            E(y) latent variable

        latent_z: array
            E(z) latent variable


        Returns
        -------

        acc_V_A1:
            (n_gaussians, r_V, r_V) A1 accumulator

        acc_V_A2:
            (n_gaussians*feature_dimension, r_V) A2 accumulator

        """

        # V accumulators
        acc_V_A1 = np.zeros(
            (self.ubm.n_gaussians, self.r_V, self.r_V), like=X[0].n
        )
        acc_V_A2 = np.zeros((self.supervector_dimension, self.r_V), like=X[0].n)

        # Loops over all people
        for i in set(y):
            n_acc_i = n_acc[i]
            f_acc_i = f_acc[i]
            X_i = self._get_statistics_by_class_id(X, y, i)
            latent_x_i = latent_x[i]
            latent_y_i = latent_y[i]
            latent_z_i = latent_z[i]

            # Computing A1 accumulator: \sum_{i=1}^{N}(E(y_i_c @ y_i_c.T))
            id_plus_prod_v_i = self._compute_id_plus_vprod_i(n_acc_i, VProd)
            id_plus_prod_v_i += (
                latent_y_i[:, np.newaxis] @ latent_y_i[:, np.newaxis].T
            )

            acc_V_A1 += mult_along_axis(
                id_plus_prod_v_i[np.newaxis].repeat(
                    self.ubm.n_gaussians, axis=0
                ),
                n_acc_i,
                axis=0,
            )

            # Computing A2 accumulator: \sum_{i=1}^{N}( \sum_{h=1}^{H}(N_i_h_c (o_i_h - m_c - D_c*z_i_c - U_c*x_i_h_c)) @ E(y_i).T )
            fn_y_i = self._compute_fn_y_i(
                X_i,
                latent_x_i=latent_x_i,
                latent_z_i=latent_z_i,
                n_acc_i=n_acc_i,
                f_acc_i=f_acc_i,
            )

            acc_V_A2 += fn_y_i[np.newaxis].T @ latent_y_i[:, np.newaxis].T

        return acc_V_A1, acc_V_A2

    def _compute_fn_y_i(self, X_i, latent_x_i, latent_z_i, n_acc_i, f_acc_i):
        """
        Computes Fn_y_i = sum_{sessions h}(N_{i,h}*(o_{i,h} - m - D*z_{i} - U*x_{i,h})) (normalized first order statistics)
        See equation (30) in [McCool2013]_

        Parameters
        ----------

        X_i: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics for a class

        latent_x_i: array
            E(x_i) latent variable

        latent_z_i: array
            E(z_i) latent variable

        n_acc_i: array
            Accumulated 0th order statistics for each class (:math:`N_{i}`)

        f_acc_i: array
            Accumulated 1st order statistics for each class (:math:`F_{i}`)


        """

        U = self._U
        D = self._D  # Not doing the JFA

        m = self.mean_supervector

        # y = self.y[i] # Not doing JFA

        tmp_CD = np.repeat(n_acc_i, self.feature_dimension)

        fn_y_i = f_acc_i.flatten() - tmp_CD * (
            m - D * latent_z_i
        )  # Fn_y_i = sum_{sessions h}(N_{i,h}*(o_{i,h} - m - D*z_{i}))

        # Looping over the sessions of a client
        for session_id, x_i_s in enumerate(X_i):
            n_i = x_i_s.n
            U_dot_x = U @ latent_x_i[:, session_id]
            tmp_CD = np.repeat(n_i, self.feature_dimension)
            fn_y_i -= tmp_CD * U_dot_x

        return fn_y_i

    # Scoring #

    def estimate_x(self, X):

        id_plus_us_prod_inv = self._compute_id_plus_us_prod_inv(X)
        fn_x = self._compute_fn_x(X)

        # UtSigmaInv * Fn_x = Ut*diag(sigma)^-1 * N*(o - m)
        ut_inv_sigma = self._U.T / self.variance_supervector

        return id_plus_us_prod_inv @ (ut_inv_sigma @ fn_x)

    def estimate_ux(self, X):
        x = self.estimate_x(X)
        return self.U @ x
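    # Scoring-time sketch (shapes are assumptions of this note): given the
    # accumulated stats of one probe, estimate_x returns the session factor x
    # with shape (r_U,), and estimate_ux lifts it back to supervector space:
    #   Ux = U @ x  ->  shape (n_gaussians * feature_dimension,)
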

    def _compute_id_plus_us_prod_inv(self, X_i):
        """
        Computes (I + U^T.diag(sigma)^-1.N_{i}.U)^-1

        Parameters
        ----------

        X_i: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics for a class
        """

        I = np.eye(self.r_U, self.r_U)  # noqa: E741

        Uc = self._U.reshape(
            (self.ubm.n_gaussians, self.feature_dimension, self.r_U)
        )

        UcT = np.transpose(Uc, axes=(0, 2, 1))

        sigma_c = self.ubm.variances[:, np.newaxis]

        n_sum = sum(x_i_s.n for x_i_s in X_i)

        n_i_c = np.expand_dims(n_sum[:, np.newaxis], axis=2)

        id_plus_us_prod_inv = I + (((UcT / sigma_c) @ Uc) * n_i_c).sum(axis=0)
        id_plus_us_prod_inv = np.linalg.inv(id_plus_us_prod_inv)

        return id_plus_us_prod_inv

    def _compute_fn_x(self, X_i):
        """
        Computes Fn_x = sum_{sessions h}(N*(o - m)) (normalized first order statistics)

        Parameters
        ----------

        X_i: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics for a class

        """

        n_sum = sum(x_i_s.n for x_i_s in X_i)
        sum_px_sum = sum(x_i_s.sum_px for x_i_s in X_i)

        n = n_sum[:, np.newaxis]
        f = sum_px_sum

        fn_x = f - self.ubm.means * n

        return fn_x.flatten()

    def score_using_array(self, model, data):
        """
        Computes the score using numpy arrays as input

        Parameters
        ----------
        model : numpy.ndarray
            Latent representation of the client

        data : list of numpy arrays
            List of feature arrays to be scored

        Returns
        -------
        score : float
            The linear score

        """

        return self.score(model, [self.ubm.acc_stats(d) for d in data])

    def fit_using_array(self, X, y):
        """Fits the model using a numpy array or a dask array as input.
        The y array is computed to a numpy array immediately.
        """

        input_is_dask, X = check_and_persist_dask_input(X, persist=True)
        y = dask.compute(y)[0]
        y = np.squeeze(np.asarray(y))
        check_consistent_length(X, y)

        self.initialize_using_array(X)

        if input_is_dask:
            # split the X array based on the classes
            # since the EM algorithm is only parallelizable per class
            X_new, y_new = [], []
            for class_id in unique_labels(y):
                class_indices = y == class_id
                X_new.append(X[class_indices])
                y_new.append(y[class_indices])
            X, y = X_new, y_new
            del X_new, y_new

            stats = [dask.delayed(self.ubm.transform)(xx).persist() for xx in X]
        else:
            stats = self.ubm.transform(X)

        logger.info("Computing statistics per sample")
        del X
        self.fit(stats, y)
        return self

    def enroll_using_array(self, X):
        """
        Enrolls a new client using a numpy array as input

        Parameters
        ----------
        X : array
            features to be enrolled

        Returns
        -------
        model : object
            The enrolled latent variables

        """

        return self.enroll([self.ubm.acc_stats(X)])

    def _prepare_dask_input(self, X, y):
        """Prepare the input for the fit method"""
        logger.info(
            "Rechunking bag of stats to delayed list of stats per class. If your worker runs "
            "out of memory in this training step, you have to use workers with more memory."
        )
        # optimize the graph of X and y and also persist X before computing y
        X, y = dask.optimize(X, y)
        X = X.persist()
        y = np.array(dask.compute(y)[0])
        n_classes = len(set(y))

        # X is a list of Stats in a dask bag chunked randomly. We want X to be
        # a list of dask delayed objects where each delayed object is a list of
        # stats per class
        def _len(stats):
            return len(stats)

        lengths = X.map_partitions(_len).compute()
        delayeds = X.to_delayed()
        X, i = [[] for _ in range(n_classes)], 0
        for length_, delayed_stats_list in zip(lengths, delayeds):
            delayed_stats_list._length = length_
            for delayed_stat in delayed_stats_list:
                class_id = y[i]
                X[class_id].append(delayed_stat)
                i += 1
        X = [dask.delayed(list)(stats).persist() for stats in X]
        y = [y[y == class_id] for class_id in range(n_classes)]

        return X, y


class ISVMachine(FactorAnalysisBase):
    """
    Implements the Inter-Session Variability Modelling hypothesis on top of GMMs

    Inter-Session Variability (ISV) modeling is a session variability modeling technique built on top of the Gaussian mixture modeling approach.
    It hypothesizes that within-class variations are embedded in a linear subspace of the GMM means space, and that these variations can be suppressed
    by an offset w.r.t each mean during the MAP adaptation.
    For more information check [McCool2013]_

    Parameters
    ----------

    r_U: int
        Dimension of the subspace U

    em_iterations: int
        Number of EM iterations

    relevance_factor: float
        Factor analysis relevance factor

    random_state: int
        random_state for the random number generator

    ubm: :py:class:`bob.learn.em.GMMMachine` or None
        A trained UBM (Universal Background Model). If None, the UBM is trained with
        a new :py:class:`bob.learn.em.GMMMachine` when fit is called, with `ubm_kwargs`
        as parameters.

    """

    def __init__(
        self,
        r_U,
        em_iterations=10,
        relevance_factor=4.0,
        random_state=0,
        ubm=None,
        ubm_kwargs=None,
        **kwargs,
    ):
        super().__init__(
            r_U=r_U,
            relevance_factor=relevance_factor,
            em_iterations=em_iterations,
            random_state=random_state,
            ubm=ubm,
            ubm_kwargs=ubm_kwargs,
            **kwargs,
        )

    def e_step(self, X, y, n_samples_per_class, n_acc, f_acc):
        """E-step of the EM algorithm."""
        # self.initialize_XYZ(y)
        UProd = self._compute_uprod()
        _, _, latent_z = self.initialize_XYZ(
            n_samples_per_class=n_samples_per_class, like=X[0].n
        )
        latent_y = None

        latent_x = self.compute_latent_x(
            X=X,
            y=y,
            n_classes=len(n_samples_per_class),
            UProd=UProd,
        )
        latent_z = self.update_z(
            X=X,
            y=y,
            latent_x=latent_x,
            latent_y=latent_y,
            latent_z=latent_z,
            n_acc=n_acc,
            f_acc=f_acc,
        )
        acc_U_A1, acc_U_A2 = self.compute_accumulators_U(
            X, y, UProd, latent_x, latent_y, latent_z
        )

        return acc_U_A1, acc_U_A2

    def m_step(self, acc_U_A1_acc_U_A2_list):
        """
        ISV M-step.
        This updates the `U` matrix

        Parameters
        ----------

        acc_U_A1_acc_U_A2_list: list
            List of (acc_U_A1, acc_U_A2) tuples, one per e-step, where
            acc_U_A1 has shape (n_gaussians, r_U, r_U) and
            acc_U_A2 has shape (n_gaussians*feature_dimension, r_U)

        """

        acc_U_A1 = [acc[0] for acc in acc_U_A1_acc_U_A2_list]
        acc_U_A2 = [acc[1] for acc in acc_U_A1_acc_U_A2_list]

        acc_U_A1, acc_U_A2 = reduce_iadd(acc_U_A1, acc_U_A2)

        return self.update_U(acc_U_A1, acc_U_A2)

    def fit(self, X, y):
        """
        Trains the U matrix (session variability matrix)

        Parameters
        ----------
        X : list of :py:class:`bob.learn.em.GMMStats` or :py:class:`dask.bag.Bag`
            GMM statistics of the N samples
        y : numpy.ndarray
            The input labels, a 1D numpy array of shape (number of samples, )

        Returns
        -------
        self : object
            Returns self.

        """

        if isinstance(X, dask.bag.Bag):
            X, y = self._prepare_dask_input(X, y)

        (
            input_is_dask,
            n_classes,
            n_samples_per_class,
        ) = check_dask_input_samples_per_class(X, y)

        n_acc, f_acc = self.initialize(X, y, n_classes)

        for i in range(self.em_iterations):
            logger.info("ISV U Training: Iteration %d", i + 1)
            if input_is_dask:
                e_step_output = [
                    dask.delayed(self.e_step)(
                        X=xx,
                        y=yy,
                        n_samples_per_class=n_samples_per_class,
                        n_acc=n_acc,
                        f_acc=f_acc,
                    )
                    for xx, yy in zip(X, y)
                ]
                delayed_em_step = dask.delayed(self.m_step)(e_step_output)
                self._U = dask.compute(delayed_em_step)[0]
            else:
                e_step_output = self.e_step(
                    X=X,
                    y=y,
                    n_samples_per_class=n_samples_per_class,
                    n_acc=n_acc,
                    f_acc=f_acc,
                )
                self.m_step([e_step_output])

        return self
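    # Minimal training sketch (hyper-parameters and data names below are
    # made up for illustration, not defaults):
    #
    #   from bob.learn.em import GMMMachine, ISVMachine
    #   ubm = GMMMachine(n_gaussians=8)
    #   ubm.fit(background_data)
    #   isv = ISVMachine(r_U=2, em_iterations=10, ubm=ubm)
    #   isv.fit_using_array(train_data, train_labels)
    #
    # where `background_data`, `train_data` and `train_labels` are assumed to
    # be provided by the caller.
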

    def transform(self, X):
        ubm_projected_X = self.ubm.acc_stats(X)
        return self.estimate_ux(ubm_projected_X)

    def enroll(self, X):
        """
        Enrolls a new client.
        In ISV, enrollment is defined as :math:`m + Dz`, with the latent
        variable `z` representing the enrolled model.

        Parameters
        ----------
        X : list of :py:class:`bob.learn.em.GMMStats`
            List of statistics to be enrolled

        Returns
        -------
        latent_z : numpy.ndarray
            The enrolled latent variable z

        """

        iterations = self.enroll_iterations
        # We have only one class for enrollment
        y = list(np.zeros(len(X), dtype=np.int32))
        n_acc = self._sum_n_statistics(X, y=y, n_classes=1)
        f_acc = self._sum_f_statistics(X, y=y, n_classes=1)

        UProd = self._compute_uprod()
        _, _, latent_z = self.initialize_XYZ(n_samples_per_class=[len(X)])
        latent_y = None
        for i in range(iterations):
            logger.info("Enrollment: Iteration %d", i + 1)
            latent_x = self.compute_latent_x(
                X=X,
                y=y,
                n_classes=1,
                UProd=UProd,
                latent_y=latent_y,
                latent_z=latent_z,
            )
            latent_z = self.update_z(
                X=X,
                y=y,
                latent_x=latent_x,
                latent_y=latent_y,
                latent_z=latent_z,
                n_acc=n_acc,
                f_acc=f_acc,
            )

        return latent_z

    def enroll_using_array(self, X):
        """
        Enrolls a new client using a numpy array as input

        Parameters
        ----------
        X : array
            features to be enrolled

        Returns
        -------
        latent_z : numpy.ndarray
            The enrolled latent variable z

        """

        return self.enroll([self.ubm.acc_stats(X)])

    def score(self, latent_z, data):
        """
        Computes the ISV score

        Parameters
        ----------
        latent_z : numpy.ndarray
            Latent representation of the client (E[z_i])

        data : list of :py:class:`bob.learn.em.GMMStats`
            List of statistics of one probe template to be scored

        Returns
        -------
        score : float
            The linear score

        """

        x = self.estimate_x(data)
        Ux = self._U @ x

        # TODO: I don't know why this is not the enrolled model
        # Here I am just reproducing the C++ implementation
        # m + Dz
        z = self.D * latent_z + self.mean_supervector

        # Support a probe template and a list of probe templates.
        if isinstance(data[0], GMMStats) and len(data) > 1:
            data_sum = sum(data[1:], start=data[0])
        elif not isinstance(data[0], GMMStats):
            data_sum = []
            for d in data:
                if len(d) > 1:
                    data_sum.append(sum(d[1:], start=d[0]))
                else:
                    data_sum.append(d[0])
        else:
            data_sum = data

        return linear_scoring(
            z.reshape((self.ubm.n_gaussians, self.feature_dimension)),
            self.ubm,
            data_sum,
            Ux.reshape((self.ubm.n_gaussians, self.feature_dimension)),
            frame_length_normalization=True,
        )[0][0]

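# End-to-end ISV sketch (all names below are caller-provided assumptions):
#
#   stats_enroll = [isv.ubm.acc_stats(sample) for sample in enroll_samples]
#   model = isv.enroll(stats_enroll)          # latent z of the client
#   stats_probe = [isv.ubm.acc_stats(probe)]  # one probe template
#   score = isv.score(model, stats_probe)
#
# `acc_stats` is used here the same way as in `enroll_using_array` above.
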

class JFAMachine(FactorAnalysisBase):
    """
    Joint Factor Analysis (JFA) is an extension of ISV. Besides the
    within-class assumption (modeled with :math:`U`), it also hypothesizes that
    between-class variations are embedded in a low rank rectangular matrix
    :math:`V`. In the supervector notation, this modeling has the following shape:
    :math:`\\mu_{i, j} = m + Ux_{i, j} + Vy_{i} + Dz_{i}`.

    For more information check [McCool2013]_

    Parameters
    ----------

    ubm: :py:class:`bob.learn.em.GMMMachine`
        A trained UBM (Universal Background Model)

    r_U: int
        Dimension of the subspace U

    r_V: int
        Dimension of the subspace V

    em_iterations: int
        Number of EM iterations

    relevance_factor: float
        Factor analysis relevance factor

    random_state: int
        random_state for the random number generator

    """

    def __init__(
        self,
        r_U,
        r_V,
        em_iterations=10,
        relevance_factor=4.0,
        random_state=0,
        ubm=None,
        ubm_kwargs=None,
        **kwargs,
    ):
        super().__init__(
            ubm=ubm,
            r_U=r_U,
            r_V=r_V,
            relevance_factor=relevance_factor,
            em_iterations=em_iterations,
            random_state=random_state,
            ubm_kwargs=ubm_kwargs,
            **kwargs,
        )

    def e_step_v(self, X, y, n_samples_per_class, n_acc, f_acc):
        """
        JFA E-step for the V matrix.

        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of int
            List of labels

        n_samples_per_class: list
            Number of samples per class

        n_acc: array
            Accumulated 0th-order statistics

        f_acc: array
            Accumulated 1st-order statistics


        Returns
        -------

        acc_V_A1: array
            Accumulated statistics for V_A1 (n_gaussians, r_V, r_V)

        acc_V_A2: array
            Accumulated statistics for V_A2 (n_gaussians*feature_dimension, r_V)

        """

        VProd = self._compute_vprod()

        latent_x, latent_y, latent_z = self.initialize_XYZ(
            n_samples_per_class=n_samples_per_class, like=X[0].n
        )

        # UPDATE Y, X AND FINALLY Z

        n_classes = len(n_samples_per_class)
        latent_y = self.update_y(
            X=X,
            y=y,
            n_classes=n_classes,
            VProd=VProd,
            latent_x=latent_x,
            latent_y=latent_y,
            latent_z=latent_z,
            n_acc=n_acc,
            f_acc=f_acc,
        )

        acc_V_A1, acc_V_A2 = self.compute_accumulators_V(
            X=X,
            y=y,
            VProd=VProd,
            n_acc=n_acc,
            f_acc=f_acc,
            latent_x=latent_x,
            latent_y=latent_y,
            latent_z=latent_z,
        )

        return acc_V_A1, acc_V_A2

    def m_step_v(self, acc_V_A1_acc_V_A2_list):
        """
        `V` matrix M-step.
        This updates the `V` matrix

        Parameters
        ----------

        acc_V_A1_acc_V_A2_list: list
            List of (acc_V_A1, acc_V_A2) tuples, one per e-step, where
            acc_V_A1 has shape (n_gaussians, r_V, r_V) and
            acc_V_A2 has shape (n_gaussians*feature_dimension, r_V)

        """

        acc_V_A1 = [acc[0] for acc in acc_V_A1_acc_V_A2_list]
        acc_V_A2 = [acc[1] for acc in acc_V_A1_acc_V_A2_list]

        acc_V_A1, acc_V_A2 = reduce_iadd(acc_V_A1, acc_V_A2)

        # Inverting A1 over the zero axis
        # https://stackoverflow.com/questions/11972102/is-there-a-way-to-efficiently-invert-an-array-of-matrices-with-numpy
        inv_A1 = np.linalg.inv(acc_V_A1)

        V_c = (
            acc_V_A2.reshape(
                (self.ubm.n_gaussians, self.feature_dimension, self.r_V)
            )
            @ inv_A1
        )
        self._V = V_c.reshape(
            (self.ubm.n_gaussians * self.feature_dimension, self.r_V)
        )
        return self._V

    def finalize_v(self, X, y, n_samples_per_class, n_acc, f_acc):
        """
        Compute for the last time `E[y]`

        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of int
            List of labels

        n_samples_per_class: list
            Number of samples per class

        n_acc: array
            Accumulated 0th-order statistics

        f_acc: array
            Accumulated 1st-order statistics

        Returns
        -------
        latent_y: array
            E[y]

        """

        VProd = self._compute_vprod()

        latent_x, latent_y, latent_z = self.initialize_XYZ(
            n_samples_per_class=n_samples_per_class, like=X[0].n
        )

        # UPDATE Y, X AND FINALLY Z

        n_classes = len(n_samples_per_class)
        latent_y = self.update_y(
            X=X,
            y=y,
            n_classes=n_classes,
            VProd=VProd,
            latent_x=latent_x,
            latent_y=latent_y,
            latent_z=latent_z,
            n_acc=n_acc,
            f_acc=f_acc,
        )
        return latent_y

    def e_step_u(self, X, y, n_samples_per_class, latent_y):
        """
        JFA E-step for the U matrix.

        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of int
            List of labels

        latent_y: array
            E(y) latent variable


        Returns
        -------

        acc_U_A1: array
            Accumulated statistics for U_A1 (n_gaussians, r_U, r_U)

        acc_U_A2: array
            Accumulated statistics for U_A2 (n_gaussians*feature_dimension, r_U)

        """

        # self.initialize_XYZ(y)
        UProd = self._compute_uprod()
        latent_x, _, latent_z = self.initialize_XYZ(
            n_samples_per_class=n_samples_per_class
        )

        n_classes = len(n_samples_per_class)
        latent_x = self.compute_latent_x(
            X=X,
            y=y,
            n_classes=n_classes,
            UProd=UProd,
            latent_y=latent_y,
        )

        acc_U_A1, acc_U_A2 = self.compute_accumulators_U(
            X=X,
            y=y,
            UProd=UProd,
            latent_x=latent_x,
            latent_y=latent_y,
            latent_z=latent_z,
        )

        return acc_U_A1, acc_U_A2

    def m_step_u(self, acc_U_A1_acc_U_A2_list):
        """
        `U` matrix M-step.
        This updates the `U` matrix

        Parameters
        ----------

        acc_U_A1_acc_U_A2_list: list
            List of (acc_U_A1, acc_U_A2) tuples, one per e-step, where
            acc_U_A1 has shape (n_gaussians, r_U, r_U) and
            acc_U_A2 has shape (n_gaussians*feature_dimension, r_U)

        """

        acc_U_A1 = [acc[0] for acc in acc_U_A1_acc_U_A2_list]
        acc_U_A2 = [acc[1] for acc in acc_U_A1_acc_U_A2_list]

        acc_U_A1, acc_U_A2 = reduce_iadd(acc_U_A1, acc_U_A2)

        return self.update_U(acc_U_A1, acc_U_A2)

    def finalize_u(
        self,
        X,
        y,
        n_samples_per_class,
        latent_y,
    ):
        """
        Compute for the last time `E[x]`

        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of int
            List of labels

        n_samples_per_class: list
            Number of samples per class

        latent_y: array
            E[y] latent variable

        Returns
        -------
        latent_x: array
            E[x]
        """

        UProd = self._compute_uprod()
        latent_x, _, _ = self.initialize_XYZ(
            n_samples_per_class=n_samples_per_class
        )

        n_classes = len(n_samples_per_class)
        latent_x = self.compute_latent_x(
            X=X,
            y=y,
            n_classes=n_classes,
            UProd=UProd,
            latent_y=latent_y,
        )

        return latent_x

    def e_step_d(
        self, X, y, n_samples_per_class, latent_x, latent_y, n_acc, f_acc
    ):
        """
        JFA E-step for the D matrix.

        Parameters
        ----------

        X: list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        y: list of int
            List of labels

        n_samples_per_class: list
            Number of samples per class

        latent_x: array
            E(x) latent variable

        latent_y: array
            E(y) latent variable

        n_acc: array
            Accumulated 0th-order statistics

        f_acc: array
            Accumulated 1st-order statistics


        Returns
        -------

        acc_D_A1: array
            Accumulated statistics for D_A1 (n_gaussians*feature_dimension, )

        acc_D_A2: array
            Accumulated statistics for D_A2 (n_gaussians*feature_dimension, )

        """

        _, _, latent_z = self.initialize_XYZ(
            n_samples_per_class=n_samples_per_class
        )

        latent_z = self.update_z(
            X,
            y,
            latent_x=latent_x,
            latent_y=latent_y,
            latent_z=latent_z,
            n_acc=n_acc,
            f_acc=f_acc,
        )

        acc_D_A1, acc_D_A2 = self.compute_accumulators_D(
            X, y, latent_x, latent_y, latent_z, n_acc, f_acc
        )

        return acc_D_A1, acc_D_A2

    def m_step_d(self, acc_D_A1_acc_D_A2_list):
        """
        `D` matrix M-step.
        This updates the `D` matrix

        Parameters
        ----------

        acc_D_A1_acc_D_A2_list: list
            List of (acc_D_A1, acc_D_A2) tuples, one per e-step, each with
            shape (n_gaussians*feature_dimension, )

        """

        acc_D_A1 = [acc[0] for acc in acc_D_A1_acc_D_A2_list]
        acc_D_A2 = [acc[1] for acc in acc_D_A1_acc_D_A2_list]

        acc_D_A1, acc_D_A2 = reduce_iadd(acc_D_A1, acc_D_A2)

        self._D = acc_D_A2 / acc_D_A1
        return self._D

    def enroll(self, X):
        """
        Enrolls a new client.
        In JFA, enrollment is defined as :math:`m + Vy + Dz`, with the latent
        variables `y` and `z` representing the enrolled model.

        Parameters
        ----------
        X : list of :py:class:`bob.learn.em.GMMStats`
            List of statistics

        Returns
        -------
        (latent_y, latent_z) : tuple of numpy.ndarray
            The enrolled latent variables y and z

        """

        iterations = self.enroll_iterations
        # We have only one class for enrollment
        y = list(np.zeros(len(X), dtype=np.int32))
        n_acc = self._sum_n_statistics(X, y=y, n_classes=1)
        f_acc = self._sum_f_statistics(X, y=y, n_classes=1)

        UProd = self._compute_uprod()
        VProd = self._compute_vprod()
        latent_x, latent_y, latent_z = self.initialize_XYZ(
            n_samples_per_class=[len(X)]
        )

        for i in range(iterations):
            logger.info("Enrollment: Iteration %d", i + 1)
            latent_y = self.update_y(
                X=X,
                y=y,
                n_classes=1,
                VProd=VProd,
                latent_x=latent_x,
                latent_y=latent_y,
                latent_z=latent_z,
                n_acc=n_acc,
                f_acc=f_acc,
            )
            latent_x = self.compute_latent_x(
                X=X,
                y=y,
                n_classes=1,
                UProd=UProd,
                latent_y=latent_y,
                latent_z=latent_z,
            )
            latent_z = self.update_z(
                X=X,
                y=y,
                latent_x=latent_x,
                latent_y=latent_y,
                latent_z=latent_z,
                n_acc=n_acc,
                f_acc=f_acc,
            )

        # The latent variables are wrapped into 2-axis arrays; return the
        # single enrolled class
        return latent_y[0], latent_z[0]

    def fit(self, X, y):
        """
        Trains the V, U and D matrices, in this order

        Parameters
        ----------
        X : list of :py:class:`bob.learn.em.GMMStats` or :py:class:`dask.bag.Bag`
            GMM statistics of the N samples
        y : numpy.ndarray
            The input labels, a 1D numpy array of shape (number of samples, )

        Returns
        -------
        self : object
            Returns self.

        """

        if isinstance(X, dask.bag.Bag):
            X, y = self._prepare_dask_input(X, y)

        (
            input_is_dask,
            n_classes,
            n_samples_per_class,
        ) = check_dask_input_samples_per_class(X, y)

        n_acc, f_acc = self.initialize(X, y, n_classes=n_classes)

        # Updating V
        for i in range(self.em_iterations):
            logger.info("V Training: Iteration %d", i + 1)
            if input_is_dask:
                e_step_output = [
                    dask.delayed(self.e_step_v)(
                        X=xx,
                        y=yy,
                        n_samples_per_class=n_samples_per_class,
                        n_acc=n_acc,
                        f_acc=f_acc,
                    )
                    for xx, yy in zip(X, y)
                ]
                delayed_em_step = dask.delayed(self.m_step_v)(e_step_output)
                self._V = dask.compute(delayed_em_step)[0]
            else:
                e_step_output = self.e_step_v(
                    X=X,
                    y=y,
                    n_samples_per_class=n_samples_per_class,
                    n_acc=n_acc,
                    f_acc=f_acc,
                )
                self.m_step_v([e_step_output])
        latent_y = self.finalize_v(
            X=X,
            y=y,
            n_samples_per_class=n_samples_per_class,
            n_acc=n_acc,
            f_acc=f_acc,
        )

        # Updating U
        for i in range(self.em_iterations):
            logger.info("U Training: Iteration %d", i + 1)
            if input_is_dask:
                e_step_output = [
                    dask.delayed(self.e_step_u)(
                        X=xx,
                        y=yy,
                        n_samples_per_class=n_samples_per_class,
                        latent_y=latent_y,
                    )
                    for xx, yy in zip(X, y)
                ]
                delayed_em_step = dask.delayed(self.m_step_u)(e_step_output)
                self._U = dask.compute(delayed_em_step)[0]
            else:
                e_step_output = self.e_step_u(
                    X=X,
                    y=y,
                    n_samples_per_class=n_samples_per_class,
                    latent_y=latent_y,
                )
                self.m_step_u([e_step_output])

        latent_x = self.finalize_u(
            X=X, y=y, n_samples_per_class=n_samples_per_class, latent_y=latent_y
        )

        # Updating D
        for i in range(self.em_iterations):
            logger.info("D Training: Iteration %d", i + 1)
            if input_is_dask:
                e_step_output = [
                    dask.delayed(self.e_step_d)(
                        X=xx,
                        y=yy,
                        n_samples_per_class=n_samples_per_class,
                        latent_x=latent_x,
                        latent_y=latent_y,
                        n_acc=n_acc,
                        f_acc=f_acc,
                    )
                    for xx, yy in zip(X, y)
                ]
                delayed_em_step = dask.delayed(self.m_step_d)(e_step_output)
                self._D = dask.compute(delayed_em_step)[0]
            else:
                e_step_output = self.e_step_d(
                    X=X,
                    y=y,
                    n_samples_per_class=n_samples_per_class,
                    latent_x=latent_x,
                    latent_y=latent_y,
                    n_acc=n_acc,
                    f_acc=f_acc,
                )
                self.m_step_d([e_step_output])

        return self

    def score(self, model, data):
        """
        Computes the JFA score

        Parameters
        ----------
        model : tuple of numpy.ndarray
            The enrolled model, a (latent_y, latent_z) tuple (E[y_i], E[z_i])

        data : list of :py:class:`bob.learn.em.GMMStats`
            List of statistics to be scored

        Returns
        -------
        score : float
            The linear score

        """

        latent_y = model[0]
        latent_z = model[1]

        x = self.estimate_x(data)
        Ux = self._U @ x

        # TODO: I don't know why this is not the enrolled model
        # Here I am just reproducing the C++ implementation
        # m + Vy + Dz
        zy = self.V @ latent_y + self.D * latent_z + self.mean_supervector

        # Support a probe template and a list of probe templates.
        if isinstance(data[0], GMMStats) and len(data) > 1:
            data_sum = sum(data[1:], start=data[0])
        elif not isinstance(data[0], GMMStats):
            data_sum = []
            for d in data:
                if len(d) > 1:
                    data_sum.append(sum(d[1:], start=d[0]))
                else:
                    data_sum.append(d[0])
        else:
            data_sum = data

        return linear_scoring(
            zy.reshape((self.ubm.n_gaussians, self.feature_dimension)),
            self.ubm,
            data_sum,
            Ux.reshape((self.ubm.n_gaussians, self.feature_dimension)),
            frame_length_normalization=True,
        )[0][0]
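
# JFA usage sketch (hypothetical data and sizes):
#
#   jfa = JFAMachine(r_U=2, r_V=2, em_iterations=10, ubm=trained_ubm)
#   jfa.fit(ubm_stats, labels)        # trains V, then U, then D
#   model = jfa.enroll(enroll_stats)  # (latent_y, latent_z) tuple
#   score = jfa.score(model, probe_stats)
#
# where `trained_ubm` is a fitted GMMMachine and the `*_stats` variables are
# lists of GMMStats, as in the ISV example above.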