# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later

import logging
import multiprocessing
import os
import textwrap

import h5py
import numpy
import scipy.stats
import tabulate
import torch.nn

from tqdm import tqdm

from .evaluator import sample_measures_for_threshold

logger = logging.getLogger(__name__)

PERFORMANCE_FIGURES = [
    "precision",
    "recall",
    "specificity",
    "accuracy",
    "jaccard",
    "f1_score",
]
"""List of performance figures supported by this module, in order."""


def _performance_summary(size, winperf, winsize, winstride, figure):
    """Generates an array that represents the per-pixel performance of the
    original image.

    The returned array corresponds to a stacked version of performances for
    each pixel in the original image, taking into consideration the sliding
    window performances, their size and stride.


    Parameters
    ==========

    size : tuple
        A two-tuple with the original height and width of the image being
        analyzed

    winperf : numpy.ndarray
        A 3D array with shape ``(N, H, W)``, where ``N`` is the number of
        performance measures supported by this module, and ``(H, W)`` are the
        number of vertical and horizontal sliding-window positions

    winsize : tuple
        A two-tuple that indicates the size of the sliding window (height,
        width)

    winstride : tuple
        A two-tuple that indicates the stride of the sliding window (height,
        width)

    figure : str
        Name of the performance figure to use for the summary


    Returns
    =======

    n : numpy.ndarray
        A 2D numpy array containing the number of performance scores for every
        pixel in the original image

    avg : numpy.ndarray
        A 2D numpy array containing the average performances for every pixel on
        the input image, considering the sliding window sizes and strides
        applied to the image

    std : numpy.ndarray
        A 2D numpy array containing the (unbiased) standard deviations for the
        provided performance figure, for every pixel on the input image,
        considering the sliding window sizes and strides applied to the image
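
    Example
    =======

    A minimal sketch (toy shapes, illustrative only): with non-overlapping
    2x2 windows on a 4x4 image, every pixel is covered by exactly one window
    and all standard deviations are zero.

    >>> winperf = numpy.zeros((len(PERFORMANCE_FIGURES), 2, 2))
    >>> n, avg, std = _performance_summary(
    ...     (4, 4), winperf, (2, 2), (2, 2), "f1_score"
    ... )
    >>> assert (n == 1).all() and (std == 0.0).all()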

    """

    # first, estimate the padded size so that the sliding windows tile the
    # image exactly, mirroring the padding applied to predictions/ground-truth
    # data in :py:func:`_winperf_measures`; the last windows on the right and
    # bottom parts of the image may be extended with zeros
    final_size = list(size)
    rem = (size[0] - winsize[0]) % winstride[0]
    if rem != 0:
        final_size[0] += winstride[0] - rem
    rem = (size[1] - winsize[1]) % winstride[1]
    if rem != 0:
        final_size[1] += winstride[1] - rem

    # calculates the stacked performance; ``layers`` is the maximum number of
    # windows that may overlap any given pixel
    layers = int(
        numpy.ceil(winsize[0] / winstride[0])
        * numpy.ceil(winsize[1] / winstride[1])
    )
    perf = numpy.zeros([layers] + final_size, dtype=winperf.dtype)
    n = -1 * numpy.ones(final_size, dtype=int)
    data = winperf[PERFORMANCE_FIGURES.index(figure)]
    for j in range(data.shape[0]):
        yup = slice(winstride[0] * j, (winstride[0] * j) + winsize[0], 1)
        for i in range(data.shape[1]):
            xup = slice(winstride[1] * i, (winstride[1] * i) + winsize[1], 1)
            nup = n[yup, xup]
            nup += 1
            yr, xr = numpy.meshgrid(
                range(yup.start, yup.stop, yup.step),
                range(xup.start, xup.stop, xup.step),
                indexing="ij",
            )
            # record this window's figure at the next free layer of each
            # covered pixel
            perf[nup.flat, yr.flat, xr.flat] = data[j, i]

    # for each element in the ``perf`` matrix, calculates avg and std.
    n += 1  # adjust for starting at -1 before
    avg = perf.sum(axis=0) / n
    # calculate variances: pixels covered by k windows get an unbiased
    # standard deviation over their first k layers
    std = numpy.zeros_like(avg)
    for k in range(2, n.max() + 1):
        std[n == k] = perf[:k].std(axis=0, ddof=1)[n == k]

    # returns only valid bounds wrt to the original image
    return (
        n[: size[0], : size[1]],
        avg[: size[0], : size[1]],
        std[: size[0], : size[1]],
    )


def _winperf_measures(pred, gt, mask, threshold, size, stride):
    """Calculates measures on sliding windows of a single sample.

    Parameters
    ----------

    pred : torch.Tensor
        pixel-wise predictions

    gt : torch.Tensor
        ground-truth (annotations)

    mask : torch.Tensor
        mask for the region of interest. If set to ``None``, the whole image
        is considered.

    threshold : float
        threshold to use for evaluating individual sliding window performances

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth


    Returns
    -------

    measures : numpy.ndarray

        A 3D float array with all supported performance entries for each
        sliding window.

        The first dimension of the array is therefore 6 (the number of
        entries in :py:attr:`PERFORMANCE_FIGURES`). The other two dimensions
        correspond to the resulting size of the sliding window operation
        applied to the input data, taking into consideration the sliding
        window size and the stride.
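
    Example
    -------

    A minimal sketch (shapes are illustrative only; assumes the companion
    :py:func:`sample_measures_for_threshold` returns the six figures listed
    in :py:attr:`PERFORMANCE_FIGURES` for each window):

    >>> pred = torch.rand(16, 16)
    >>> gt = (torch.rand(1, 16, 16) > 0.5).float()
    >>> mask = torch.ones(1, 16, 16)
    >>> _winperf_measures(pred, gt, mask, 0.5, (8, 8), (4, 4)).shape
    (6, 3, 3)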

    """

    if mask is None:
        # ``_winperf_for_sample`` passes ``None`` for samples without an
        # explicit region-of-interest: evaluate over the whole image instead
        mask = torch.ones_like(gt)

    # we calculate the required padding so that the last windows on the right
    # and bottom sides of the predictions/ground-truth data are zero padded,
    # and torch unfolding works exactly. The last windows on the right and
    # bottom parts of the image may be extended with zeros.
    padding = (0, 0)
    rem = (pred.shape[1] - size[1]) % stride[1]
    if rem != 0:
        padding = (0, (stride[1] - rem))
    rem = (pred.shape[0] - size[0]) % stride[0]
    if rem != 0:
        padding += (0, (stride[0] - rem))

    pred_padded = torch.nn.functional.pad(
        pred, padding, mode="constant", value=0.0
    )
    gt_padded = torch.nn.functional.pad(
        gt.squeeze(0), padding, mode="constant", value=0.0
    )
    mask_padded = torch.nn.functional.pad(
        mask.squeeze(0), padding, mode="constant", value=1.0
    )

    # this will create as many views as required
    pred_windows = pred_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    gt_windows = gt_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    mask_windows = mask_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    assert pred_windows.shape == gt_windows.shape
    assert gt_windows.shape == mask_windows.shape
    ylen, xlen, _, _ = pred_windows.shape

    retval = numpy.array(
        [
            sample_measures_for_threshold(
                pred_windows[j, i, :, :],
                gt_windows[j, i, :, :],
                mask_windows[j, i, :, :],
                threshold,
            )
            for j in range(ylen)
            for i in range(xlen)
        ]
    )
    return retval.transpose(1, 0).reshape(6, ylen, xlen)


def _visual_dataset_performance(stem, img, n, avg, std, outdir):
    """Runs a visual performance assessment for each entry in a given dataset.

    Parameters
    ----------

    stem : str
        The input file stem, for which a figure will be saved in ``outdir``,
        in PDF format

    img : torch.Tensor
        A 3D tensor containing the original image that was analyzed

    n : numpy.ndarray
        A 2D integer array with the same size as ``img`` that indicates how
        many overlapping windows are available for each pixel in the image

    avg : numpy.ndarray
        A 2D floating-point array with the average performance per pixel,
        calculated over all overlapping windows for that particular pixel

    std : numpy.ndarray
        A 2D floating-point array with the unbiased standard-deviation
        (``ddof=1``) of the performance per pixel, calculated over all
        overlapping windows for that particular pixel

    outdir : str
        The base directory where to save output PDF images generated by this
        procedure. The value of ``stem`` will be suffixed to this output
        directory using a standard path join. The output filename will have a
        ``.pdf`` extension.
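
    Example
    -------

    A sketch (paths are illustrative only); this call writes
    ``/tmp/viz/train/img01.pdf``::

        _visual_dataset_performance("train/img01", img, n, avg, std, "/tmp/viz")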

    """

    import matplotlib.pyplot as plt

    figsize = img.shape[1:]

    fig, axs = plt.subplots(2, 2)
    axs[0, 0].imshow(img.numpy().transpose(1, 2, 0))
    axs[0, 0].set_title("Original image")

    pos01 = axs[0, 1].imshow(
        n[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos01, ax=axs[0, 1], orientation="vertical")
    axs[0, 1].set_title("# Overlapping Windows")

    pos10 = axs[1, 0].imshow(
        avg[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos10, ax=axs[1, 0], orientation="vertical")
    axs[1, 0].set_title("Average Performance")

    pos11 = axs[1, 1].imshow(
        std[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos11, ax=axs[1, 1], orientation="vertical")
    axs[1, 1].set_title("Performance Std. Dev.")

    plt.tight_layout()

    fname = os.path.join(outdir, stem + ".pdf")
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    fig.savefig(fname)
    plt.close(fig)


def _winperf_for_sample(
    basedir,
    threshold,
    size,
    stride,
    dataset,
    k,
    figure,
    outdir,
):
    """Evaluates sliding window performances per sample.

    Parameters
    ----------

    basedir : str
        folder where predictions for the dataset images have been previously
        stored

    threshold : :py:class:`float`
        this should be a threshold (floating point) to apply to prediction
        maps to decide on positives and negatives.

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    dataset : :py:class:`torch.utils.data.Dataset`
        dataset to iterate on

    k : int
        the sample number (order inside the dataset, starting from zero), to
        calculate sliding window performances for

    figure : str
        the performance figure to use for calculating sliding window micro
        performances (e.g. ``accuracy``, ``f1_score`` or ``jaccard``). Must
        be a supported performance figure as defined in
        :py:attr:`PERFORMANCE_FIGURES`

    outdir : str
        path where to save a visual representation of sliding window
        performances. If set to ``None``, then do not save those to disk.


    Returns
    -------

    stem : str
        The input file stem, that was just analyzed

    data : dict
        A dictionary containing the following fields:

        * ``winperf``: a 3D :py:class:`numpy.ndarray` with the sliding window
          performance figures
        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
          the original image pertaining to the analyzed sample, that indicates
          how many overlapping windows are available for each pixel in the
          image
        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
          average performance per pixel, calculated over all overlapping
          windows for that particular pixel
        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
          unbiased standard-deviation (``ddof=1``) of the performance per
          pixel, calculated over all overlapping windows for that particular
          pixel
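
    Example
    -------

    A sketch (paths and the dataset object are illustrative only; assumes a
    prediction for each sample was previously saved as
    ``<basedir>/<stem>.hdf5``)::

        stem, data = _winperf_for_sample(
            "/path/to/predictions", 0.5, (128, 128), (32, 32),
            dataset["test"], 0, "f1_score", None,
        )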

    """

    sample = dataset[k]
    with h5py.File(os.path.join(basedir, sample[0] + ".hdf5"), "r") as f:
        pred = torch.from_numpy(f["array"][:])
    mask = None if len(sample) < 4 else sample[3]
    winperf = _winperf_measures(pred, sample[2], mask, threshold, size, stride)
    n, avg, std = _performance_summary(
        sample[1].shape[1:], winperf, size, stride, figure
    )
    if outdir is not None:
        _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
    return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)


def sliding_window_performances(
    dataset,
    name,
    predictions_folder,
    threshold,
    size,
    stride,
    figure,
    nproc=1,
    outdir=None,
):
    """Evaluates sliding window performances for a whole dataset.

    Parameters
    ----------

    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
        datasets to iterate on

    name : str
        the local name of this dataset (e.g. ``train``, or ``test``), to be
        used when saving measures files.

    predictions_folder : str
        folder where predictions for the dataset images have been previously
        stored

    threshold : :py:class:`float`
        this should be a threshold (floating point) to apply to prediction
        maps to decide on positives and negatives.

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    figure : str
        the performance figure to use for calculating sliding window micro
        performances

    nproc : :py:class:`int`, Optional
        the number of processing cores to use for performance evaluation.
        Setting this number to a value ``<= 0`` will cause the function to
        use all available computing cores. Otherwise, limit to the number of
        cores specified. If set to the value of ``1``, then do not use
        multiprocessing.

    outdir : :py:class:`str`, Optional
        path where to save a visual representation of sliding window
        performances. If set to ``None``, then do not save those to disk.


    Returns
    -------

    d : dict
        A dictionary in which keys are filename stems and values are
        dictionaries with the following contents:

        ``winperf`` : numpy.ndarray
            A 3D float array with all the sliding window performances for the
            input image

        ``n`` : numpy.ndarray
            A 2D numpy array containing the number of performance scores for
            every pixel in the original image

        ``avg`` : :py:class:`numpy.ndarray`
            A 2D numpy array containing the average performances for every
            pixel on the input image considering the sliding window sizes and
            strides applied to the image

        ``std`` : :py:class:`numpy.ndarray`
            A 2D numpy array containing the (unbiased) standard deviations
            for the provided performance figure, for every pixel on the input
            image considering the sliding window sizes and strides applied to
            the image
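
    Example
    -------

    A sketch (the dataset object and paths are illustrative only)::

        d = sliding_window_performances(
            dataset, "test", "/path/to/predictions", 0.5,
            (128, 128), (32, 32), "accuracy", nproc=4,
        )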

    """

    use_predictions_folder = os.path.join(predictions_folder, name)
    if not os.path.exists(use_predictions_folder):
        use_predictions_folder = predictions_folder

    with tqdm(range(len(dataset[name])), desc="sld-win-perf") as pbar:
        # we avoid the multiprocessing module if nproc==1
        # so it is easier to run ipdb
        if nproc != 1:
            if nproc <= 0:
                nproc = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(nproc)
            results = [
                pool.apply_async(
                    _winperf_for_sample,
                    args=(
                        use_predictions_folder,
                        threshold,
                        size,
                        stride,
                        dataset[name],
                        k,
                        figure,
                        outdir,
                    ),
                    callback=lambda x: pbar.update(),
                )
                for k in range(len(dataset[name]))
            ]
            pool.close()
            pool.join()
            data = [k.get() for k in results]
        else:
            data = []
            for k in pbar:
                winperf = _winperf_for_sample(
                    use_predictions_folder,
                    threshold,
                    size,
                    stride,
                    dataset[name],
                    k,
                    figure,
                    outdir,
                )
                data.append(winperf)

    return dict(data)


def _visual_performances_for_sample(
    size, stride, dataset, k, winperf, figure, outdir
):
    """Displays sliding window performances per sample.

    This is a simplified version of :py:func:`_winperf_for_sample`, in which
    the sliding window performances are not recalculated, but used as input.
    It can be used in case you have the sliding window performances stored on
    disk, or if you are evaluating differences between sliding windows of two
    different systems.


    Parameters
    ----------

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    dataset : :py:class:`torch.utils.data.Dataset`
        dataset to iterate on

    k : int
        the sample number (order inside the dataset, starting from zero), to
        calculate sliding window performances for

    winperf : numpy.ndarray
        the previously calculated sliding window performances to use for this
        assessment.

    figure : str
        the performance figure to use for calculating sliding window micro
        performances (e.g. ``accuracy``, ``f1_score`` or ``jaccard``). Must
        be a supported performance figure as defined in
        :py:attr:`PERFORMANCE_FIGURES`

    outdir : :py:class:`str`
        path where to save a visual representation of sliding window
        performances. If set to ``None``, then do not save those to disk.


    Returns
    -------

    stem : str
        The input file stem, that was just analyzed

    data : dict
        A dictionary containing the following fields:

        * ``winperf``: a 3D float :py:class:`numpy.ndarray` with the sliding
          window performance figures. Notice this is just a copy of the input
          sliding window performance figures with the same name.
        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
          the original image pertaining to the analyzed sample, that indicates
          how many overlapping windows are available for each pixel in the
          image
        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
          average performance per pixel, calculated over all overlapping
          windows for that particular pixel
        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
          unbiased standard-deviation (``ddof=1``) of the performance per
          pixel, calculated over all overlapping windows for that particular
          pixel
    """

    sample = dataset[k]
    n, avg, std = _performance_summary(
        sample[1].shape[1:], winperf, size, stride, figure
    )
    if outdir is not None:
        _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
    return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)


def visual_performances(
    dataset,
    name,
    winperfs,
    size,
    stride,
    figure,
    nproc=1,
    outdir=None,
):
    """Displays the performances for a whole dataset.

    This is a simplified version of :py:func:`sliding_window_performances`,
    in which the sliding window performances are not recalculated, but used
    as input. It can be used in case you have the sliding window performances
    stored on disk, or if you are evaluating differences between sliding
    windows of two different systems.


    Parameters
    ----------

    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
        datasets to iterate on

    name : str
        the local name of this dataset (e.g. ``train``, or ``test``), to be
        used when saving measures files.

    winperfs : dict
        a dictionary mapping dataset stems to arrays containing the sliding
        window performances to be evaluated

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    figure : str
        the performance figure to use for calculating sliding window micro
        performances

    nproc : :py:class:`int`, Optional
        the number of processing cores to use for performance evaluation.
        Setting this number to a value ``<= 0`` will cause the function to
        use all available computing cores. Otherwise, limit to the number of
        cores specified. If set to the value of ``1``, then do not use
        multiprocessing.

    outdir : :py:class:`str`, Optional
        path where to save a visual representation of sliding window
        performances. If set to ``None``, then do not save those to disk.


    Returns
    -------

    d : dict
        A dictionary in which keys are filename stems and values are
        dictionaries with the following contents:

        ``winperf`` : numpy.ndarray
            A 3D float array with all the sliding window performances for the
            input image

        ``n`` : numpy.ndarray
            A 2D numpy array containing the number of performance scores for
            every pixel in the original image

        ``avg`` : numpy.ndarray
            A 2D numpy array containing the average performances for every
            pixel on the input image considering the sliding window sizes and
            strides applied to the image

        ``std`` : :py:class:`numpy.ndarray`
            A 2D numpy array containing the (unbiased) standard deviations
            for the provided performance figure, for every pixel on the input
            image considering the sliding window sizes and strides applied to
            the image
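
    Example
    -------

    A sketch (objects are illustrative only; ``winperfs`` would typically be
    the ``winperf`` entries of a previous
    :py:func:`sliding_window_performances` run)::

        d = visual_performances(
            dataset, "test", winperfs, (128, 128), (32, 32),
            "accuracy", outdir="viz",
        )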

    """

    stems = list(dataset[name].keys())

    with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar:
        # we avoid the multiprocessing module if nproc==1
        # so it is easier to run ipdb
        if nproc != 1:
            if nproc <= 0:
                nproc = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(nproc)
            results = [
                pool.apply_async(
                    _visual_performances_for_sample,
                    args=(
                        size,
                        stride,
                        dataset[name],
                        k,
                        winperfs[stems[k]],
                        figure,
                        outdir,
                    ),
                    callback=lambda x: pbar.update(),
                )
                for k in range(len(dataset[name]))
            ]
            pool.close()
            pool.join()
            data = [k.get() for k in results]
        else:
            data = []
            for k in pbar:
                winperf = _visual_performances_for_sample(
                    size,
                    stride,
                    dataset[name],
                    k,
                    winperfs[stems[k]],
                    figure,
                    outdir,
                )
                data.append(winperf)

    return dict(data)


def index_of_outliers(c):
    """Finds outliers (+/- 1.5*IQR) on an array of random values.

    The IQR measures the midspread, i.e. where the central 50% of the data
    sits. Values below ``Q1 - 1.5*IQR`` or above ``Q3 + 1.5*IQR`` lie outside
    a symmetrical range that would encompass most of the data, if the input
    distribution is, indeed, normal, and are flagged as outliers. Check out
    `this Wikipedia page
    <https://en.wikipedia.org/wiki/Interquartile_range>`_ for more details.


    Parameters
    ----------

    c : numpy.ndarray
        A 1D float array


    Returns
    -------

    outliers : numpy.ndarray
        A 1D boolean array with the same length as ``c``, which is ``True``
        wherever the corresponding entry is considered an outlier in the
        distribution (outside the 1.5 Interquartile Range).
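
    Example
    -------

    A minimal sketch (toy values, illustrative only):

    >>> c = numpy.array([1.0, 1.1, 0.9, 1.0, 5.0])
    >>> assert index_of_outliers(c).tolist() == [False, False, False, False, True]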

    """

    low, high = numpy.quantile(c, (0.25, 0.75))
    iqr = high - low
    limits = (low - 1.5 * iqr, high + 1.5 * iqr)
    return (c < limits[0]) | (c > limits[1])


def write_analysis_text(names, da, db, f):
    """Writes a text file containing the most important statistics.

    Compares sliding window performances in ``da`` and ``db``, taking into
    consideration their statistical properties. A significance test is
    applied to check whether observed differences in the statistics of both
    distributions are significant.


    Parameters
    ==========

    names : tuple
        A tuple containing two strings which are the names of the systems
        being analyzed

    da : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        first system (first entry of ``names``)

    db : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        second system (second entry of ``names``)

    f : file
        An open file to which the analysis will be dumped
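
    Example
    =======

    A sketch (names and arrays are illustrative only; ``da`` and ``db`` must
    be paired, i.e. have the same length and ordering)::

        with open("analysis.txt", "w") as f:
            write_analysis_text(("system-a", "system-b"), da, db, f)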

    """

    diff = da - db
    f.write("Basic statistics from distributions:\n")

    headers = [
        "system",
        "samples",
        "median",
        "average",
        "std.dev.",
        "normaltest (p)",
    ]
    table = [
        [
            names[0],
            len(da),
            numpy.median(da),
            numpy.mean(da),
            numpy.std(da, ddof=1),
            scipy.stats.normaltest(da)[1],
        ],
        [
            names[1],
            len(db),
            numpy.median(db),
            numpy.mean(db),
            numpy.std(db, ddof=1),
            scipy.stats.normaltest(db)[1],
        ],
        [
            "differences",
            len(diff),
            numpy.median(diff),
            numpy.mean(diff),
            numpy.std(diff, ddof=1),
            scipy.stats.normaltest(diff)[1],
        ],
    ]
    tdata = tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f")
    f.write(textwrap.indent(tdata, "  "))
    f.write("\n")

    # Note: dependent variable = sliding window performance figure in our case
    # Assumptions of a Paired T-test:
    # * The dependent variable must be continuous (interval/ratio). [OK]
    # * The observations are independent of one another. [OK]
    # * The dependent variable should be approximately normally distributed. [!!!]
    # * The dependent variable should not contain any outliers. [OK]

    if (diff == 0.0).all():
        logger.error(
            "Differences are exactly zero between both "
            "sliding window distributions, for **all** samples. "
            "Statistical significance tests are not meaningful in "
            "this context and will be skipped. This typically "
            "indicates an issue with the setup of prediction folders "
            "(duplicated?)"
        )
        return

    f.write("\nPaired significance tests:\n")
    w, p = scipy.stats.ttest_rel(da, db)
    f.write(f"  * Paired T (H0: equal means): S = {w:g}, p = {p:.5f}\n")

    f.write("  * Wilcoxon:\n")

    w, p = scipy.stats.wilcoxon(diff)
    f.write(f"    * H0 = zero median difference: W = {w:g}, p = {p:.5f}\n")

    w, p = scipy.stats.wilcoxon(diff, alternative="greater")
    f.write(
        f"    * H0 = med({names[0]}) <= med({names[1]}): "
        f"W = {w:g}, p = {p:.5f}\n"
    )

    w, p = scipy.stats.wilcoxon(diff, alternative="less")
    f.write(
        f"    * H0 = med({names[0]}) >= med({names[1]}): "
        f"W = {w:g}, p = {p:.5f}\n"
    )


def write_analysis_figures(names, da, db, fname):
    """Writes a PDF containing the most important plots for analysis.

    Parameters
    ==========

    names : tuple
        A tuple containing two strings which are the names of the systems
        being analyzed

    da : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        first system (first entry of ``names``)

    db : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        second system (second entry of ``names``)

    fname : str
        The filename to use for storing the summarized performance figures
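
    Example
    =======

    A sketch (names and arrays are illustrative only)::

        write_analysis_figures(("system-a", "system-b"), da, db, "analysis.pdf")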

    """

    import matplotlib.pyplot as plt

    from matplotlib.backends.backend_pdf import PdfPages

    diff = da - db
    bins = 50

    with PdfPages(fname) as pdf:
        fig = plt.figure()
        plt.grid()
        plt.hist(da, bins=bins)
        plt.title(
            f"{names[0]} - (N={len(da)}; M={numpy.median(da):.3f}; "
            f"$\\mu$={numpy.mean(da):.3f}; $\\sigma$={numpy.std(da, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.hist(db, bins=bins)
        plt.title(
            f"{names[1]} - (N={len(db)}; M={numpy.median(db):.3f}; "
            f"$\\mu$={numpy.mean(db):.3f}; $\\sigma$={numpy.std(db, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.boxplot([da, db], labels=names)
        plt.title(f"Boxplots (N={len(da)})")
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.boxplot(diff, labels=(f"{names[0]} - {names[1]}",))
        plt.title(f"Paired Differences (N={len(da)})")
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.hist(diff, bins=bins)
        plt.title(
            f"Paired Differences "
            f"(N={len(diff)}; M={numpy.median(diff):.3f}; "
            f"$\\mu$={numpy.mean(diff):.3f}; "
            f"$\\sigma$={numpy.std(diff, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        # Pearson correlation between the two paired samples; the first
        # return value is the correlation coefficient (r), not a p-value
        r = scipy.stats.pearsonr(da, db)[0]
        fig = plt.figure()
        plt.grid()
        plt.scatter(da, db, marker=".", color="black")
        plt.xlabel(f"{names[0]}")
        plt.ylabel(f"{names[1]}")
        plt.title(f"Scatter (r={r:.3f})")
        pdf.savefig()
        plt.close(fig)