#!/usr/bin/env python
# coding=utf-8

import logging
import multiprocessing
import os
import textwrap

import h5py
import numpy
import scipy.stats
import tabulate
import torch.nn

from tqdm import tqdm

from .evaluator import sample_measures_for_threshold

logger = logging.getLogger(__name__)

PERFORMANCE_FIGURES = [
    "precision",
    "recall",
    "specificity",
    "accuracy",
    "jaccard",
    "f1_score",
]
"""List of performance figures supported by this module, in order"""


def _performance_summary(size, winperf, winsize, winstride, figure):
    """Generates an array that represents the performance per pixel of the
    original image

    The returned array corresponds to a stacked version of performances for
    each pixel in the original image, taking into consideration the sliding
    window performances, their size and stride.


    Parameters
    ==========

    size : tuple
        A two tuple with the original height and width of the image being
        analyzed

    winperf : numpy.ndarray
        A 3D array with shape ``(N, H, W)``, where ``N`` is the number of
        performance measures supported by this module and ``(H, W)`` are the
        numbers of vertical and horizontal sliding windows, respectively

    winsize : tuple
        A two tuple that indicates the size of the sliding window (height,
        width)

    winstride : tuple
        A two tuple that indicates the stride of the sliding window (height,
        width)

    figure : str
        Name of the performance figure to use for the summary


    Returns
    =======

    n : numpy.ndarray
        A 2D numpy array containing the number of performance scores for every
        pixel in the original image

    avg : numpy.ndarray
        A 2D numpy array containing the average performances for every pixel
        on the input image considering the sliding window sizes and strides
        applied to the image

    std : numpy.ndarray
        A 2D numpy array containing the (unbiased) standard deviations for the
        provided performance figure, for every pixel on the input image
        considering the sliding window sizes and strides applied to the image

    """

    # first, estimate how many overlaps we have per pixel in the original image

    # we calculate the required padding so that the last windows on the right
    # and bottom sides of the predictions/ground-truth data are zero padded,
    # and torch unfolding works exactly.  The last windows on the right and
    # bottom parts of the image may be extended with zeros.
    final_size = list(size)
    rem = (size[0] - winsize[0]) % winstride[0]
    if rem != 0:
        final_size[0] += winstride[0] - rem
    rem = (size[1] - winsize[1]) % winstride[1]
    if rem != 0:
        final_size[1] += winstride[1] - rem

    # calculates the stacked performance; ``layers`` is an upper bound on the
    # number of windows that can overlap any given pixel
    layers = int(
        numpy.ceil(winsize[0] / winstride[0])
        * numpy.ceil(winsize[1] / winstride[1])
    )
    perf = numpy.zeros([layers] + final_size, dtype=winperf.dtype)
    n = -1 * numpy.ones(final_size, dtype=int)
    data = winperf[PERFORMANCE_FIGURES.index(figure)]
    for j in range(data.shape[0]):
        yup = slice(winstride[0] * j, (winstride[0] * j) + winsize[0], 1)
        for i in range(data.shape[1]):
            xup = slice(winstride[1] * i, (winstride[1] * i) + winsize[1], 1)
            nup = n[yup, xup]
            nup += 1
            yr, xr = numpy.meshgrid(
                range(yup.start, yup.stop, yup.step),
                range(xup.start, xup.stop, xup.step),
                indexing="ij",
            )
            perf[nup.flat, yr.flat, xr.flat] = data[j, i]

    # for each element in the ``perf`` matrix, calculates avg and std.
    n += 1  # adjust for starting at -1 before
    avg = perf.sum(axis=0) / n
    # calculate variances
    std = numpy.zeros_like(avg)
    for k in range(2, n.max() + 1):
        std[n == k] = perf[:k].std(axis=0, ddof=1)[n == k]

    # returns only valid bounds w.r.t. the original image
    return (
        n[: size[0], : size[1]],
        avg[: size[0], : size[1]],
        std[: size[0], : size[1]],
    )
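
# A minimal sketch of how the summary is assembled (hypothetical numbers): a
# 10x10 image analyzed with 4x4 windows and a 2x2 stride needs no padding
# ((10 - 4) % 2 == 0), yields a 4x4 grid of windows and
# ceil(4/2) * ceil(4/2) == 4 stacked layers, so ``n`` peaks at 4 overlaps:
#
#   >>> winperf = numpy.random.rand(len(PERFORMANCE_FIGURES), 4, 4)
#   >>> n, avg, std = _performance_summary(
#   ...     (10, 10), winperf, (4, 4), (2, 2), "f1_score"
#   ... )
#   >>> n.shape, n.max()
#   ((10, 10), 4)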


def _winperf_measures(pred, gt, mask, threshold, size, stride):
    """
    Calculates measures on sliding windows of a single sample


    Parameters
    ----------

    pred : torch.Tensor
        pixel-wise predictions

    gt : torch.Tensor
        ground-truth (annotations)

    mask : torch.Tensor
        mask for the region of interest

    threshold : float
        threshold to use for evaluating individual sliding window performances

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth


    Returns
    -------

    measures : numpy.ndarray

        A 3D float array with all supported performance entries for each
        sliding window.

        The first dimension therefore has size 6, one entry per figure in
        :py:attr:`PERFORMANCE_FIGURES`.  The other two dimensions correspond
        to the grid size resulting from the sliding window operation applied
        to the input data, taking into consideration the window size and
        stride.

    """

    # we calculate the required padding so that the last windows on the right
    # and bottom sides of the predictions/ground-truth data are zero padded,
    # and torch unfolding works exactly.  The last windows on the right and
    # bottom parts of the image may be extended with zeros.
    # note: torch.nn.functional.pad expects amounts for the *last* tensor
    # dimension first, i.e. (left, right, top, bottom) for 2D data
    padding = (0, 0)
    rem = (pred.shape[1] - size[1]) % stride[1]
    if rem != 0:
        padding = (0, (stride[1] - rem))
    rem = (pred.shape[0] - size[0]) % stride[0]
    if rem != 0:
        padding += (0, (stride[0] - rem))

    pred_padded = torch.nn.functional.pad(
        pred, padding, mode="constant", value=0.0
    )
    gt_padded = torch.nn.functional.pad(
        gt.squeeze(0), padding, mode="constant", value=0.0
    )
    mask_padded = torch.nn.functional.pad(
        mask.squeeze(0), padding, mode="constant", value=1.0
    )

    # this will create as many views as required
    pred_windows = pred_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    gt_windows = gt_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    mask_windows = mask_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    assert pred_windows.shape == gt_windows.shape
    assert gt_windows.shape == mask_windows.shape
    ylen, xlen, _, _ = pred_windows.shape

    retval = numpy.array(
        [
            sample_measures_for_threshold(
                pred_windows[j, i, :, :],
                gt_windows[j, i, :, :],
                mask_windows[j, i, :, :],
                threshold,
            )
            for j in range(ylen)
            for i in range(xlen)
        ]
    )
    return retval.transpose(1, 0).reshape(6, ylen, xlen)
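
# A quick, hypothetical check of the unfolding arithmetic: a 100x100
# prediction map with 32x32 windows and a 16x16 stride leaves a remainder of
# (100 - 32) % 16 == 4 on each axis, so 12 zero-padded pixels are appended
# and unfold produces a 6x6 grid of windows ((112 - 32) / 16 + 1 == 6):
#
#   >>> pred = torch.rand(100, 100)
#   >>> gt = torch.randint(0, 2, (1, 100, 100)).float()
#   >>> mask = torch.ones(1, 100, 100)
#   >>> _winperf_measures(pred, gt, mask, 0.5, (32, 32), (16, 16)).shape
#   (6, 6, 6)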


def _visual_dataset_performance(stem, img, n, avg, std, outdir):
    """Runs a visual performance assessment for each entry in a given dataset


    Parameters
    ----------

    stem : str
        The input file stem, for which a figure will be saved in ``outdir``,
        in PDF format

    img : torch.Tensor
        A 3D tensor containing the original image that was analyzed

    n : numpy.ndarray
        A 2D integer array with the same height and width as ``img`` that
        indicates how many overlapping windows are available for each pixel
        in the image

    avg : numpy.ndarray
        A 2D floating-point array with the average performance per pixel
        calculated over all overlapping windows for that particular pixel

    std : numpy.ndarray
        A 2D floating-point array with the unbiased standard-deviation
        (``ddof=1``) performance per pixel calculated over all overlapping
        windows for that particular pixel

    outdir : str
        The base directory where to save output PDF images generated by this
        procedure.  The value of ``stem`` will be suffixed to this output
        directory using a standard path join.  The output filename will have
        a ``.pdf`` extension.

    """

    import matplotlib.pyplot as plt

    figsize = img.shape[1:]

    fig, axs = plt.subplots(2, 2)
    axs[0, 0].imshow(img.numpy().transpose(1, 2, 0))
    axs[0, 0].set_title("Original image")

    pos01 = axs[0, 1].imshow(
        n[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos01, ax=axs[0, 1], orientation="vertical")
    axs[0, 1].set_title("# Overlapping Windows")

    pos10 = axs[1, 0].imshow(
        avg[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos10, ax=axs[1, 0], orientation="vertical")
    axs[1, 0].set_title("Average Performance")

    pos11 = axs[1, 1].imshow(
        std[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos11, ax=axs[1, 1], orientation="vertical")
    axs[1, 1].set_title("Performance Std. Dev.")

    plt.tight_layout()

    fname = os.path.join(outdir, stem + ".pdf")
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    fig.savefig(fname)
    plt.close(fig)


def _winperf_for_sample(
    basedir,
    threshold,
    size,
    stride,
    dataset,
    k,
    figure,
    outdir,
):
    """
    Evaluates sliding window performances per sample


    Parameters
    ----------

    basedir : str
        folder where predictions for the dataset images have been previously
        stored

    threshold : :py:class:`float`
        this should be a threshold (floating point) to apply to prediction
        maps to decide on positives and negatives.

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    dataset : :py:class:`torch.utils.data.Dataset`
        dataset to iterate on

    k : int
        the sample number (order inside the dataset, starting from zero), to
        calculate sliding window performances for

    figure : str
        the performance figure to use for calculating sliding window micro
        performances (e.g. ``accuracy``, ``f1_score`` or ``jaccard``).  Must
        be a supported performance figure as defined in
        :py:attr:`PERFORMANCE_FIGURES`

    outdir : str
        path where to save a visual representation of sliding window
        performances.  If set to ``None``, then do not save those to disk.


    Returns
    -------

    stem : str
        The input file stem, that was just analyzed

    data : dict
        A dictionary containing the following fields:

        * ``winperf``: a 3D :py:class:`numpy.ndarray` with the sliding window
          performance figures
        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
          the original image pertaining to the analyzed sample, that indicates
          how many overlapping windows are available for each pixel in the
          image
        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
          average performance per pixel calculated over all overlapping
          windows for that particular pixel
        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
          unbiased standard-deviation (``ddof=1``) performance per pixel
          calculated over all overlapping windows for that particular pixel

    """
    sample = dataset[k]
    with h5py.File(os.path.join(basedir, sample[0] + ".hdf5"), "r") as f:
        pred = torch.from_numpy(f["array"][:])
    mask = None if len(sample) < 4 else sample[3]
    winperf = _winperf_measures(pred, sample[2], mask, threshold, size, stride)
    n, avg, std = _performance_summary(
        sample[1].shape[1:], winperf, size, stride, figure
    )
    if outdir is not None:
        _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
    return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)


def sliding_window_performances(
    dataset,
    name,
    predictions_folder,
    threshold,
    size,
    stride,
    figure,
    nproc=1,
    outdir=None,
):
    """
    Evaluates sliding window performances for a whole dataset


    Parameters
    ----------

    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
        datasets to iterate on

    name : str
        the local name of this dataset (e.g. ``train``, or ``test``), to be
        used when saving measures files.

    predictions_folder : str
        folder where predictions for the dataset images have been previously
        stored

    threshold : :py:class:`float`
        this should be a threshold (floating point) to apply to prediction
        maps to decide on positives and negatives.

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    figure : str
        the performance figure to use for calculating sliding window micro
        performances

    nproc : :py:class:`int`, Optional
        the number of processing cores to use for performance evaluation.
        Setting this number to a value ``<= 0`` will cause the function to use
        all available computing cores.  Otherwise, limit to the number of
        cores specified.  If set to the value of ``1``, then do not use
        multiprocessing.

    outdir : :py:class:`str`, Optional
        path where to save a visual representation of sliding window
        performances.  If set to ``None``, then do not save those to disk.


    Returns
    -------

    d : dict
        A dictionary in which keys are filename stems and values are
        dictionaries with the following contents:

        ``winperf`` : numpy.ndarray
            A 3D float array with all the sliding window performances for the
            input image

        ``n`` : numpy.ndarray
            A 2D numpy array containing the number of performance scores for
            every pixel in the original image

        ``avg`` : numpy.ndarray
            A 2D numpy array containing the average performances for every
            pixel on the input image considering the sliding window sizes and
            strides applied to the image

        ``std`` : numpy.ndarray
            A 2D numpy array containing the (unbiased) standard deviations
            for the provided performance figure, for every pixel on the input
            image considering the sliding window sizes and strides applied to
            the image

    """

    use_predictions_folder = os.path.join(predictions_folder, name)
    if not os.path.exists(use_predictions_folder):
        use_predictions_folder = predictions_folder

    with tqdm(range(len(dataset[name])), desc="sld-win-perf") as pbar:
        # we avoid the multiprocessing module if nproc==1
        # so it is easier to run ipdb
        if nproc != 1:
            if nproc <= 0:
                nproc = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(nproc)
            results = [
                pool.apply_async(
                    _winperf_for_sample,
                    args=(
                        use_predictions_folder,
                        threshold,
                        size,
                        stride,
                        dataset[name],
                        k,
                        figure,
                        outdir,
                    ),
                    callback=lambda x: pbar.update(),
                )
                for k in range(len(dataset[name]))
            ]
            pool.close()
            pool.join()
            data = [k.get() for k in results]
        else:
            data = []
            for k in pbar:
                winperf = _winperf_for_sample(
                    use_predictions_folder,
                    threshold,
                    size,
                    stride,
                    dataset[name],
                    k,
                    figure,
                    outdir,
                )
                data.append(winperf)

    return dict(data)
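
# A hypothetical call, assuming ``dataset`` is a dict of datasets with a
# ``test`` split and predictions were previously stored as HDF5 files under
# ``results/predictions/test`` (all names illustrative):
#
#   >>> d = sliding_window_performances(
#   ...     dataset, "test", "results/predictions", threshold=0.5,
#   ...     size=(128, 128), stride=(32, 32), figure="f1_score",
#   ...     nproc=4, outdir="results/sliding-window",
#   ... )
#   >>> sorted(d[next(iter(d))].keys())
#   ['avg', 'n', 'std', 'winperf']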


def _visual_performances_for_sample(
    size, stride, dataset, k, winperf, figure, outdir
):
    """
    Displays sliding window performances per sample

    This is a simplified version of :py:func:`_winperf_for_sample` in which
    the sliding window performances are not recalculated, but taken as input.
    It can be used in case you have the sliding window performances stored on
    disk, or if you are evaluating differences between the sliding windows of
    two different systems.


    Parameters
    ----------

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    dataset : :py:class:`torch.utils.data.Dataset`
        dataset to iterate on

    k : int
        the sample number (order inside the dataset, starting from zero), to
        calculate sliding window performances for

    winperf : numpy.ndarray
        the previously calculated sliding window performances to use for this
        assessment.

    figure : str
        the performance figure to use for calculating sliding window micro
        performances (e.g. ``accuracy``, ``f1_score`` or ``jaccard``).  Must
        be a supported performance figure as defined in
        :py:attr:`PERFORMANCE_FIGURES`

    outdir : :py:class:`str`
        path where to save a visual representation of sliding window
        performances.  If set to ``None``, then do not save those to disk.


    Returns
    -------

    stem : str
        The input file stem, that was just analyzed

    data : dict
        A dictionary containing the following fields:

        * ``winperf``: a 3D float :py:class:`numpy.ndarray` with the sliding
          window performance figures.  Notice this is just a copy of the
          input sliding window performance figures with the same name.
        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
          the original image pertaining to the analyzed sample, that indicates
          how many overlapping windows are available for each pixel in the
          image
        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
          average performance per pixel calculated over all overlapping
          windows for that particular pixel
        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
          unbiased standard-deviation (``ddof=1``) performance per pixel
          calculated over all overlapping windows for that particular pixel

    """

    sample = dataset[k]
    n, avg, std = _performance_summary(
        sample[1].shape[1:], winperf, size, stride, figure
    )
    if outdir is not None:
        _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
    return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)


def visual_performances(
    dataset,
    name,
    winperfs,
    size,
    stride,
    figure,
    nproc=1,
    outdir=None,
):
    """
    Displays the performances for a whole dataset

    This is a simplified version of :py:func:`sliding_window_performances` in
    which the sliding window performances are not recalculated, but taken as
    input.  It can be used in case you have the sliding window performances
    stored on disk, or if you are evaluating differences between the sliding
    windows of two different systems.


    Parameters
    ----------

    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
        datasets to iterate on

    name : str
        the local name of this dataset (e.g. ``train``, or ``test``), to be
        used when saving measures files.

    winperfs : dict
        a dictionary mapping dataset stems to arrays containing the sliding
        window performances to be evaluated

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    figure : str
        the performance figure to use for calculating sliding window micro
        performances

    nproc : :py:class:`int`, Optional
        the number of processing cores to use for performance evaluation.
        Setting this number to a value ``<= 0`` will cause the function to use
        all available computing cores.  Otherwise, limit to the number of
        cores specified.  If set to the value of ``1``, then do not use
        multiprocessing.

    outdir : :py:class:`str`, Optional
        path where to save a visual representation of sliding window
        performances.  If set to ``None``, then do not save those to disk.


    Returns
    -------

    d : dict
        A dictionary in which keys are filename stems and values are
        dictionaries with the following contents:

        ``winperf`` : numpy.ndarray
            A 3D float array with all the sliding window performances for the
            input image

        ``n`` : numpy.ndarray
            A 2D numpy array containing the number of performance scores for
            every pixel in the original image

        ``avg`` : numpy.ndarray
            A 2D numpy array containing the average performances for every
            pixel on the input image considering the sliding window sizes and
            strides applied to the image

        ``std`` : numpy.ndarray
            A 2D numpy array containing the (unbiased) standard deviations
            for the provided performance figure, for every pixel on the input
            image considering the sliding window sizes and strides applied to
            the image

    """

    stems = list(dataset[name].keys())

    with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar:
        # we avoid the multiprocessing module if nproc==1
        # so it is easier to run ipdb
        if nproc != 1:
            if nproc <= 0:
                nproc = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(nproc)
            results = [
                pool.apply_async(
                    _visual_performances_for_sample,
                    args=(
                        size,
                        stride,
                        dataset[name],
                        k,
                        winperfs[stems[k]],
                        figure,
                        outdir,
                    ),
                    callback=lambda x: pbar.update(),
                )
                for k in range(len(dataset[name]))
            ]
            pool.close()
            pool.join()
            data = [k.get() for k in results]
        else:
            data = []
            for k in pbar:
                winperf = _visual_performances_for_sample(
                    size,
                    stride,
                    dataset[name],
                    k,
                    winperfs[stems[k]],
                    figure,
                    outdir,
                )
                data.append(winperf)

    return dict(data)
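
# A hypothetical reuse of stored performances (assuming the dataset object
# exposes ``keys()`` and ``d`` comes from ``sliding_window_performances``
# above; all names illustrative):
#
#   >>> winperfs = {stem: v["winperf"] for stem, v in d.items()}
#   >>> visual_performances(
#   ...     dataset, "test", winperfs, (128, 128), (32, 32), "f1_score",
#   ...     outdir="results/visual",
#   ... )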


def index_of_outliers(c):
    """Finds outliers (+/- 1.5*IQR) on an array of values

    The IQR measures the midspread, or where the central 50% of the data
    sits.  Points falling more than 1.5 IQR below the first quartile or above
    the third quartile are considered outliers.  Check out `this Wikipedia
    page <https://en.wikipedia.org/wiki/Interquartile_range>`_ for more
    details.


    Parameters
    ----------

    c : numpy.ndarray
        A 1D float array


    Returns
    -------

    indexes : numpy.ndarray
        A boolean mask, with the same shape as the input, that is ``True``
        for entries considered outliers in the distribution (outside the 1.5
        Interquartile Range).

    """

    l, h = numpy.quantile(c, (0.25, 0.75))
    iqr = h - l
    limits = (l - 1.5 * iqr, h + 1.5 * iqr)
    return (c < limits[0]) | (c > limits[1])
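
# For illustration, on a small array with one clear outlier (hypothetical
# values), only the extreme entry is flagged:
#
#   >>> c = numpy.array([1.0, 1.1, 0.9, 1.2, 0.8, 5.0])
#   >>> index_of_outliers(c)
#   array([False, False, False, False, False,  True])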


def write_analysis_text(names, da, db, f):
    """Writes a text file containing the most important statistics

    Compares sliding window performances in ``da`` and ``db`` taking into
    consideration their statistical properties.  A significance test is
    applied to check whether observed differences in the statistics of both
    distributions are significant.


    Parameters
    ==========

    names : tuple
        A tuple containing two strings which are the names of the systems
        being analyzed

    da : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        first system (first entry of ``names``)

    db : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        second system (second entry of ``names``)

    f : file
        An open file that will be used to dump the analysis to

    """

    diff = da - db
    f.write("Basic statistics from distributions:\n")

    headers = [
        "system",
        "samples",
        "median",
        "average",
        "std.dev.",
        "normaltest (p)",
    ]
    table = [
        [
            names[0],
            len(da),
            numpy.median(da),
            numpy.mean(da),
            numpy.std(da, ddof=1),
            scipy.stats.normaltest(da)[1],
        ],
        [
            names[1],
            len(db),
            numpy.median(db),
            numpy.mean(db),
            numpy.std(db, ddof=1),
            scipy.stats.normaltest(db)[1],
        ],
        [
            "differences",
            len(diff),
            numpy.median(diff),
            numpy.mean(diff),
            numpy.std(diff, ddof=1),
            scipy.stats.normaltest(diff)[1],
        ],
    ]
    tdata = tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f")
    f.write(textwrap.indent(tdata, " "))
    f.write("\n")

    # Note: dependent variable = sliding window performance figure in our case
    # Assumptions of a Paired T-test:
    # * The dependent variable must be continuous (interval/ratio). [OK]
    # * The observations are independent of one another. [OK]
    # * The dependent variable should be approximately normally distributed. [!!!]
    # * The dependent variable should not contain any outliers. [OK]

    if (diff == 0.0).all():
        logger.error(
            "Differences are exactly zero between both "
            "sliding window distributions, for **all** samples. "
            "Statistical significance tests are not meaningful in "
            "this context and will be skipped. This typically "
            "indicates an issue with the setup of prediction folders "
            "(duplicated?)"
        )
        return

    f.write("\nPaired significance tests:\n")
    w, p = scipy.stats.ttest_rel(da, db)
    f.write(f" * Paired T (H0: same distro): S = {w:g}, p = {p:.5f}\n")

    f.write(" * Wilcoxon:\n")

    w, p = scipy.stats.wilcoxon(diff)
    f.write(f" * H0 = same distro: W = {w:g}, p = {p:.5f}\n")

    w, p = scipy.stats.wilcoxon(diff, alternative="greater")
    f.write(
        f" * H0 = med({names[0]}) < med({names[1]}): "
        f"W = {w:g}, p = {p:.5f}\n"
    )

    w, p = scipy.stats.wilcoxon(diff, alternative="less")
    f.write(
        f" * H0 = med({names[0]}) > med({names[1]}): "
        f"W = {w:g}, p = {p:.5f}\n"
    )
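
# A hypothetical round-trip with two synthetic performance vectors, dumping
# the comparison to a text file (values illustrative only):
#
#   >>> rng = numpy.random.default_rng(0)
#   >>> da = rng.uniform(0.7, 0.9, size=1000)
#   >>> db = da + rng.normal(0.0, 0.01, size=1000)
#   >>> with open("analysis.txt", "w") as f:
#   ...     write_analysis_text(("system-a", "system-b"), da, db, f)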


def write_analysis_figures(names, da, db, fname):
    """Writes a PDF containing the most important plots for the analysis


    Parameters
    ==========

    names : tuple
        A tuple containing two strings which are the names of the systems
        being analyzed

    da : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        first system (first entry of ``names``)

    db : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        second system (second entry of ``names``)

    fname : str
        The filename to use for storing the summarized performance figures

    """

    import matplotlib.pyplot as plt

    from matplotlib.backends.backend_pdf import PdfPages

    diff = da - db
    bins = 50

    with PdfPages(fname) as pdf:

        fig = plt.figure()
        plt.grid()
        plt.hist(da, bins=bins)
        plt.title(
            f"{names[0]} - (N={len(da)}; M={numpy.median(da):.3f}; "
            f"$\\mu$={numpy.mean(da):.3f}; $\\sigma$={numpy.std(da, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.hist(db, bins=bins)
        plt.title(
            f"{names[1]} - (N={len(db)}; M={numpy.median(db):.3f}; "
            f"$\\mu$={numpy.mean(db):.3f}; $\\sigma$={numpy.std(db, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.boxplot([da, db], labels=names)
        plt.title(f"Boxplots (N={len(da)})")
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.boxplot(diff, labels=(f"{names[0]} - {names[1]}",))
        plt.title(f"Paired Differences (N={len(da)})")
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.hist(diff, bins=bins)
        plt.title(
            f"Paired Differences "
            f"(N={len(diff)}; M={numpy.median(diff):.3f}; "
            f"$\\mu$={numpy.mean(diff):.3f}; "
            f"$\\sigma$={numpy.std(diff, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        # note: pearsonr returns (correlation coefficient, p-value); we plot
        # the coefficient ``r``, not the test p-value
        r, _ = scipy.stats.pearsonr(da, db)
        fig = plt.figure()
        plt.grid()
        plt.scatter(da, db, marker=".", color="black")
        plt.xlabel(f"{names[0]}")
        plt.ylabel(f"{names[1]}")
        plt.title(f"Scatter (r={r:.3f})")
        pdf.savefig()
        plt.close(fig)
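
# Continuing the hypothetical example above, the same vectors can also be
# summarized visually into a multi-page PDF:
#
#   >>> write_analysis_figures(("system-a", "system-b"), da, db, "analysis.pdf")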