# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later

import logging
import multiprocessing
import os
import textwrap

import h5py
import numpy
import scipy.stats
import tabulate
import torch.nn

from tqdm import tqdm

from .evaluator import sample_measures_for_threshold

logger = logging.getLogger(__name__)

PERFORMANCE_FIGURES = [
    "precision",
    "recall",
    "specificity",
    "accuracy",
    "jaccard",
    "f1_score",
]
"""List of performance figures supported by this module, in order."""


def _performance_summary(size, winperf, winsize, winstride, figure):
    """Generates an array that represents the per-pixel performance of the
    original image.

    The returned array corresponds to a stacked version of performances for
    each pixel in the original image, taking into consideration the sliding
    window performances, their size and stride.


    Parameters
    ==========

    size : tuple
        A two-tuple with the original height and width of the image being
        analyzed

    winperf : numpy.ndarray
        A 3D array with shape ``(N, H, W)``, where ``N`` is the number of
        performance measures supported by this module, and ``(H, W)`` are the
        number of vertical and horizontal sliding-window positions

    winsize : tuple
        A two-tuple that indicates the size of the sliding window (height,
        width)

    winstride : tuple
        A two-tuple that indicates the stride of the sliding window (height,
        width)

    figure : str
        Name of the performance figure to use for the summary


    Returns
    =======

    n : numpy.ndarray
        A 2D numpy array containing the number of performance scores for every
        pixel in the original image

    avg : numpy.ndarray
        A 2D numpy array containing the average performances for every pixel on
        the input image, considering the sliding window sizes and strides
        applied to the image

    std : numpy.ndarray
        A 2D numpy array containing the (unbiased) standard deviations for the
        provided performance figure, for every pixel on the input image,
        considering the sliding window sizes and strides applied to the image
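
    Example
    =======

    A minimal sketch (toy shapes, illustrative only): with non-overlapping
    2x2 windows on a 4x4 image, every pixel is covered by exactly one window
    and all standard deviations are zero.

    >>> winperf = numpy.zeros((len(PERFORMANCE_FIGURES), 2, 2))
    >>> n, avg, std = _performance_summary(
    ...     (4, 4), winperf, (2, 2), (2, 2), "f1_score"
    ... )
    >>> assert (n == 1).all() and (std == 0.0).all()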

    """

    # first, estimate the padded size so that the sliding windows tile the
    # image exactly, mirroring the padding applied to predictions/ground-truth
    # data in :py:func:`_winperf_measures`; the last windows on the right and
    # bottom parts of the image may be extended with zeros
    final_size = list(size)
    rem = (size[0] - winsize[0]) % winstride[0]
    if rem != 0:
        final_size[0] += winstride[0] - rem
    rem = (size[1] - winsize[1]) % winstride[1]
    if rem != 0:
        final_size[1] += winstride[1] - rem

    # calculates the stacked performance; ``layers`` is the maximum number of
    # windows that may overlap any given pixel
    layers = int(
        numpy.ceil(winsize[0] / winstride[0])
        * numpy.ceil(winsize[1] / winstride[1])
    )
    perf = numpy.zeros([layers] + final_size, dtype=winperf.dtype)
    n = -1 * numpy.ones(final_size, dtype=int)
    data = winperf[PERFORMANCE_FIGURES.index(figure)]
    for j in range(data.shape[0]):
        yup = slice(winstride[0] * j, (winstride[0] * j) + winsize[0], 1)
        for i in range(data.shape[1]):
            xup = slice(winstride[1] * i, (winstride[1] * i) + winsize[1], 1)
            nup = n[yup, xup]
            nup += 1
            yr, xr = numpy.meshgrid(
                range(yup.start, yup.stop, yup.step),
                range(xup.start, xup.stop, xup.step),
                indexing="ij",
            )
            # record this window's figure at the next free layer of each
            # covered pixel
            perf[nup.flat, yr.flat, xr.flat] = data[j, i]

    # for each element in the ``perf`` matrix, calculates avg and std.
    n += 1  # adjust for starting at -1 before
    avg = perf.sum(axis=0) / n
    # calculate variances: pixels covered by k windows get an unbiased
    # standard deviation over their first k layers
    std = numpy.zeros_like(avg)
    for k in range(2, n.max() + 1):
        std[n == k] = perf[:k].std(axis=0, ddof=1)[n == k]

    # returns only valid bounds wrt to the original image
    return (
        n[: size[0], : size[1]],
        avg[: size[0], : size[1]],
        std[: size[0], : size[1]],
    )


def _winperf_measures(pred, gt, mask, threshold, size, stride):
    """Calculates measures on sliding windows of a single sample.

    Parameters
    ----------

    pred : torch.Tensor
        pixel-wise predictions

    gt : torch.Tensor
        ground-truth (annotations)

    mask : torch.Tensor
        mask for the region of interest. If set to ``None``, the whole image
        is considered.

    threshold : float
        threshold to use for evaluating individual sliding window performances

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth


    Returns
    -------

    measures : numpy.ndarray

        A 3D float array with all supported performance entries for each
        sliding window.

        The first dimension of the array is therefore 6 (the number of
        entries in :py:attr:`PERFORMANCE_FIGURES`). The other two dimensions
        correspond to the resulting size of the sliding window operation
        applied to the input data, taking into consideration the sliding
        window size and the stride.
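
    Example
    -------

    A minimal sketch (shapes are illustrative only; assumes the companion
    :py:func:`sample_measures_for_threshold` returns the six figures listed
    in :py:attr:`PERFORMANCE_FIGURES` for each window):

    >>> pred = torch.rand(16, 16)
    >>> gt = (torch.rand(1, 16, 16) > 0.5).float()
    >>> mask = torch.ones(1, 16, 16)
    >>> _winperf_measures(pred, gt, mask, 0.5, (8, 8), (4, 4)).shape
    (6, 3, 3)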

    """

    if mask is None:
        # ``_winperf_for_sample`` passes ``None`` for samples without an
        # explicit region-of-interest: evaluate over the whole image instead
        mask = torch.ones_like(gt)

    # we calculate the required padding so that the last windows on the right
    # and bottom sides of the predictions/ground-truth data are zero padded,
    # and torch unfolding works exactly. The last windows on the right and
    # bottom parts of the image may be extended with zeros.
    padding = (0, 0)
    rem = (pred.shape[1] - size[1]) % stride[1]
    if rem != 0:
        padding = (0, (stride[1] - rem))
    rem = (pred.shape[0] - size[0]) % stride[0]
    if rem != 0:
        padding += (0, (stride[0] - rem))

    pred_padded = torch.nn.functional.pad(
        pred, padding, mode="constant", value=0.0
    )
    gt_padded = torch.nn.functional.pad(
        gt.squeeze(0), padding, mode="constant", value=0.0
    )
    mask_padded = torch.nn.functional.pad(
        mask.squeeze(0), padding, mode="constant", value=1.0
    )

    # this will create as many views as required
    pred_windows = pred_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    gt_windows = gt_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    mask_windows = mask_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    assert pred_windows.shape == gt_windows.shape
    assert gt_windows.shape == mask_windows.shape
    ylen, xlen, _, _ = pred_windows.shape

    retval = numpy.array(
        [
            sample_measures_for_threshold(
                pred_windows[j, i, :, :],
                gt_windows[j, i, :, :],
                mask_windows[j, i, :, :],
                threshold,
            )
            for j in range(ylen)
            for i in range(xlen)
        ]
    )
    return retval.transpose(1, 0).reshape(6, ylen, xlen)


def _visual_dataset_performance(stem, img, n, avg, std, outdir):
    """Runs a visual performance assessment for each entry in a given dataset.

    Parameters
    ----------

    stem : str
        The input file stem, for which a figure will be saved in ``outdir``,
        in PDF format

    img : torch.Tensor
        A 3D tensor containing the original image that was analyzed

    n : numpy.ndarray
        A 2D integer array with the same size as ``img`` that indicates how
        many overlapping windows are available for each pixel in the image

    avg : numpy.ndarray
        A 2D floating-point array with the average performance per pixel,
        calculated over all overlapping windows for that particular pixel

    std : numpy.ndarray
        A 2D floating-point array with the unbiased standard-deviation
        (``ddof=1``) of the performance per pixel, calculated over all
        overlapping windows for that particular pixel

    outdir : str
        The base directory where to save output PDF images generated by this
        procedure. The value of ``stem`` will be suffixed to this output
        directory using a standard path join. The output filename will have a
        ``.pdf`` extension.
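
    Example
    -------

    A sketch (paths are illustrative only); this call writes
    ``/tmp/viz/train/img01.pdf``::

        _visual_dataset_performance("train/img01", img, n, avg, std, "/tmp/viz")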

    """

    import matplotlib.pyplot as plt

    figsize = img.shape[1:]

    fig, axs = plt.subplots(2, 2)
    axs[0, 0].imshow(img.numpy().transpose(1, 2, 0))
    axs[0, 0].set_title("Original image")

    pos01 = axs[0, 1].imshow(
        n[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos01, ax=axs[0, 1], orientation="vertical")
    axs[0, 1].set_title("# Overlapping Windows")

    pos10 = axs[1, 0].imshow(
        avg[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos10, ax=axs[1, 0], orientation="vertical")
    axs[1, 0].set_title("Average Performance")

    pos11 = axs[1, 1].imshow(
        std[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos11, ax=axs[1, 1], orientation="vertical")
    axs[1, 1].set_title("Performance Std. Dev.")

    plt.tight_layout()

    fname = os.path.join(outdir, stem + ".pdf")
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    fig.savefig(fname)
    plt.close(fig)


def _winperf_for_sample(
    basedir,
    threshold,
    size,
    stride,
    dataset,
    k,
    figure,
    outdir,
):
    """Evaluates sliding window performances per sample.

    Parameters
    ----------

    basedir : str
        folder where predictions for the dataset images have been previously
        stored

    threshold : :py:class:`float`
        this should be a threshold (floating point) to apply to prediction
        maps to decide on positives and negatives.

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    dataset : :py:class:`torch.utils.data.Dataset`
        dataset to iterate on

    k : int
        the sample number (order inside the dataset, starting from zero), to
        calculate sliding window performances for

    figure : str
        the performance figure to use for calculating sliding window micro
        performances (e.g. ``accuracy``, ``f1_score`` or ``jaccard``). Must
        be a supported performance figure as defined in
        :py:attr:`PERFORMANCE_FIGURES`

    outdir : str
        path where to save a visual representation of sliding window
        performances. If set to ``None``, then do not save those to disk.


    Returns
    -------

    stem : str
        The input file stem, that was just analyzed

    data : dict
        A dictionary containing the following fields:

        * ``winperf``: a 3D :py:class:`numpy.ndarray` with the sliding window
          performance figures
        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
          the original image pertaining to the analyzed sample, that indicates
          how many overlapping windows are available for each pixel in the
          image
        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
          average performance per pixel, calculated over all overlapping
          windows for that particular pixel
        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
          unbiased standard-deviation (``ddof=1``) of the performance per
          pixel, calculated over all overlapping windows for that particular
          pixel
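
    Example
    -------

    A sketch (paths and the dataset object are illustrative only; assumes a
    prediction for each sample was previously saved as
    ``<basedir>/<stem>.hdf5``)::

        stem, data = _winperf_for_sample(
            "/path/to/predictions", 0.5, (128, 128), (32, 32),
            dataset["test"], 0, "f1_score", None,
        )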

    """

    sample = dataset[k]
    with h5py.File(os.path.join(basedir, sample[0] + ".hdf5"), "r") as f:
        pred = torch.from_numpy(f["array"][:])
    mask = None if len(sample) < 4 else sample[3]
    winperf = _winperf_measures(pred, sample[2], mask, threshold, size, stride)
    n, avg, std = _performance_summary(
        sample[1].shape[1:], winperf, size, stride, figure
    )
    if outdir is not None:
        _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
    return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)


def sliding_window_performances(
    dataset,
    name,
    predictions_folder,
    threshold,
    size,
    stride,
    figure,
    nproc=1,
    outdir=None,
):
    """Evaluates sliding window performances for a whole dataset.

    Parameters
    ----------

    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
        datasets to iterate on

    name : str
        the local name of this dataset (e.g. ``train``, or ``test``), to be
        used when saving measures files.

    predictions_folder : str
        folder where predictions for the dataset images have been previously
        stored

    threshold : :py:class:`float`
        this should be a threshold (floating point) to apply to prediction
        maps to decide on positives and negatives.

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    figure : str
        the performance figure to use for calculating sliding window micro
        performances

    nproc : :py:class:`int`, Optional
        the number of processing cores to use for performance evaluation.
        Setting this number to a value ``<= 0`` will cause the function to
        use all available computing cores. Otherwise, limit to the number of
        cores specified. If set to the value of ``1``, then do not use
        multiprocessing.

    outdir : :py:class:`str`, Optional
        path where to save a visual representation of sliding window
        performances. If set to ``None``, then do not save those to disk.


    Returns
    -------

    d : dict
        A dictionary in which keys are filename stems and values are
        dictionaries with the following contents:

        ``winperf`` : numpy.ndarray
            A 3D float array with all the sliding window performances for the
            input image

        ``n`` : numpy.ndarray
            A 2D numpy array containing the number of performance scores for
            every pixel in the original image

        ``avg`` : :py:class:`numpy.ndarray`
            A 2D numpy array containing the average performances for every
            pixel on the input image considering the sliding window sizes and
            strides applied to the image

        ``std`` : :py:class:`numpy.ndarray`
            A 2D numpy array containing the (unbiased) standard deviations
            for the provided performance figure, for every pixel on the input
            image considering the sliding window sizes and strides applied to
            the image
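
    Example
    -------

    A sketch (the dataset object and paths are illustrative only)::

        d = sliding_window_performances(
            dataset, "test", "/path/to/predictions", 0.5,
            (128, 128), (32, 32), "accuracy", nproc=4,
        )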

    """

    use_predictions_folder = os.path.join(predictions_folder, name)
    if not os.path.exists(use_predictions_folder):
        use_predictions_folder = predictions_folder

    with tqdm(range(len(dataset[name])), desc="sld-win-perf") as pbar:
        # we avoid the multiprocessing module if nproc==1
        # so it is easier to run ipdb
        if nproc != 1:
            if nproc <= 0:
                nproc = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(nproc)
            results = [
                pool.apply_async(
                    _winperf_for_sample,
                    args=(
                        use_predictions_folder,
                        threshold,
                        size,
                        stride,
                        dataset[name],
                        k,
                        figure,
                        outdir,
                    ),
                    callback=lambda x: pbar.update(),
                )
                for k in range(len(dataset[name]))
            ]
            pool.close()
            pool.join()
            data = [k.get() for k in results]
        else:
            data = []
            for k in pbar:
                winperf = _winperf_for_sample(
                    use_predictions_folder,
                    threshold,
                    size,
                    stride,
                    dataset[name],
                    k,
                    figure,
                    outdir,
                )
                data.append(winperf)

    return dict(data)


def _visual_performances_for_sample(
    size, stride, dataset, k, winperf, figure, outdir
):
    """Displays sliding window performances per sample.

    This is a simplified version of :py:func:`_winperf_for_sample`, in which
    the sliding window performances are not recalculated, but used as input.
    It can be used in case you have the sliding window performances stored on
    disk, or if you are evaluating differences between sliding windows of two
    different systems.


    Parameters
    ----------

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    dataset : :py:class:`torch.utils.data.Dataset`
        dataset to iterate on

    k : int
        the sample number (order inside the dataset, starting from zero), to
        calculate sliding window performances for

    winperf : numpy.ndarray
        the previously calculated sliding window performances to use for this
        assessment.

    figure : str
        the performance figure to use for calculating sliding window micro
        performances (e.g. ``accuracy``, ``f1_score`` or ``jaccard``). Must
        be a supported performance figure as defined in
        :py:attr:`PERFORMANCE_FIGURES`

    outdir : :py:class:`str`
        path where to save a visual representation of sliding window
        performances. If set to ``None``, then do not save those to disk.


    Returns
    -------

    stem : str
        The input file stem, that was just analyzed

    data : dict
        A dictionary containing the following fields:

        * ``winperf``: a 3D float :py:class:`numpy.ndarray` with the sliding
          window performance figures. Notice this is just a copy of the input
          sliding window performance figures with the same name.
        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
          the original image pertaining to the analyzed sample, that indicates
          how many overlapping windows are available for each pixel in the
          image
        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
          average performance per pixel, calculated over all overlapping
          windows for that particular pixel
        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
          unbiased standard-deviation (``ddof=1``) of the performance per
          pixel, calculated over all overlapping windows for that particular
          pixel
    """

    sample = dataset[k]
    n, avg, std = _performance_summary(
        sample[1].shape[1:], winperf, size, stride, figure
    )
    if outdir is not None:
        _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
    return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)


def visual_performances(
    dataset,
    name,
    winperfs,
    size,
    stride,
    figure,
    nproc=1,
    outdir=None,
):
    """Displays the performances for a whole dataset.

    This is a simplified version of :py:func:`sliding_window_performances`,
    in which the sliding window performances are not recalculated, but used
    as input. It can be used in case you have the sliding window performances
    stored on disk, or if you are evaluating differences between sliding
    windows of two different systems.


    Parameters
    ----------

    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
        datasets to iterate on

    name : str
        the local name of this dataset (e.g. ``train``, or ``test``), to be
        used when saving measures files.

    winperfs : dict
        a dictionary mapping dataset stems to arrays containing the sliding
        window performances to be evaluated

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    figure : str
        the performance figure to use for calculating sliding window micro
        performances

    nproc : :py:class:`int`, Optional
        the number of processing cores to use for performance evaluation.
        Setting this number to a value ``<= 0`` will cause the function to
        use all available computing cores. Otherwise, limit to the number of
        cores specified. If set to the value of ``1``, then do not use
        multiprocessing.

    outdir : :py:class:`str`, Optional
        path where to save a visual representation of sliding window
        performances. If set to ``None``, then do not save those to disk.


    Returns
    -------

    d : dict
        A dictionary in which keys are filename stems and values are
        dictionaries with the following contents:

        ``winperf`` : numpy.ndarray
            A 3D float array with all the sliding window performances for the
            input image

        ``n`` : numpy.ndarray
            A 2D numpy array containing the number of performance scores for
            every pixel in the original image

        ``avg`` : numpy.ndarray
            A 2D numpy array containing the average performances for every
            pixel on the input image considering the sliding window sizes and
            strides applied to the image

        ``std`` : :py:class:`numpy.ndarray`
            A 2D numpy array containing the (unbiased) standard deviations
            for the provided performance figure, for every pixel on the input
            image considering the sliding window sizes and strides applied to
            the image
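
    Example
    -------

    A sketch (objects are illustrative only; ``winperfs`` would typically be
    the ``winperf`` entries of a previous
    :py:func:`sliding_window_performances` run)::

        d = visual_performances(
            dataset, "test", winperfs, (128, 128), (32, 32),
            "accuracy", outdir="viz",
        )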

    """

    stems = list(dataset[name].keys())

    with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar:
        # we avoid the multiprocessing module if nproc==1
        # so it is easier to run ipdb
        if nproc != 1:
            if nproc <= 0:
                nproc = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(nproc)
            results = [
                pool.apply_async(
                    _visual_performances_for_sample,
                    args=(
                        size,
                        stride,
                        dataset[name],
                        k,
                        winperfs[stems[k]],
                        figure,
                        outdir,
                    ),
                    callback=lambda x: pbar.update(),
                )
                for k in range(len(dataset[name]))
            ]
            pool.close()
            pool.join()
            data = [k.get() for k in results]
        else:
            data = []
            for k in pbar:
                winperf = _visual_performances_for_sample(
                    size,
                    stride,
                    dataset[name],
                    k,
                    winperfs[stems[k]],
                    figure,
                    outdir,
                )
                data.append(winperf)

    return dict(data)


def index_of_outliers(c):
    """Finds outliers (+/- 1.5*IQR) on an array of random values.

    The IQR measures the midspread, i.e. where the central 50% of the data
    sits. Values below ``Q1 - 1.5*IQR`` or above ``Q3 + 1.5*IQR`` lie outside
    a symmetrical range that would encompass most of the data, if the input
    distribution is, indeed, normal, and are flagged as outliers. Check out
    `this Wikipedia page
    <https://en.wikipedia.org/wiki/Interquartile_range>`_ for more details.


    Parameters
    ----------

    c : numpy.ndarray
        A 1D float array


    Returns
    -------

    outliers : numpy.ndarray
        A 1D boolean array with the same length as ``c``, which is ``True``
        wherever the corresponding entry is considered an outlier in the
        distribution (outside the 1.5 Interquartile Range).
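
    Example
    -------

    A minimal sketch (toy values, illustrative only):

    >>> c = numpy.array([1.0, 1.1, 0.9, 1.0, 5.0])
    >>> assert index_of_outliers(c).tolist() == [False, False, False, False, True]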

    """

    low, high = numpy.quantile(c, (0.25, 0.75))
    iqr = high - low
    limits = (low - 1.5 * iqr, high + 1.5 * iqr)
    return (c < limits[0]) | (c > limits[1])


def write_analysis_text(names, da, db, f):
    """Writes a text file containing the most important statistics.

    Compares sliding window performances in ``da`` and ``db``, taking into
    consideration their statistical properties. A significance test is
    applied to check whether observed differences in the statistics of both
    distributions are significant.


    Parameters
    ==========

    names : tuple
        A tuple containing two strings which are the names of the systems
        being analyzed

    da : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        first system (first entry of ``names``)

    db : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        second system (second entry of ``names``)

    f : file
        An open file to which the analysis will be dumped
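
    Example
    =======

    A sketch (names and arrays are illustrative only; ``da`` and ``db`` must
    be paired, i.e. have the same length and ordering)::

        with open("analysis.txt", "w") as f:
            write_analysis_text(("system-a", "system-b"), da, db, f)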

    """

    diff = da - db
    f.write("Basic statistics from distributions:\n")

    headers = [
        "system",
        "samples",
        "median",
        "average",
        "std.dev.",
        "normaltest (p)",
    ]
    table = [
        [
            names[0],
            len(da),
            numpy.median(da),
            numpy.mean(da),
            numpy.std(da, ddof=1),
            scipy.stats.normaltest(da)[1],
        ],
        [
            names[1],
            len(db),
            numpy.median(db),
            numpy.mean(db),
            numpy.std(db, ddof=1),
            scipy.stats.normaltest(db)[1],
        ],
        [
            "differences",
            len(diff),
            numpy.median(diff),
            numpy.mean(diff),
            numpy.std(diff, ddof=1),
            scipy.stats.normaltest(diff)[1],
        ],
    ]
    tdata = tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f")
    f.write(textwrap.indent(tdata, "  "))
    f.write("\n")

    # Note: dependent variable = sliding window performance figure in our case
    # Assumptions of a Paired T-test:
    # * The dependent variable must be continuous (interval/ratio). [OK]
    # * The observations are independent of one another. [OK]
    # * The dependent variable should be approximately normally distributed. [!!!]
    # * The dependent variable should not contain any outliers. [OK]

    if (diff == 0.0).all():
        logger.error(
            "Differences are exactly zero between both "
            "sliding window distributions, for **all** samples. "
            "Statistical significance tests are not meaningful in "
            "this context and will be skipped. This typically "
            "indicates an issue with the setup of prediction folders "
            "(duplicated?)"
        )
        return

    f.write("\nPaired significance tests:\n")
    w, p = scipy.stats.ttest_rel(da, db)
    f.write(f"  * Paired T (H0: equal means): S = {w:g}, p = {p:.5f}\n")

    f.write("  * Wilcoxon:\n")

    w, p = scipy.stats.wilcoxon(diff)
    f.write(f"    * H0 = zero median difference: W = {w:g}, p = {p:.5f}\n")

    w, p = scipy.stats.wilcoxon(diff, alternative="greater")
    f.write(
        f"    * H0 = med({names[0]}) <= med({names[1]}): "
        f"W = {w:g}, p = {p:.5f}\n"
    )

    w, p = scipy.stats.wilcoxon(diff, alternative="less")
    f.write(
        f"    * H0 = med({names[0]}) >= med({names[1]}): "
        f"W = {w:g}, p = {p:.5f}\n"
    )


def write_analysis_figures(names, da, db, fname):
    """Writes a PDF containing the most important plots for analysis.

    Parameters
    ==========

    names : tuple
        A tuple containing two strings which are the names of the systems
        being analyzed

    da : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        first system (first entry of ``names``)

    db : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        second system (second entry of ``names``)

    fname : str
        The filename to use for storing the summarized performance figures
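
    Example
    =======

    A sketch (names and arrays are illustrative only)::

        write_analysis_figures(("system-a", "system-b"), da, db, "analysis.pdf")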

    """

    import matplotlib.pyplot as plt

    from matplotlib.backends.backend_pdf import PdfPages

    diff = da - db
    bins = 50

    with PdfPages(fname) as pdf:
        fig = plt.figure()
        plt.grid()
        plt.hist(da, bins=bins)
        plt.title(
            f"{names[0]} - (N={len(da)}; M={numpy.median(da):.3f}; "
            f"$\\mu$={numpy.mean(da):.3f}; $\\sigma$={numpy.std(da, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.hist(db, bins=bins)
        plt.title(
            f"{names[1]} - (N={len(db)}; M={numpy.median(db):.3f}; "
            f"$\\mu$={numpy.mean(db):.3f}; $\\sigma$={numpy.std(db, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.boxplot([da, db], labels=names)
        plt.title(f"Boxplots (N={len(da)})")
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.boxplot(diff, labels=(f"{names[0]} - {names[1]}",))
        plt.title(f"Paired Differences (N={len(da)})")
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.hist(diff, bins=bins)
        plt.title(
            f"Paired Differences "
            f"(N={len(diff)}; M={numpy.median(diff):.3f}; "
            f"$\\mu$={numpy.mean(diff):.3f}; "
            f"$\\sigma$={numpy.std(diff, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        # Pearson correlation between the two paired samples; the first
        # return value is the correlation coefficient (r), not a p-value
        r = scipy.stats.pearsonr(da, db)[0]
        fig = plt.figure()
        plt.grid()
        plt.scatter(da, db, marker=".", color="black")
        plt.xlabel(f"{names[0]}")
        plt.ylabel(f"{names[1]}")
        plt.title(f"Scatter (r={r:.3f})")
        pdf.savefig()
        plt.close(fig)