Coverage for src/deepdraw/engine/significance.py: 12%
201 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
« prev ^ index » next coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
2#
3# SPDX-License-Identifier: GPL-3.0-or-later
5import logging
6import multiprocessing
7import os
8import textwrap
10import h5py
11import numpy
12import scipy.stats
13import tabulate
14import torch.nn
16from tqdm import tqdm
18from .evaluator import sample_measures_for_threshold
20logger = logging.getLogger(__name__)
22PERFORMANCE_FIGURES = [
23 "precision",
24 "recall",
25 "specificity",
26 "accuracy",
27 "jaccard",
28 "f1_score",
29]
30"""List of performance figures supported by this module, in order."""
33def _performance_summary(size, winperf, winsize, winstride, figure):
34 """Generates an array that represents the performance per pixel of the
35 original image.
37 The returned array corresponds to a stacked version of performances for
38 each pixel in the original image taking into consideration the sliding
39 window performances, their size and stride.
42 Parameters
43 ==========
45 size : tuple
46 A two tuple with the original height and width of the image being
47 analyzed
49 winperf : numpy.ndarray
50 A 3D array with shape ``(N, H, W)``, where ``N`` represents the number
51 of performance measures supported by this module, ``(H,W)`` is the
52 total number of vertical and horizontal sliding windows.
54 winsize : tuple
55 A two tuple that indicates the size of the sliding window (height,
56 width)
58 winstride : tuple
59 A two tuple that indicates the stride of the sliding window (height,
60 width)
62 figure : str
63 Name of the performance figure to use for the summary
66 Returns
67 =======
69 n : numpy.ndarray
70 A 2D numpy array containing the number of performance scores for every
71 pixel in the original image
73 avg : numpy.ndarray
74 A 2D numpy array containing the average performances for every pixel on
75 the input image considering the sliding window sizes and strides
76 applied to the image
78 std : numpy.ndarray
79 A 2D numpy array containing the (unbiased) standard deviations for the
80 provided performance figure, for every pixel on the input image
81 considering the sliding window sizes and strides applied to the image
82 """
84 # first, estimate how many overlaps we have per pixel in the original image
86 # we calculate the required padding so that the last windows on the left
87 # and bottom size of predictions/ground-truth data are zero padded, and
88 # torch unfolding works exactly. The last windows on the left and bottom
89 # parts of the image may be extended with zeros.
90 final_size = list(size)
91 rem = (size[0] - winsize[0]) % winstride[0]
92 if rem != 0:
93 final_size[0] += winstride[0] - rem
94 rem = (size[1] - winsize[1]) % winstride[1]
95 if rem != 0:
96 final_size[1] += winstride[1] - rem
97 n = numpy.zeros(final_size, dtype=int)
99 # calculates the stacked performance
100 layers = int(
101 numpy.ceil(winsize[0] / winstride[0])
102 * numpy.ceil(winsize[1] / winstride[1])
103 )
104 # figindex = PERFORMANCE_FIGURES.index(figure)
105 perf = numpy.zeros([layers] + final_size, dtype=winperf.dtype)
106 n = -1 * numpy.ones(final_size, dtype=int)
107 data = winperf[PERFORMANCE_FIGURES.index(figure)]
108 for j in range(data.shape[0]):
109 yup = slice(winstride[0] * j, (winstride[0] * j) + winsize[0], 1)
110 for i in range(data.shape[1]):
111 xup = slice(winstride[1] * i, (winstride[1] * i) + winsize[1], 1)
112 nup = n[yup, xup]
113 nup += 1
114 yr, xr = numpy.meshgrid(
115 range(yup.start, yup.stop, yup.step),
116 range(xup.start, xup.stop, xup.step),
117 indexing="ij",
118 )
119 perf[nup.flat, yr.flat, xr.flat] = data[j, i]
121 # for each element in the ``perf``matrix, calculates avg and std.
122 n += 1 # adjust for starting at -1 before
123 avg = perf.sum(axis=0) / n
124 # calculate variances
125 std = numpy.zeros_like(avg)
126 for k in range(2, n.max() + 1):
127 std[n == k] = perf[:k].std(axis=0, ddof=1)[n == k]
129 # returns only valid bounds wrt to the original image
130 return (
131 n[: size[0], : size[1]],
132 avg[: size[0], : size[1]],
133 std[: size[0], : size[1]],
134 )
137def _winperf_measures(pred, gt, mask, threshold, size, stride):
138 """Calculates measures on sliding windows of a single sample.
140 Parameters
141 ----------
143 pred : torch.Tensor
144 pixel-wise predictions
146 gt : torch.Tensor
147 ground-truth (annotations)
149 mask : torch.Tensor
150 mask for the region of interest
152 threshold : float
153 threshold to use for evaluating individual sliding window performances
155 size : tuple
156 size (vertical, horizontal) for windows for which we will calculate
157 partial performances based on the threshold and existing ground-truth
159 stride : tuple
160 strides (vertical, horizontal) for windows for which we will calculate
161 partial performances based on the threshold and existing ground-truth
164 Returns
165 -------
167 measures : numpy.ndarray
169 A 3D float array with all supported performance entries for each
170 sliding window.
172 The first dimension of the array is therefore 6. The other two
173 dimensions correspond to resulting size of the sliding window operation
174 applied to the input data and taking into consideration the sliding
175 window size and the stride.
176 """
178 # we calculate the required padding so that the last windows on the left
179 # and bottom size of predictions/ground-truth data are zero padded, and
180 # torch unfolding works exactly. The last windows on the left and bottom
181 # parts of the image may be extended with zeros.
182 padding = (0, 0)
183 rem = (pred.shape[1] - size[1]) % stride[1]
184 if rem != 0:
185 padding = (0, (stride[1] - rem))
186 rem = (pred.shape[0] - size[0]) % stride[0]
187 if rem != 0:
188 padding += (0, (stride[0] - rem))
190 pred_padded = torch.nn.functional.pad(
191 pred, padding, mode="constant", value=0.0
192 )
193 gt_padded = torch.nn.functional.pad(
194 gt.squeeze(0), padding, mode="constant", value=0.0
195 )
196 mask_padded = torch.nn.functional.pad(
197 mask.squeeze(0), padding, mode="constant", value=1.0
198 )
200 # this will create as many views as required
201 pred_windows = pred_padded.unfold(0, size[0], stride[0]).unfold(
202 1, size[1], stride[1]
203 )
204 gt_windows = gt_padded.unfold(0, size[0], stride[0]).unfold(
205 1, size[1], stride[1]
206 )
207 mask_windows = mask_padded.unfold(0, size[0], stride[0]).unfold(
208 1, size[1], stride[1]
209 )
210 assert pred_windows.shape == gt_windows.shape
211 assert gt_windows.shape == mask_windows.shape
212 ylen, xlen, _, _ = pred_windows.shape
214 retval = numpy.array(
215 [
216 sample_measures_for_threshold(
217 pred_windows[j, i, :, :],
218 gt_windows[j, i, :, :],
219 mask_windows[j, i, :, :],
220 threshold,
221 )
222 for j in range(ylen)
223 for i in range(xlen)
224 ]
225 )
226 return retval.transpose(1, 0).reshape(6, ylen, xlen)
229def _visual_dataset_performance(stem, img, n, avg, std, outdir):
230 """Runs a visual performance assessment for each entry in a given dataset.
232 Parameters
233 ----------
235 stem : str
236 The input file stem, for which a figure will be saved in ``outdir``,
237 in PDF format
239 img : pytorch.Tensor
240 A 3D tensor containing the original image that was analyzed
242 n : numpy.ndarray
243 A 2D integer array with the same size as `img` that indicates how many
244 overlapping windows are available for each pixel in the image
246 avg : numpy.ndarray
247 A 2D floating-point array with the average performance per pixel
248 calculated over all overlapping windows for that particular pixel
250 std : numpy.ndarray
251 A 2D floating-point array with the unbiased standard-deviation
252 (``ddof=1``) performance per pixel calculated over all overlapping
253 windows for that particular pixel
255 outdir : str
256 The base directory where to save output PDF images generated by this
257 procedure. The value of ``stem`` will be suffixed to this output
258 directory using a standard path join. The output filename will have a
259 ``.pdf`` extension.
260 """
262 import matplotlib.pyplot as plt
264 figsize = img.shape[1:]
266 fig, axs = plt.subplots(2, 2)
267 axs[0, 0].imshow(img.numpy().transpose(1, 2, 0))
268 axs[0, 0].set_title("Original image")
270 pos01 = axs[0, 1].imshow(
271 n[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
272 )
273 fig.colorbar(pos01, ax=axs[0, 1], orientation="vertical")
274 axs[0, 1].set_title("# Overlapping Windows")
276 pos10 = axs[1, 0].imshow(
277 avg[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
278 )
279 fig.colorbar(pos10, ax=axs[1, 0], orientation="vertical")
280 axs[1, 0].set_title("Average Performance")
282 pos11 = axs[1, 1].imshow(
283 std[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
284 )
285 fig.colorbar(pos11, ax=axs[1, 1], orientation="vertical")
286 axs[1, 1].set_title("Performance Std. Dev.")
288 plt.tight_layout()
290 fname = os.path.join(outdir, stem + ".pdf")
291 os.makedirs(os.path.dirname(fname), exist_ok=True)
292 fig.savefig(fname)
293 plt.close(fig)
296def _winperf_for_sample(
297 basedir,
298 threshold,
299 size,
300 stride,
301 dataset,
302 k,
303 figure,
304 outdir,
305):
306 """Evaluates sliding window performances per sample.
308 Parameters
309 ----------
311 basedir : str
312 folder where predictions for the dataset images has been previously
313 stored
315 threshold : :py:class:`float`
316 this should be a threshold (floating point) to apply to prediction maps
317 to decide on positives and negatives.
319 size : tuple
320 size (vertical, horizontal) for windows for which we will calculate
321 partial performances based on the threshold and existing ground-truth
323 stride : tuple
324 strides (vertical, horizontal) for windows for which we will calculate
325 partial performances based on the threshold and existing ground-truth
327 dataset : :py:class:`dict` of py:class:`torch.utils.data.Dataset`
328 datasets to iterate on
330 k : int
331 the sample number (order inside the dataset, starting from zero), to
332 calculate sliding window performances for
334 figure : str
335 the performance figure to use for calculating sliding window micro
336 performances (e.g. `accuracy`, `f1_score` or `jaccard`). Must be
337 a supported performance figure as defined in
338 :py:attr:`PERFORMANCE_FIGURES`
340 outdir : str
341 path were to save a visual representation of sliding window
342 performances. If set to ``None``, then do not save those to disk.
345 Returns
346 -------
348 stem : str
349 The input file stem, that was just analyzed
351 data : dict
352 A dictionary containing the following fields:
354 * ``winperf``: a 3D :py:class:`numpy.ndarray` with the sliding window
355 performance figures
356 * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
357 the original image pertaining to the analyzed sample, that indicates
358 how many overlapping windows are available for each pixel in the
359 image
360 * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
361 average performance per pixel calculated over all overlapping windows
362 for that particular pixel
363 * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
364 unbiased standard-deviation (``ddof=1``) performance per pixel
365 calculated over all overlapping windows for that particular pixel
366 """
368 sample = dataset[k]
369 with h5py.File(os.path.join(basedir, sample[0] + ".hdf5"), "r") as f:
370 pred = torch.from_numpy(f["array"][:])
371 mask = None if len(sample) < 4 else sample[3]
372 winperf = _winperf_measures(pred, sample[2], mask, threshold, size, stride)
373 n, avg, std = _performance_summary(
374 sample[1].shape[1:], winperf, size, stride, figure
375 )
376 if outdir is not None:
377 _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
378 return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)
381def sliding_window_performances(
382 dataset,
383 name,
384 predictions_folder,
385 threshold,
386 size,
387 stride,
388 figure,
389 nproc=1,
390 outdir=None,
391):
392 """Evaluates sliding window performances for a whole dataset.
394 Parameters
395 ---------
397 dataset : :py:class:`dict` of py:class:`torch.utils.data.Dataset`
398 datasets to iterate on
400 name : str
401 the local name of this dataset (e.g. ``train``, or ``test``), to be
402 used when saving measures files.
404 predictions_folder : str
405 folder where predictions for the dataset images has been previously
406 stored
408 threshold : :py:class:`float`
409 this should be a threshold (floating point) to apply to prediction maps
410 to decide on positives and negatives.
412 size : tuple
413 size (vertical, horizontal) for windows for which we will calculate
414 partial performances based on the threshold and existing ground-truth
416 stride : tuple
417 strides (vertical, horizontal) for windows for which we will calculate
418 partial performances based on the threshold and existing ground-truth
420 figure : str
421 the performance figure to use for calculating sliding window micro
422 performances
424 nproc : :py:class:`int`, Optional
425 the number of processing cores to use for performance evaluation.
426 Setting this number to a value ``<= 0`` will cause the function to use
427 all available computing cores. Otherwise, limit to the number of cores
428 specified. If set to the value of ``1``, then do not use
429 multiprocessing.
431 outdir : :py:class:`str`, Optional
432 path were to save a visual representation of sliding window
433 performances. If set to ``None``, then do not save those to disk.
436 Returns
437 -------
439 d : dict
440 A dictionary in which keys are filename stems and values are
441 dictionaries with the following contents:
443 ``winperf``: numpy.ndarray
445 ``n`` : numpy.ndarray
446 A 2D numpy array containing the number of performance scores for
447 every pixel in the original image
449 ``avg`` : :py:class:`numpy.ndarray`
450 A 2D numpy array containing the average performances for every
451 pixel on the input image considering the sliding window sizes and
452 strides applied to the image
454 ``std`` : :py:class:`numpy.ndarray`
455 A 2D numpy array containing the (unbiased) standard deviations for
456 the provided performance figure, for every pixel on the input image
457 considering the sliding window sizes and strides applied to the
458 image
459 """
461 use_predictions_folder = os.path.join(predictions_folder, name)
462 if not os.path.exists(use_predictions_folder):
463 use_predictions_folder = predictions_folder
465 with tqdm(range(len(dataset[name])), desc="sld-win-perf") as pbar:
466 # we avoid the multiprocessing module if nproc==1
467 # so it is easier to run ipdb
468 if nproc != 1:
469 if nproc <= 0:
470 nproc = multiprocessing.cpu_count()
471 pool = multiprocessing.Pool(nproc)
472 results = [
473 pool.apply_async(
474 _winperf_for_sample,
475 args=(
476 use_predictions_folder,
477 threshold,
478 size,
479 stride,
480 dataset[name],
481 k,
482 figure,
483 outdir,
484 ),
485 callback=lambda x: pbar.update(),
486 )
487 for k in range(len(dataset[name]))
488 ]
489 pool.close()
490 pool.join()
491 data = [k.get() for k in results]
492 else:
493 data = []
494 for k in pbar:
495 winperf = _winperf_for_sample(
496 use_predictions_folder,
497 threshold,
498 size,
499 stride,
500 dataset[name],
501 k,
502 figure,
503 outdir,
504 )
505 data.append(winperf)
507 return dict(data)
510def _visual_performances_for_sample(
511 size, stride, dataset, k, winperf, figure, outdir
512):
513 """Displays sliding windows performances per sample.
515 This is a simplified version of :py:func:`_winper_for_sample`
516 in which the sliding window performances are not recalculated and used as
517 input. It can be used in case you have the sliding window performances
518 stored in disk or if you're evaluating differences between sliding windows
519 of 2 different systems.
522 Parameters
523 ----------
525 size : tuple
526 size (vertical, horizontal) for windows for which we will calculate
527 partial performances based on the threshold and existing ground-truth
529 stride : tuple
530 strides (vertical, horizontal) for windows for which we will calculate
531 partial performances based on the threshold and existing ground-truth
533 dataset : :py:class:`dict` of py:class:`torch.utils.data.Dataset`
534 datasets to iterate on
536 k : int
537 the sample number (order inside the dataset, starting from zero), to
538 calculate sliding window performances for
540 winperf : numpy.ndarray
541 the previously calculated sliding window performances to use for this
542 assessment.
544 figure : str
545 the performance figure to use for calculating sliding window micro
546 performances (e.g. `accuracy`, `f1_score` or `jaccard`). Must be
547 a supported performance figure as defined in
548 :py:attr:`PERFORMANCE_FIGURES`
550 outdir : :py:class:`str`
551 path were to save a visual representation of sliding window
552 performances. If set to ``None``, then do not save those to disk.
555 Returns
556 -------
558 stem : str
559 The input file stem, that was just analyzed
561 data : dict
562 A dictionary containing the following fields:
564 * ``winperf``: a 3D float :py:class:`numpy.ndarray` with the sliding
565 window performance figures. Notice this is just a copy of the input
566 sliding window performance figures with the same name.
567 * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
568 the original image pertaining to the analyzed sample, that indicates
569 how many overlapping windows are available for each pixel in the
570 image
571 * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
572 average performance per pixel calculated over all overlapping windows
573 for that particular pixel
574 * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
575 unbiased standard-deviation (``ddof=1``) performance per pixel
576 calculated over all overlapping windows for that particular pixel
577 """
579 sample = dataset[k]
580 n, avg, std = _performance_summary(
581 sample[1].shape[1:], winperf, size, stride, figure
582 )
583 if outdir is not None:
584 _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
585 return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)
588def visual_performances(
589 dataset,
590 name,
591 winperfs,
592 size,
593 stride,
594 figure,
595 nproc=1,
596 outdir=None,
597):
598 """Displays the performances for for a whole dataset.
600 This is a simplified version of :py:func:`sliding_window_performances` in
601 which the sliding window performances are not recalculated and used as
602 input. It can be used in case you have the sliding window performances
603 stored in disk or if you're evaluating differences between sliding windows
604 of 2 different systems.
607 Parameters
608 ---------
610 dataset : :py:class:`dict` of py:class:`torch.utils.data.Dataset`
611 datasets to iterate on
613 name : str
614 the local name of this dataset (e.g. ``train``, or ``test``), to be
615 used when saving measures files.
617 winperfs : dict
618 a dictionary mapping dataset stems to arrays containing the sliding
619 window performances to be evaluated
621 size : tuple
622 size (vertical, horizontal) for windows for which we will calculate
623 partial performances based on the threshold and existing ground-truth
625 stride : tuple
626 strides (vertical, horizontal) for windows for which we will calculate
627 partial performances based on the threshold and existing ground-truth
629 figure : str
630 the performance figure to use for calculating sliding window micro
631 performances
633 nproc : :py:class:`int`, Optional
634 the number of processing cores to use for performance evaluation.
635 Setting this number to a value ``<= 0`` will cause the function to use
636 all available computing cores. Otherwise, limit to the number of cores
637 specified. If set to the value of ``1``, then do not use
638 multiprocessing.
640 outdir : :py:class:`str`, Optional
641 path were to save a visual representation of sliding window
642 performances. If set to ``None``, then do not save those to disk.
645 Returns
646 -------
648 d : dict
649 A dictionary in which keys are filename stems and values are
650 dictionaries with the following contents:
652 ``winperf``: numpy.ndarray
653 A 3D float array with all the sliding window performances for the
654 input image
656 ``n`` : numpy.ndarray
657 A 2D numpy array containing the number of performance scores for
658 every pixel in the original image
660 ``avg`` : numpy.ndarray
661 A 2D numpy array containing the average performances for every
662 pixel on the input image considering the sliding window sizes and
663 strides applied to the image
665 ``std`` : :py:class:`numpy.ndarray`
666 A 2D numpy array containing the (unbiased) standard deviations for
667 the provided performance figure, for every pixel on the input image
668 considering the sliding window sizes and strides applied to the
669 image
670 """
672 stems = list(dataset[name].keys())
674 with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar:
675 # we avoid the multiprocessing module if nproc==1
676 # so it is easier to run ipdb
677 if nproc != 1:
678 if nproc <= 0:
679 nproc = multiprocessing.cpu_count()
680 pool = multiprocessing.Pool(nproc)
681 results = [
682 pool.apply_async(
683 _visual_performances_for_sample,
684 args=(
685 size,
686 stride,
687 dataset[name],
688 k,
689 winperfs[stems[k]],
690 figure,
691 outdir,
692 ),
693 callback=lambda x: pbar.update(),
694 )
695 for k in range(len(dataset[name]))
696 ]
697 pool.close()
698 pool.join()
699 data = [k.get() for k in results]
700 else:
701 data = []
702 for k in pbar:
703 winperf = _visual_performances_for_sample(
704 size,
705 stride,
706 dataset[name],
707 k,
708 winperfs[stems[k]],
709 figure,
710 outdir,
711 )
712 data.append(winperf)
714 return dict(data)
717def index_of_outliers(c):
718 """Finds indexes of outliers (+/- 1.5*IQR) on an array of random values.
720 The IQR measures the midspread or where 50% of a normal distribution would
721 sit, if the input data is, indeed, normal. 1.5 IQR corresponds to a
722 symmetrical range that would encompass most of the data, characterizing
723 outliers (outside of that range). Check out `this Wikipedia page
724 <https://en.wikipedia.org/wiki/Interquartile_range>` for more details.
727 Parameters
728 ----------
730 c : numpy.ndarray
731 A 1D float array
734 Returns
735 -------
737 indexes : numpy.ndarray
738 Indexes of the input column that are considered outliers in the
739 distribution (outside the 1.5 Interquartile Range).
740 """
742 l, h = numpy.quantile(c, (0.25, 0.75))
743 iqr = h - l
744 limits = (l - 1.5 * iqr, h + 1.5 * iqr)
745 return (c < limits[0]) | (c > limits[1])
748def write_analysis_text(names, da, db, f):
749 """Writes a text file containing the most important statistics.
751 Compares sliding window performances in ``da`` and ``db`` taking into
752 consideration their statistical properties. A significance test is applied
753 to check whether observed differences in the statistics of both
754 distributions is significant.
757 Parameters
758 ==========
760 names : tuple
761 A tuple containing two strings which are the names of the systems being
762 analyzed
764 da : numpy.ndarray
765 A 1D numpy array containing all the performance figures per sliding
766 window analyzed and organized in a particular order (raster), for the
767 first system (first entry of ``names``)
769 db : numpy.ndarray
770 A 1D numpy array containing all the performance figures per sliding
771 window analyzed and organized in a particular order (raster), for the
772 second system (second entry of ``names``)
774 f : file
775 An open file that will be used dump the analysis to
776 """
778 diff = da - db
779 f.write("Basic statistics from distributions:\n")
781 headers = [
782 "system",
783 "samples",
784 "median",
785 "average",
786 "std.dev.",
787 "normaltest (p)",
788 ]
789 table = [
790 [
791 names[0],
792 len(da),
793 numpy.median(da),
794 numpy.mean(da),
795 numpy.std(da, ddof=1),
796 scipy.stats.normaltest(da)[1],
797 ],
798 [
799 names[1],
800 len(db),
801 numpy.median(db),
802 numpy.mean(db),
803 numpy.std(db, ddof=1),
804 scipy.stats.normaltest(db)[1],
805 ],
806 [
807 "differences",
808 len(diff),
809 numpy.median(diff),
810 numpy.mean(diff),
811 numpy.std(diff, ddof=1),
812 scipy.stats.normaltest(diff)[1],
813 ],
814 ]
815 tdata = tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f")
816 f.write(textwrap.indent(tdata, " "))
817 f.write("\n")
819 # Note: dependent variable = sliding window performance figure in our case
820 # Assumptions of a Paired T-test:
821 # * The dependent variable must be continuous (interval/ratio). [OK]
822 # * The observations are independent of one another. [OK]
823 # * The dependent variable should be approximately normally distributed. [!!!]
824 # * The dependent variable should not contain any outliers. [OK]
826 if (diff == 0.0).all():
827 logger.error(
828 "Differences are exactly zero between both "
829 "sliding window distributions, for **all** samples. "
830 "Statistical significance tests are not meaningful in "
831 "this context and will be skipped. This typically "
832 "indicates an issue with the setup of prediction folders "
833 "(duplicated?)"
834 )
835 return
837 f.write("\nPaired significance tests:\n")
838 w, p = scipy.stats.ttest_rel(da, db)
839 f.write(f" * Paired T (H0: same distro): S = {w:g}, p = {p:.5f}\n")
841 f.write(" * Wilcoxon:\n")
843 w, p = scipy.stats.wilcoxon(diff)
844 f.write(f" * H0 = same distro: W = {w:g}, p = {p:.5f}\n")
846 w, p = scipy.stats.wilcoxon(diff, alternative="greater")
847 f.write(
848 f" * H0 = med({names[0]}) < med({names[1]}): "
849 f"W = {w:g}, p = {p:.5f}\n"
850 )
852 w, p = scipy.stats.wilcoxon(diff, alternative="less")
853 f.write(
854 f" * H0 = med({names[0]}) > med({names[1]}): "
855 f"W = {w:g}, p = {p:.5f}\n"
856 )
859def write_analysis_figures(names, da, db, fname):
860 """Writes a PDF containing most important plots for analysis.
862 Parameters
863 ==========
865 names : tuple
866 A tuple containing two strings which are the names of the systems being
867 analyzed
869 da : numpy.ndarray
870 A 1D numpy array containing all the performance figures per sliding
871 window analyzed and organized in a particular order (raster), for the
872 first system (first entry of ``names``)
874 db : numpy.ndarray
875 A 1D numpy array containing all the performance figures per sliding
876 window analyzed and organized in a particular order (raster), for the
877 second system (second entry of ``names``)
879 fname : str
880 The filename to use for storing the summarized performance figures
881 """
883 import matplotlib.pyplot as plt
885 from matplotlib.backends.backend_pdf import PdfPages
887 diff = da - db
888 bins = 50
890 with PdfPages(fname) as pdf:
891 fig = plt.figure()
892 plt.grid()
893 plt.hist(da, bins=bins)
894 plt.title(
895 f"{names[0]} - (N={len(da)}; M={numpy.median(da):.3f}; "
896 f"$\\mu$={numpy.mean(da):.3f}; $\\sigma$={numpy.std(da, ddof=1):.3f})"
897 )
898 pdf.savefig()
899 plt.close(fig)
901 fig = plt.figure()
902 plt.grid()
903 plt.hist(db, bins=bins)
904 plt.title(
905 f"{names[1]} - (N={len(db)}; M={numpy.median(db):.3f}; "
906 f"$\\mu$={numpy.mean(db):.3f}; $\\sigma$={numpy.std(db, ddof=1):.3f})"
907 )
908 pdf.savefig()
909 plt.close(fig)
911 fig = plt.figure()
912 plt.grid()
913 plt.boxplot([da, db], labels=names)
914 plt.title(f"Boxplots (N={len(da)})")
915 pdf.savefig()
916 plt.close(fig)
918 fig = plt.figure()
919 plt.grid()
920 plt.boxplot(diff, labels=(f"{names[0]} - {names[1]}",))
921 plt.title(f"Paired Differences (N={len(da)})")
922 pdf.savefig()
923 plt.close(fig)
925 fig = plt.figure()
926 plt.grid()
927 plt.hist(diff, bins=bins)
928 plt.title(
929 f"Paired Differences "
930 f"(N={len(diff)}; M={numpy.median(diff):.3f}; "
931 f"$\\mu$={numpy.mean(diff):.3f}; "
932 f"$\\sigma$={numpy.std(diff, ddof=1):.3f})"
933 )
934 pdf.savefig()
935 plt.close(fig)
937 p = scipy.stats.pearsonr(da, db)
938 fig = plt.figure()
939 plt.grid()
940 plt.scatter(da, db, marker=".", color="black")
941 plt.xlabel(f"{names[0]}")
942 plt.ylabel(f"{names[1]}")
943 plt.title(f"Scatter (p={p[0]:.3f})")
944 pdf.savefig()
945 plt.close(fig)