#!/usr/bin/env python
# coding=utf-8

import logging
import multiprocessing
import os
import textwrap

import h5py
import numpy
import scipy.stats
import tabulate
import torch.nn

from tqdm import tqdm

from .evaluator import sample_measures_for_threshold

logger = logging.getLogger(__name__)

PERFORMANCE_FIGURES = [
    "precision",
    "recall",
    "specificity",
    "accuracy",
    "jaccard",
    "f1_score",
]
"""List of performance figures supported by this module, in order"""


def _performance_summary(size, winperf, winsize, winstride, figure):
    """Generates an array that represents the performance per pixel of the
    original image

    The returned array corresponds to a stacked version of performances for
    each pixel in the original image, taking into consideration the sliding
    window performances, their size and stride.


    Parameters
    ==========

    size : tuple
        A two tuple with the original height and width of the image being
        analyzed

    winperf : numpy.ndarray
        A 3D array with shape ``(N, H, W)``, where ``N`` is the number of
        performance measures supported by this module and ``(H, W)`` are the
        numbers of vertical and horizontal sliding windows, respectively

    winsize : tuple
        A two tuple that indicates the size of the sliding window (height,
        width)

    winstride : tuple
        A two tuple that indicates the stride of the sliding window (height,
        width)

    figure : str
        Name of the performance figure to use for the summary


    Returns
    =======

    n : numpy.ndarray
        A 2D numpy array containing the number of performance scores for every
        pixel in the original image

    avg : numpy.ndarray
        A 2D numpy array containing the average performances for every pixel
        on the input image considering the sliding window sizes and strides
        applied to the image

    std : numpy.ndarray
        A 2D numpy array containing the (unbiased) standard deviations for the
        provided performance figure, for every pixel on the input image
        considering the sliding window sizes and strides applied to the image

    """

    # first, estimate how many overlaps we have per pixel in the original image

    # we calculate the required padding so that the last windows on the right
    # and bottom sides of the predictions/ground-truth data are zero padded,
    # and torch unfolding works exactly.  The last windows on the right and
    # bottom parts of the image may be extended with zeros.
    final_size = list(size)
    rem = (size[0] - winsize[0]) % winstride[0]
    if rem != 0:
        final_size[0] += winstride[0] - rem
    rem = (size[1] - winsize[1]) % winstride[1]
    if rem != 0:
        final_size[1] += winstride[1] - rem

    # calculates the stacked performance; ``layers`` is an upper bound on the
    # number of windows that can overlap any given pixel
    layers = int(
        numpy.ceil(winsize[0] / winstride[0])
        * numpy.ceil(winsize[1] / winstride[1])
    )
    perf = numpy.zeros([layers] + final_size, dtype=winperf.dtype)
    n = -1 * numpy.ones(final_size, dtype=int)
    data = winperf[PERFORMANCE_FIGURES.index(figure)]
    for j in range(data.shape[0]):
        yup = slice(winstride[0] * j, (winstride[0] * j) + winsize[0], 1)
        for i in range(data.shape[1]):
            xup = slice(winstride[1] * i, (winstride[1] * i) + winsize[1], 1)
            nup = n[yup, xup]
            nup += 1
            yr, xr = numpy.meshgrid(
                range(yup.start, yup.stop, yup.step),
                range(xup.start, xup.stop, xup.step),
                indexing="ij",
            )
            perf[nup.flat, yr.flat, xr.flat] = data[j, i]

    # for each element in the ``perf`` matrix, calculates avg and std.
    n += 1  # adjust for starting at -1 before
    avg = perf.sum(axis=0) / n
    # calculate variances
    std = numpy.zeros_like(avg)
    for k in range(2, n.max() + 1):
        std[n == k] = perf[:k].std(axis=0, ddof=1)[n == k]

    # returns only valid bounds w.r.t. the original image
    return (
        n[: size[0], : size[1]],
        avg[: size[0], : size[1]],
        std[: size[0], : size[1]],
    )
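
# A minimal sketch of how the summary is assembled (hypothetical numbers): a
# 10x10 image analyzed with 4x4 windows and a 2x2 stride needs no padding
# ((10 - 4) % 2 == 0), yields a 4x4 grid of windows and
# ceil(4/2) * ceil(4/2) == 4 stacked layers, so ``n`` peaks at 4 overlaps:
#
#   >>> winperf = numpy.random.rand(len(PERFORMANCE_FIGURES), 4, 4)
#   >>> n, avg, std = _performance_summary(
#   ...     (10, 10), winperf, (4, 4), (2, 2), "f1_score"
#   ... )
#   >>> n.shape, n.max()
#   ((10, 10), 4)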


def _winperf_measures(pred, gt, mask, threshold, size, stride):
    """
    Calculates measures on sliding windows of a single sample


    Parameters
    ----------

    pred : torch.Tensor
        pixel-wise predictions

    gt : torch.Tensor
        ground-truth (annotations)

    mask : torch.Tensor
        mask for the region of interest

    threshold : float
        threshold to use for evaluating individual sliding window performances

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth


    Returns
    -------

    measures : numpy.ndarray

        A 3D float array with all supported performance entries for each
        sliding window.

        The first dimension therefore has size 6, one entry per figure in
        :py:attr:`PERFORMANCE_FIGURES`.  The other two dimensions correspond
        to the grid size resulting from the sliding window operation applied
        to the input data, taking into consideration the window size and
        stride.

    """

    # we calculate the required padding so that the last windows on the right
    # and bottom sides of the predictions/ground-truth data are zero padded,
    # and torch unfolding works exactly.  The last windows on the right and
    # bottom parts of the image may be extended with zeros.
    # note: torch.nn.functional.pad expects amounts for the *last* tensor
    # dimension first, i.e. (left, right, top, bottom) for 2D data
    padding = (0, 0)
    rem = (pred.shape[1] - size[1]) % stride[1]
    if rem != 0:
        padding = (0, (stride[1] - rem))
    rem = (pred.shape[0] - size[0]) % stride[0]
    if rem != 0:
        padding += (0, (stride[0] - rem))

    pred_padded = torch.nn.functional.pad(
        pred, padding, mode="constant", value=0.0
    )
    gt_padded = torch.nn.functional.pad(
        gt.squeeze(0), padding, mode="constant", value=0.0
    )
    mask_padded = torch.nn.functional.pad(
        mask.squeeze(0), padding, mode="constant", value=1.0
    )

    # this will create as many views as required
    pred_windows = pred_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    gt_windows = gt_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    mask_windows = mask_padded.unfold(0, size[0], stride[0]).unfold(
        1, size[1], stride[1]
    )
    assert pred_windows.shape == gt_windows.shape
    assert gt_windows.shape == mask_windows.shape
    ylen, xlen, _, _ = pred_windows.shape

    retval = numpy.array(
        [
            sample_measures_for_threshold(
                pred_windows[j, i, :, :],
                gt_windows[j, i, :, :],
                mask_windows[j, i, :, :],
                threshold,
            )
            for j in range(ylen)
            for i in range(xlen)
        ]
    )
    return retval.transpose(1, 0).reshape(6, ylen, xlen)
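
# A quick, hypothetical check of the unfolding arithmetic: a 100x100
# prediction map with 32x32 windows and a 16x16 stride leaves a remainder of
# (100 - 32) % 16 == 4 on each axis, so 12 zero-padded pixels are appended
# and unfold produces a 6x6 grid of windows ((112 - 32) / 16 + 1 == 6):
#
#   >>> pred = torch.rand(100, 100)
#   >>> gt = torch.randint(0, 2, (1, 100, 100)).float()
#   >>> mask = torch.ones(1, 100, 100)
#   >>> _winperf_measures(pred, gt, mask, 0.5, (32, 32), (16, 16)).shape
#   (6, 6, 6)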


def _visual_dataset_performance(stem, img, n, avg, std, outdir):
    """Runs a visual performance assessment for each entry in a given dataset


    Parameters
    ----------

    stem : str
        The input file stem, for which a figure will be saved in ``outdir``,
        in PDF format

    img : torch.Tensor
        A 3D tensor containing the original image that was analyzed

    n : numpy.ndarray
        A 2D integer array with the same height and width as ``img`` that
        indicates how many overlapping windows are available for each pixel
        in the image

    avg : numpy.ndarray
        A 2D floating-point array with the average performance per pixel
        calculated over all overlapping windows for that particular pixel

    std : numpy.ndarray
        A 2D floating-point array with the unbiased standard-deviation
        (``ddof=1``) performance per pixel calculated over all overlapping
        windows for that particular pixel

    outdir : str
        The base directory where to save output PDF images generated by this
        procedure.  The value of ``stem`` will be suffixed to this output
        directory using a standard path join.  The output filename will have
        a ``.pdf`` extension.

    """

    import matplotlib.pyplot as plt

    figsize = img.shape[1:]

    fig, axs = plt.subplots(2, 2)
    axs[0, 0].imshow(img.numpy().transpose(1, 2, 0))
    axs[0, 0].set_title("Original image")

    pos01 = axs[0, 1].imshow(
        n[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos01, ax=axs[0, 1], orientation="vertical")
    axs[0, 1].set_title("# Overlapping Windows")

    pos10 = axs[1, 0].imshow(
        avg[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos10, ax=axs[1, 0], orientation="vertical")
    axs[1, 0].set_title("Average Performance")

    pos11 = axs[1, 1].imshow(
        std[: figsize[0], : figsize[1]], cmap="Greys_r", interpolation="none"
    )
    fig.colorbar(pos11, ax=axs[1, 1], orientation="vertical")
    axs[1, 1].set_title("Performance Std. Dev.")

    plt.tight_layout()

    fname = os.path.join(outdir, stem + ".pdf")
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    fig.savefig(fname)
    plt.close(fig)


def _winperf_for_sample(
    basedir,
    threshold,
    size,
    stride,
    dataset,
    k,
    figure,
    outdir,
):
    """
    Evaluates sliding window performances per sample


    Parameters
    ----------

    basedir : str
        folder where predictions for the dataset images have been previously
        stored

    threshold : :py:class:`float`
        this should be a threshold (floating point) to apply to prediction
        maps to decide on positives and negatives.

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    dataset : :py:class:`torch.utils.data.Dataset`
        dataset to iterate on

    k : int
        the sample number (order inside the dataset, starting from zero), to
        calculate sliding window performances for

    figure : str
        the performance figure to use for calculating sliding window micro
        performances (e.g. ``accuracy``, ``f1_score`` or ``jaccard``).  Must
        be a supported performance figure as defined in
        :py:attr:`PERFORMANCE_FIGURES`

    outdir : str
        path where to save a visual representation of sliding window
        performances.  If set to ``None``, then do not save those to disk.


    Returns
    -------

    stem : str
        The input file stem, that was just analyzed

    data : dict
        A dictionary containing the following fields:

        * ``winperf``: a 3D :py:class:`numpy.ndarray` with the sliding window
          performance figures
        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
          the original image pertaining to the analyzed sample, that indicates
          how many overlapping windows are available for each pixel in the
          image
        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
          average performance per pixel calculated over all overlapping
          windows for that particular pixel
        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
          unbiased standard-deviation (``ddof=1``) performance per pixel
          calculated over all overlapping windows for that particular pixel

    """
    sample = dataset[k]
    with h5py.File(os.path.join(basedir, sample[0] + ".hdf5"), "r") as f:
        pred = torch.from_numpy(f["array"][:])
    mask = None if len(sample) < 4 else sample[3]
    winperf = _winperf_measures(pred, sample[2], mask, threshold, size, stride)
    n, avg, std = _performance_summary(
        sample[1].shape[1:], winperf, size, stride, figure
    )
    if outdir is not None:
        _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
    return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)


def sliding_window_performances(
    dataset,
    name,
    predictions_folder,
    threshold,
    size,
    stride,
    figure,
    nproc=1,
    outdir=None,
):
    """
    Evaluates sliding window performances for a whole dataset


    Parameters
    ----------

    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
        datasets to iterate on

    name : str
        the local name of this dataset (e.g. ``train``, or ``test``), to be
        used when saving measures files.

    predictions_folder : str
        folder where predictions for the dataset images have been previously
        stored

    threshold : :py:class:`float`
        this should be a threshold (floating point) to apply to prediction
        maps to decide on positives and negatives.

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    figure : str
        the performance figure to use for calculating sliding window micro
        performances

    nproc : :py:class:`int`, Optional
        the number of processing cores to use for performance evaluation.
        Setting this number to a value ``<= 0`` will cause the function to use
        all available computing cores.  Otherwise, limit to the number of
        cores specified.  If set to the value of ``1``, then do not use
        multiprocessing.

    outdir : :py:class:`str`, Optional
        path where to save a visual representation of sliding window
        performances.  If set to ``None``, then do not save those to disk.


    Returns
    -------

    d : dict
        A dictionary in which keys are filename stems and values are
        dictionaries with the following contents:

        ``winperf`` : numpy.ndarray
            A 3D float array with all the sliding window performances for the
            input image

        ``n`` : numpy.ndarray
            A 2D numpy array containing the number of performance scores for
            every pixel in the original image

        ``avg`` : numpy.ndarray
            A 2D numpy array containing the average performances for every
            pixel on the input image considering the sliding window sizes and
            strides applied to the image

        ``std`` : numpy.ndarray
            A 2D numpy array containing the (unbiased) standard deviations
            for the provided performance figure, for every pixel on the input
            image considering the sliding window sizes and strides applied to
            the image

    """

    use_predictions_folder = os.path.join(predictions_folder, name)
    if not os.path.exists(use_predictions_folder):
        use_predictions_folder = predictions_folder

    with tqdm(range(len(dataset[name])), desc="sld-win-perf") as pbar:
        # we avoid the multiprocessing module if nproc==1
        # so it is easier to run ipdb
        if nproc != 1:
            if nproc <= 0:
                nproc = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(nproc)
            results = [
                pool.apply_async(
                    _winperf_for_sample,
                    args=(
                        use_predictions_folder,
                        threshold,
                        size,
                        stride,
                        dataset[name],
                        k,
                        figure,
                        outdir,
                    ),
                    callback=lambda x: pbar.update(),
                )
                for k in range(len(dataset[name]))
            ]
            pool.close()
            pool.join()
            data = [k.get() for k in results]
        else:
            data = []
            for k in pbar:
                winperf = _winperf_for_sample(
                    use_predictions_folder,
                    threshold,
                    size,
                    stride,
                    dataset[name],
                    k,
                    figure,
                    outdir,
                )
                data.append(winperf)

    return dict(data)
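
# A hypothetical call, assuming ``dataset`` is a dict of datasets with a
# ``test`` split and predictions were previously stored as HDF5 files under
# ``results/predictions/test`` (all names illustrative):
#
#   >>> d = sliding_window_performances(
#   ...     dataset, "test", "results/predictions", threshold=0.5,
#   ...     size=(128, 128), stride=(32, 32), figure="f1_score",
#   ...     nproc=4, outdir="results/sliding-window",
#   ... )
#   >>> sorted(d[next(iter(d))].keys())
#   ['avg', 'n', 'std', 'winperf']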


def _visual_performances_for_sample(
    size, stride, dataset, k, winperf, figure, outdir
):
    """
    Displays sliding window performances per sample

    This is a simplified version of :py:func:`_winperf_for_sample` in which
    the sliding window performances are not recalculated, but taken as input.
    It can be used in case you have the sliding window performances stored on
    disk, or if you are evaluating differences between the sliding windows of
    two different systems.


    Parameters
    ----------

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    dataset : :py:class:`torch.utils.data.Dataset`
        dataset to iterate on

    k : int
        the sample number (order inside the dataset, starting from zero), to
        calculate sliding window performances for

    winperf : numpy.ndarray
        the previously calculated sliding window performances to use for this
        assessment.

    figure : str
        the performance figure to use for calculating sliding window micro
        performances (e.g. ``accuracy``, ``f1_score`` or ``jaccard``).  Must
        be a supported performance figure as defined in
        :py:attr:`PERFORMANCE_FIGURES`

    outdir : :py:class:`str`
        path where to save a visual representation of sliding window
        performances.  If set to ``None``, then do not save those to disk.


    Returns
    -------

    stem : str
        The input file stem, that was just analyzed

    data : dict
        A dictionary containing the following fields:

        * ``winperf``: a 3D float :py:class:`numpy.ndarray` with the sliding
          window performance figures.  Notice this is just a copy of the
          input sliding window performance figures with the same name.
        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
          the original image pertaining to the analyzed sample, that indicates
          how many overlapping windows are available for each pixel in the
          image
        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
          average performance per pixel calculated over all overlapping
          windows for that particular pixel
        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
          unbiased standard-deviation (``ddof=1``) performance per pixel
          calculated over all overlapping windows for that particular pixel

    """

    sample = dataset[k]
    n, avg, std = _performance_summary(
        sample[1].shape[1:], winperf, size, stride, figure
    )
    if outdir is not None:
        _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir)
    return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std)


def visual_performances(
    dataset,
    name,
    winperfs,
    size,
    stride,
    figure,
    nproc=1,
    outdir=None,
):
    """
    Displays the performances for a whole dataset

    This is a simplified version of :py:func:`sliding_window_performances` in
    which the sliding window performances are not recalculated, but taken as
    input.  It can be used in case you have the sliding window performances
    stored on disk, or if you are evaluating differences between the sliding
    windows of two different systems.


    Parameters
    ----------

    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
        datasets to iterate on

    name : str
        the local name of this dataset (e.g. ``train``, or ``test``), to be
        used when saving measures files.

    winperfs : dict
        a dictionary mapping dataset stems to arrays containing the sliding
        window performances to be evaluated

    size : tuple
        size (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    stride : tuple
        strides (vertical, horizontal) for windows for which we will calculate
        partial performances based on the threshold and existing ground-truth

    figure : str
        the performance figure to use for calculating sliding window micro
        performances

    nproc : :py:class:`int`, Optional
        the number of processing cores to use for performance evaluation.
        Setting this number to a value ``<= 0`` will cause the function to use
        all available computing cores.  Otherwise, limit to the number of
        cores specified.  If set to the value of ``1``, then do not use
        multiprocessing.

    outdir : :py:class:`str`, Optional
        path where to save a visual representation of sliding window
        performances.  If set to ``None``, then do not save those to disk.


    Returns
    -------

    d : dict
        A dictionary in which keys are filename stems and values are
        dictionaries with the following contents:

        ``winperf`` : numpy.ndarray
            A 3D float array with all the sliding window performances for the
            input image

        ``n`` : numpy.ndarray
            A 2D numpy array containing the number of performance scores for
            every pixel in the original image

        ``avg`` : numpy.ndarray
            A 2D numpy array containing the average performances for every
            pixel on the input image considering the sliding window sizes and
            strides applied to the image

        ``std`` : numpy.ndarray
            A 2D numpy array containing the (unbiased) standard deviations
            for the provided performance figure, for every pixel on the input
            image considering the sliding window sizes and strides applied to
            the image

    """

    stems = list(dataset[name].keys())

    with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar:
        # we avoid the multiprocessing module if nproc==1
        # so it is easier to run ipdb
        if nproc != 1:
            if nproc <= 0:
                nproc = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(nproc)
            results = [
                pool.apply_async(
                    _visual_performances_for_sample,
                    args=(
                        size,
                        stride,
                        dataset[name],
                        k,
                        winperfs[stems[k]],
                        figure,
                        outdir,
                    ),
                    callback=lambda x: pbar.update(),
                )
                for k in range(len(dataset[name]))
            ]
            pool.close()
            pool.join()
            data = [k.get() for k in results]
        else:
            data = []
            for k in pbar:
                winperf = _visual_performances_for_sample(
                    size,
                    stride,
                    dataset[name],
                    k,
                    winperfs[stems[k]],
                    figure,
                    outdir,
                )
                data.append(winperf)

    return dict(data)
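
# A hypothetical reuse of stored performances (assuming the dataset object
# exposes ``keys()`` and ``d`` comes from ``sliding_window_performances``
# above; all names illustrative):
#
#   >>> winperfs = {stem: v["winperf"] for stem, v in d.items()}
#   >>> visual_performances(
#   ...     dataset, "test", winperfs, (128, 128), (32, 32), "f1_score",
#   ...     outdir="results/visual",
#   ... )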


def index_of_outliers(c):
    """Finds outliers (+/- 1.5*IQR) on an array of values

    The IQR measures the midspread, or where the central 50% of the data
    sits.  Points falling more than 1.5 IQR below the first quartile or above
    the third quartile are considered outliers.  Check out `this Wikipedia
    page <https://en.wikipedia.org/wiki/Interquartile_range>`_ for more
    details.


    Parameters
    ----------

    c : numpy.ndarray
        A 1D float array


    Returns
    -------

    indexes : numpy.ndarray
        A boolean mask, with the same shape as the input, that is ``True``
        for entries considered outliers in the distribution (outside the 1.5
        Interquartile Range).

    """

    l, h = numpy.quantile(c, (0.25, 0.75))
    iqr = h - l
    limits = (l - 1.5 * iqr, h + 1.5 * iqr)
    return (c < limits[0]) | (c > limits[1])
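
# For illustration, on a small array with one clear outlier (hypothetical
# values), only the extreme entry is flagged:
#
#   >>> c = numpy.array([1.0, 1.1, 0.9, 1.2, 0.8, 5.0])
#   >>> index_of_outliers(c)
#   array([False, False, False, False, False,  True])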


def write_analysis_text(names, da, db, f):
    """Writes a text file containing the most important statistics

    Compares sliding window performances in ``da`` and ``db`` taking into
    consideration their statistical properties.  A significance test is
    applied to check whether observed differences in the statistics of both
    distributions are significant.


    Parameters
    ==========

    names : tuple
        A tuple containing two strings which are the names of the systems
        being analyzed

    da : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        first system (first entry of ``names``)

    db : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        second system (second entry of ``names``)

    f : file
        An open file that will be used to dump the analysis to

    """

    diff = da - db
    f.write("Basic statistics from distributions:\n")

    headers = [
        "system",
        "samples",
        "median",
        "average",
        "std.dev.",
        "normaltest (p)",
    ]
    table = [
        [
            names[0],
            len(da),
            numpy.median(da),
            numpy.mean(da),
            numpy.std(da, ddof=1),
            scipy.stats.normaltest(da)[1],
        ],
        [
            names[1],
            len(db),
            numpy.median(db),
            numpy.mean(db),
            numpy.std(db, ddof=1),
            scipy.stats.normaltest(db)[1],
        ],
        [
            "differences",
            len(diff),
            numpy.median(diff),
            numpy.mean(diff),
            numpy.std(diff, ddof=1),
            scipy.stats.normaltest(diff)[1],
        ],
    ]
    tdata = tabulate.tabulate(table, headers, tablefmt="rst", floatfmt=".3f")
    f.write(textwrap.indent(tdata, " "))
    f.write("\n")

    # Note: dependent variable = sliding window performance figure in our case
    # Assumptions of a Paired T-test:
    # * The dependent variable must be continuous (interval/ratio). [OK]
    # * The observations are independent of one another. [OK]
    # * The dependent variable should be approximately normally distributed. [!!!]
    # * The dependent variable should not contain any outliers. [OK]

    if (diff == 0.0).all():
        logger.error(
            "Differences are exactly zero between both "
            "sliding window distributions, for **all** samples. "
            "Statistical significance tests are not meaningful in "
            "this context and will be skipped. This typically "
            "indicates an issue with the setup of prediction folders "
            "(duplicated?)"
        )
        return

    f.write("\nPaired significance tests:\n")
    w, p = scipy.stats.ttest_rel(da, db)
    f.write(f" * Paired T (H0: same distro): S = {w:g}, p = {p:.5f}\n")

    f.write(" * Wilcoxon:\n")

    w, p = scipy.stats.wilcoxon(diff)
    f.write(f" * H0 = same distro: W = {w:g}, p = {p:.5f}\n")

    w, p = scipy.stats.wilcoxon(diff, alternative="greater")
    f.write(
        f" * H0 = med({names[0]}) < med({names[1]}): "
        f"W = {w:g}, p = {p:.5f}\n"
    )

    w, p = scipy.stats.wilcoxon(diff, alternative="less")
    f.write(
        f" * H0 = med({names[0]}) > med({names[1]}): "
        f"W = {w:g}, p = {p:.5f}\n"
    )
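
# A hypothetical round-trip with two synthetic performance vectors, dumping
# the comparison to a text file (values illustrative only):
#
#   >>> rng = numpy.random.default_rng(0)
#   >>> da = rng.uniform(0.7, 0.9, size=1000)
#   >>> db = da + rng.normal(0.0, 0.01, size=1000)
#   >>> with open("analysis.txt", "w") as f:
#   ...     write_analysis_text(("system-a", "system-b"), da, db, f)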


def write_analysis_figures(names, da, db, fname):
    """Writes a PDF containing the most important plots for the analysis


    Parameters
    ==========

    names : tuple
        A tuple containing two strings which are the names of the systems
        being analyzed

    da : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        first system (first entry of ``names``)

    db : numpy.ndarray
        A 1D numpy array containing all the performance figures per sliding
        window analyzed and organized in a particular order (raster), for the
        second system (second entry of ``names``)

    fname : str
        The filename to use for storing the summarized performance figures

    """

    import matplotlib.pyplot as plt

    from matplotlib.backends.backend_pdf import PdfPages

    diff = da - db
    bins = 50

    with PdfPages(fname) as pdf:

        fig = plt.figure()
        plt.grid()
        plt.hist(da, bins=bins)
        plt.title(
            f"{names[0]} - (N={len(da)}; M={numpy.median(da):.3f}; "
            f"$\\mu$={numpy.mean(da):.3f}; $\\sigma$={numpy.std(da, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.hist(db, bins=bins)
        plt.title(
            f"{names[1]} - (N={len(db)}; M={numpy.median(db):.3f}; "
            f"$\\mu$={numpy.mean(db):.3f}; $\\sigma$={numpy.std(db, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.boxplot([da, db], labels=names)
        plt.title(f"Boxplots (N={len(da)})")
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.boxplot(diff, labels=(f"{names[0]} - {names[1]}",))
        plt.title(f"Paired Differences (N={len(da)})")
        pdf.savefig()
        plt.close(fig)

        fig = plt.figure()
        plt.grid()
        plt.hist(diff, bins=bins)
        plt.title(
            f"Paired Differences "
            f"(N={len(diff)}; M={numpy.median(diff):.3f}; "
            f"$\\mu$={numpy.mean(diff):.3f}; "
            f"$\\sigma$={numpy.std(diff, ddof=1):.3f})"
        )
        pdf.savefig()
        plt.close(fig)

        # note: pearsonr returns (correlation coefficient, p-value); we plot
        # the coefficient ``r``, not the test p-value
        r, _ = scipy.stats.pearsonr(da, db)
        fig = plt.figure()
        plt.grid()
        plt.scatter(da, db, marker=".", color="black")
        plt.xlabel(f"{names[0]}")
        plt.ylabel(f"{names[1]}")
        plt.title(f"Scatter (r={r:.3f})")
        pdf.savefig()
        plt.close(fig)
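
# Continuing the hypothetical example above, the same vectors can also be
# summarized visually into a multi-page PDF:
#
#   >>> write_analysis_figures(("system-a", "system-b"), da, db, "analysis.pdf")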