1#!/usr/bin/env python
2# coding=utf-8
3
4
5"""Tests for Montgomery County CXR dataset"""
6
7import numpy
8import pytest
9
10from ...binseg.data.montgomery import dataset
11from .utils import count_bw
12
13
def test_protocol_consistency():
    """Check split names, split sizes and sample keys of the "default" protocol.

    The protocol must expose exactly three subsets -- "train", "validation"
    and "test" -- holding 96, 14 and 28 samples respectively, and every
    sample key must start with ``CXR_png``.
    """

    protocol = dataset.subsets("default")
    assert len(protocol) == 3

    # (subset name, expected number of samples), checked in this order
    expected_sizes = (("train", 96), ("validation", 14), ("test", 28))

    for name, size in expected_sizes:
        assert name in protocol
        samples = protocol[name]
        assert len(samples) == size
        for sample in samples:
            assert sample.key.startswith("CXR_png")
33
34
@pytest.mark.skip_if_rc_var_not_set("bob.ip.binseg.montgomery.datadir")
def test_loading():
    """Load the samples of the "default" protocol and sanity-check each one.

    For every sample in the "train", "validation" and "test" subsets, the
    loaded data must be a 2-entry dictionary with an RGB ``"data"`` image and
    a 1-bit ``"label"`` image, both of one of the two accepted sizes.  The
    white-to-black pixel proportion of the label must stay below 0.67.
    """

    # Images are accepted in either orientation; both hold the same number
    # of pixels, so a single expected pixel count is enough.
    allowed_sizes = ((4892, 4020), (4020, 4892))
    pixel_count = numpy.prod(allowed_sizes[0])

    def _check_sample(s, bw_threshold_label):
        """Validate one sample and return its white/black label proportion.

        ``s`` must provide ``.data`` (the loaded dictionary) and ``.key``
        (used in failure messages).  ``bw_threshold_label`` is the exclusive
        upper bound for the ratio of white to black pixels in the label.
        """

        data = s.data
        assert isinstance(data, dict)
        assert len(data) == 2

        # NOTE(review): ``.size``/``.mode`` look like PIL image attributes --
        # confirm against the dataset loader.
        assert "data" in data
        assert data["data"].size in allowed_sizes
        assert data["data"].mode == "RGB"

        assert "label" in data
        assert data["label"].size in allowed_sizes
        assert data["label"].mode == "1"

        b, w = count_bw(data["label"])
        assert (b + w) == pixel_count, (
            f"Counts of black + white ({b}+{w}) do not add up to total "
            f"image size ({pixel_count}) at '{s.key}':label"
        )

        # Fail with a message naming the sample instead of a bare division
        # error (or an infinite ratio) when the label has no black pixel.
        assert b > 0, (
            f"No black pixels found at '{s.key}':label - cannot compute "
            f"the white/black proportion; this could indicate a loading "
            f"problem!"
        )

        ratio = w / b
        assert ratio < bw_threshold_label, (
            f"The proportion between black and white pixels "
            f"({w}/{b}={ratio:.3f}) is larger than the allowed threshold "
            f"of {bw_threshold_label} at '{s.key}':label - this could "
            f"indicate a loading problem!"
        )

        # To visualize images, uncomment the following code.  It should
        # display an image with a faded background representing the original
        # data, blended with green labels.
        # from ..data.utils import overlayed_image
        # display = overlayed_image(data["data"], data["label"])
        # display.show()
        # import ipdb; ipdb.set_trace()

        return ratio

    limit = None  # use this to limit testing to first images only
    subset = dataset.subsets("default")

    # The helper is called only for its assertions, so its return value is
    # deliberately ignored here.
    for name in ("train", "validation", "test"):
        for s in subset[name][:limit]:
            _check_sample(s, 0.67)
89
90
@pytest.mark.skip_if_rc_var_not_set("bob.ip.binseg.montgomery.datadir")
def test_check():
    """Run the dataset's own ``check()`` and require that it returns 0.

    NOTE(review): 0 presumably means "no sample failed to load" -- confirm
    against the dataset implementation.
    """

    outcome = dataset.check()
    assert outcome == 0