Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# coding=utf-8
5"""Tests for Shenzhen CXR dataset"""
7import numpy
8import pytest
10from ..data.shenzhen import dataset
11from .utils import count_bw
def test_protocol_consistency():
    """Check the layout of the "default" protocol.

    The protocol must expose exactly three subsets ("train", "validation"
    and "test") with the expected number of samples, and every sample key
    must start with "ChinaSet_AllFiles".
    """

    protocol = dataset.subsets("default")
    assert len(protocol) == 3

    # (subset name, expected number of samples), checked in this order
    expected_sizes = (("train", 396), ("validation", 56), ("test", 114))

    for split, size in expected_sizes:
        assert split in protocol
        samples = protocol[split]
        assert len(samples) == size
        for sample in samples:
            assert sample.key.startswith("ChinaSet_AllFiles")
@pytest.mark.skip_if_rc_var_not_set("bob.ip.binseg.shenzhen.datadir")
def test_loading():
    """Load the samples of the "default" protocol and sanity-check them.

    For each sample of the "train", "validation" and "test" subsets, checks
    that the loaded data is a 2-entry dictionary with an RGB "data" image
    and a mode-"1" "label" image, that the total label pixel count lies
    within the expected image-size bounds, and that the white/black pixel
    proportion of the label stays below a fixed threshold.
    """

    min_image_size = (1130, 948)
    max_image_size = (3001, 3001)

    # The pixel-count bounds do not depend on the sample: compute them once.
    min_pixels = numpy.prod(min_image_size)
    max_pixels = numpy.prod(max_image_size)

    def _check_sample(s, bw_threshold_label):
        """Validate one sample and return its white/black label proportion.

        Parameters
        ----------
        s
            Sample to check; its ``data`` and ``key`` attributes are read.
        bw_threshold_label
            Exclusive upper bound for the white/black pixel proportion of
            the label.

        Returns
        -------
        The white/black pixel proportion measured on the label.
        """

        data = s.data
        assert isinstance(data, dict)
        assert len(data) == 2

        assert "data" in data
        assert data["data"].mode == "RGB"

        assert "label" in data
        assert data["label"].mode == "1"

        b, w = count_bw(data["label"])
        assert (b + w) >= min_pixels, (
            f"Counts of black + white ({b}+{w}) lower than smallest image "
            f"total image size ({min_pixels}) at '{s.key}':label"
        )
        assert (b + w) <= max_pixels, (
            f"Counts of black + white ({b}+{w}) higher than largest image "
            f"total image size ({max_pixels}) at '{s.key}':label"
        )

        # Fail with an explicit message instead of a bare ZeroDivisionError
        # when the label contains no black pixel at all.
        assert b > 0, (
            f"No black pixels counted at '{s.key}':label - cannot compute "
            f"the proportion between black and white pixels"
        )

        proportion = w / b
        assert proportion < bw_threshold_label, (
            f"The proportion between black and white pixels "
            f"({w}/{b}={proportion:.3f}) is larger than the allowed threshold "
            f"of {bw_threshold_label} at '{s.key}':label - this could "
            f"indicate a loading problem!"
        )

        return proportion

    limit = None  # use this to limit testing to first images only
    subset = dataset.subsets("default")
    for name in ("train", "validation", "test"):
        for s in subset[name][:limit]:
            _check_sample(s, 0.77)
@pytest.mark.skip_if_rc_var_not_set("bob.ip.binseg.shenzhen.datadir")
def test_check():
    """Assert that ``dataset.check()`` returns 0."""

    result = dataset.check()
    assert result == 0