Coverage for src/deepdraw/data/shenzhen/__init__.py: 93%
14 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
« prev ^ index » next coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
2#
3# SPDX-License-Identifier: GPL-3.0-or-later
5"""Shenzhen No.3 People’s Hospital dataset for Lung Segmentation.
7The database includes 336 cases with manifestation of tuberculosis, and 326
8normal cases. It contains a total of 662 images. Image size varies for each
9X-ray. It is approximately 3K x 3K. One set of ground-truth lung annotations is
10available for 566 of the 662 images.
12* Reference: [SHENZHEN-2014]_
13* Original resolution (height x width): Approximately 3K x 3K (varies)
14* Configuration resolution: 512 x 512 (after rescaling)
15* Split reference: [GAAL-2020]_
16* Protocol ``default``:
18 * Training samples: 396 (including labels)
19 * Validation samples: 56 (including labels)
20 * Test samples: 114 (including labels)
21"""
23import os
25import pkg_resources
27from ...data.dataset import JSONDataset
28from ...utils.rc import load_rc
29from ..loader import load_pil_1, load_pil_rgb, make_delayed
# Paths to the JSON protocol definitions shipped alongside this package.
# Only the "default" protocol is declared here.
_protocols = [pkg_resources.resource_filename(__name__, "default.json")]
# Base directory against which the relative sample paths are resolved.  It is
# read from the "datadir.shenzhen" entry of the user RC configuration; when
# that entry is absent, the resolved current working directory (evaluated at
# import time) is used instead.
_root_path = load_rc().get(
    "datadir.shenzhen",
    os.path.realpath(os.curdir),
)
def _raw_data_loader(sample):
    """Load the image and the label of one sample from disk.

    ``sample`` is a mapping with ``"data"`` and ``"label"`` entries holding
    paths relative to ``_root_path``.  Returns a dictionary with the same two
    keys, whose values are the objects returned by ``load_pil_rgb`` and
    ``load_pil_1`` respectively (presumably an RGB image and a binary mask -
    confirm against ``..loader``).
    """
    # The image is loaded before the label entry is even looked up, exactly
    # as in a left-to-right evaluation of the two loader calls.
    image = load_pil_rgb(os.path.join(_root_path, sample["data"]))
    mask = load_pil_1(os.path.join(_root_path, sample["label"]))
    return {"data": image, "label": mask}
def _loader(context, sample):
    """Wrap ``sample`` so that its data is loaded through ``_raw_data_loader``.

    ``context`` is accepted but not used - the database is homogeneous.  The
    sample is handed to ``make_delayed`` together with ``_raw_data_loader``,
    the intent being to avoid loading all images at once.
    """
    delayed_sample = make_delayed(sample, _raw_data_loader)
    return delayed_sample
# Module-level dataset instance built from the protocol files declared above;
# each sample exposes the "data" and "label" fields and is materialised via
# ``_loader``.
dataset = JSONDataset(
    protocols=_protocols,
    fieldnames=("data", "label"),
    loader=_loader,
)
"""Shenzhen CXR dataset object"""