Coverage for src/deepdraw/data/shenzhen/__init__.py: 93%

14 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-11-30 15:00 +0100

1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> 

2# 

3# SPDX-License-Identifier: GPL-3.0-or-later 

4 

5"""Shenzhen No.3 People’s Hospital dataset for Lung Segmentation. 

6 

7The database includes 336 cases with manifestation of tuberculosis, and 326 

8normal cases. It contains a total of 662 images. Image size varies for each 

9X-ray. It is approximately 3K x 3K. One set of ground-truth lung annotations is 

10available for 566 of the 662 images. 

11 

12* Reference: [SHENZHEN-2014]_ 

13* Original resolution (height x width): Approximately 3K x 3K (varies) 

14* Configuration resolution: 512 x 512 (after rescaling) 

15* Split reference: [GAAL-2020]_ 

16* Protocol ``default``: 

17 

18 * Training samples: 396 (including labels) 

19 * Validation samples: 56 (including labels) 

20 * Test samples: 114 (including labels) 

21""" 

22 

23import os 

24 

25import pkg_resources 

26 

27from ...data.dataset import JSONDataset 

28from ...utils.rc import load_rc 

29from ..loader import load_pil_1, load_pil_rgb, make_delayed 

30 

# Protocol definition files bundled with this package; only the ``default``
# split is provided.
_protocols = [pkg_resources.resource_filename(__name__, "default.json")]

# Base directory against which the relative sample paths are resolved.  It is
# read from the ``datadir.shenzhen`` entry of the user configuration and
# falls back to the (resolved) current working directory at import time.
_root_path = load_rc().get("datadir.shenzhen", os.path.realpath(os.curdir))

36 

37 

def _raw_data_loader(sample):
    """Load the image and its annotation for a single sample.

    Parameters
    ----------
    sample
        Mapping with the keys ``"data"`` and ``"label"``, each holding a
        path relative to ``_root_path``.

    Returns
    -------
    dict
        A dictionary with the keys ``"data"`` (result of ``load_pil_rgb``)
        and ``"label"`` (result of ``load_pil_1``).
    """
    image = load_pil_rgb(os.path.join(_root_path, sample["data"]))
    mask = load_pil_1(os.path.join(_root_path, sample["label"]))
    return {"data": image, "label": mask}

43 

44 

def _loader(context, sample):
    """Wrap ``sample`` into a delayed sample backed by ``_raw_data_loader``.

    Parameters
    ----------
    context
        Ignored: the database is homogeneous, so no per-protocol or
        per-subset handling is required.
    sample
        Sample description forwarded to ``make_delayed``.

    Returns
    -------
    object
        Whatever ``make_delayed`` returns.  Delayed samples are used so that
        images are not all loaded at once.
    """
    return make_delayed(sample, _raw_data_loader)

49 

50 

# Module-level dataset instance built from the bundled protocol files; every
# sample carries a "data" and a "label" field and is loaded through _loader.
dataset = JSONDataset(
    protocols=_protocols,
    fieldnames=("data", "label"),
    loader=_loader,
)

"""Shenzhen CXR dataset object"""