Coverage for /scratch/builds/bob/bob.med.tb/miniconda/conda-bld/bob.med.tb_1637571489937/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeho/lib/python3.8/site-packages/bob/med/tb/data/nih_cxr14_re/__init__.py: 92%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

13 statements  

1#!/usr/bin/env python 

2# coding=utf-8 

3 

4"""NIH CXR14 (relabeled) dataset for computer-aided diagnosis 

5 

6This dataset was extracted from the clinical PACS database at the National  

7Institutes of Health Clinical Center (USA) and represents 60% of all  

8their radiographs. It contains labels for fourteen common radiological  

9signs in this order: cardiomegaly, emphysema, effusion, hernia, infiltration, 

10mass, nodule, atelectasis, pneumothorax, pleural thickening, pneumonia,  

11fibrosis, edema and consolidation.  

12This is the relabeled version created in the CheXNeXt study. 

13 

14* Reference: [NIH-CXR14-2017]_ 

15* Original resolution (height x width or width x height): 1024 x 1024 

16* Labels: [CHEXNEXT-2018]_ 

17* Split reference: [CHEXNEXT-2018]_ 

18* Protocol ``default``: 

19 

20 * Training samples: 98'637 (including labels) 

21 * Validation samples: 6'350 (including labels) 

22 * Test samples: 0 

23 

24* Protocol ``idiap``: 

25 * Images path adapted to Idiap infrastructure 

26 

27""" 

28 

29import os 

30import pkg_resources 

31 

32import bob.extension 

33 

34from ..dataset import JSONDataset 

35from ..loader import load_pil_rgb, make_delayed 

36 

# Filesystem paths of the JSON protocol files, resolved relative to this module.
_protocols = [
    pkg_resources.resource_filename(__name__, filename)
    for filename in ("default.json", "idiap.json", "cardiomegaly_idiap.json")
]

42 

43 

def _raw_data_loader(sample):
    """Load the image and label of a single raw sample.

    The image path is ``sample["data"]`` joined onto the directory stored
    under the ``bob.med.tb.nih_cxr14_re.datadir`` key of ``bob.extension.rc``.
    The resolved current working directory is passed as the fallback value
    for that lookup.

    Parameters
    ----------
    sample : dict
        Mapping providing at least the keys ``"data"`` (path of the image,
        joined onto the data directory) and ``"label"``.

    Returns
    -------
    dict
        ``data`` holds the result of ``load_pil_rgb`` for the image path and
        ``label`` is ``sample["label"]`` passed through unchanged.
    """
    root = bob.extension.rc.get(
        "bob.med.tb.nih_cxr14_re.datadir", os.path.realpath(os.curdir)
    )
    image = load_pil_rgb(os.path.join(root, sample["data"]))
    return {"data": image, "label": sample["label"]}

56 

57 

def _loader(context, sample):
    """Wrap ``sample`` into a delayed sample backed by ``_raw_data_loader``.

    ``context`` is ignored because the database is homogeneous.  Delayed
    samples are returned to avoid loading all images at once.
    """
    return make_delayed(sample, _raw_data_loader)

62 

63 

dataset = JSONDataset(
    protocols=_protocols,
    fieldnames=("data", "label"),
    loader=_loader,
)
"""NIH CXR14 (relabeled) dataset object"""