1#!/usr/bin/env python
2# coding=utf-8
3
4"""Shenzhen dataset for computer-aided diagnosis
5
The standard digital image database for Tuberculosis was created by the
National Library of Medicine, Maryland, USA in collaboration with Shenzhen
No.3 People’s Hospital, Guangdong Medical College, Shenzhen, China.
The Chest X-rays are from out-patient clinics, and were captured as part of
the daily routine using Philips DR Digital Diagnose systems.
11
12* Reference: [MONTGOMERY-SHENZHEN-2014]_
13* Original resolution (height x width or width x height): 3000 x 3000 or less
14* Split reference: none
15* Protocol ``default``:
16
17 * Training samples: 64% of TB and healthy CXR (including labels)
18 * Validation samples: 16% of TB and healthy CXR (including labels)
19 * Test samples: 20% of TB and healthy CXR (including labels)
20
21"""
22
23import os
24import pkg_resources
25
26import bob.extension
27
28from ..dataset import JSONDataset
29from ..loader import load_pil_baw, make_delayed
30
# Filesystem paths of the JSON protocol files, resolved through pkg_resources
# relative to this module: the "default" split first, then folds 0 through 9.
_protocols = [
    pkg_resources.resource_filename(__name__, json_name)
    for json_name in (
        "default.json",
        "fold_0.json",
        "fold_1.json",
        "fold_2.json",
        "fold_3.json",
        "fold_4.json",
        "fold_5.json",
        "fold_6.json",
        "fold_7.json",
        "fold_8.json",
        "fold_9.json",
    )
]
44
def _raw_data_loader(sample):
    """Load the image of one sample and pair it with its label.

    The image path is built by joining the data directory with
    ``sample["data"]``.  The data directory is looked up under the key
    ``bob.med.tb.shenzhen.datadir`` in ``bob.extension.rc``, with the
    resolved current directory passed as the fallback value.

    Parameters
    ----------
    sample
        Dict-like object providing a ``"data"`` entry (path joined onto the
        data directory) and a ``"label"`` entry.

    Returns
    -------
    dict
        ``data`` holds whatever ``load_pil_baw`` returns for the image path;
        ``label`` holds ``sample["label"]`` unchanged.
    """
    datadir = bob.extension.rc.get(
        "bob.med.tb.shenzhen.datadir", os.path.realpath(os.curdir)
    )
    image_path = os.path.join(datadir, sample["data"])
    image = load_pil_baw(image_path)
    return {"data": image, "label": sample["label"]}
52
53
def _loader(context, sample):
    """Wrap a sample so that ``_raw_data_loader`` is applied lazily.

    Parameters
    ----------
    context
        Ignored in this case, since the database is homogeneous.
    sample
        Sample entry, forwarded untouched to ``make_delayed``.

    Returns
    -------
    object
        The result of ``make_delayed(sample, _raw_data_loader)``.
    """
    # a delayed sample is returned to avoid loading all images at once
    delayed_sample = make_delayed(sample, _raw_data_loader)
    return delayed_sample
58
59
# Module-level dataset instance: built from the protocol files listed in
# ``_protocols``, with the field names "data" and "label", and ``_loader``
# as the function that turns entries into (delayed) samples.
dataset = JSONDataset(
    protocols=_protocols, fieldnames=("data", "label"), loader=_loader
)
"""Shenzhen dataset object"""