1#!/usr/bin/env python
2# coding=utf-8
3
4"""Shenzhen dataset for computer-aided diagnosis
5(extended with DensenetRS predictions)
6
The standard digital image database for Tuberculosis was created by the
National Library of Medicine, Maryland, USA in collaboration with Shenzhen
No.3 People’s Hospital, Guangdong Medical College, Shenzhen, China.
The chest X-rays are from out-patient clinics, and were captured as part of
the daily routine using Philips DR Digital Diagnost systems.
12
13* Reference: [MONTGOMERY-SHENZHEN-2014]_
14* Original resolution (height x width or width x height): 3000 x 3000 or less
15* Split reference: none
16* Protocol ``default``:
17
18 * Training samples: 64% of TB and healthy CXR (including labels)
19 * Validation samples: 16% of TB and healthy CXR (including labels)
20 * Test samples: 20% of TB and healthy CXR (including labels)
21
22"""
23
24import os
25import pkg_resources
26
27import bob.extension
28
29from ..dataset import JSONDataset
30from ..loader import make_delayed
31
# Paths to the JSON protocol definitions packaged next to this module:
# ``default.json`` first, followed by ``fold_0.json`` ... ``fold_9.json``,
# in that order.
_protocols = [
    pkg_resources.resource_filename(__name__, basename)
    for basename in ["default.json"]
    + ["fold_{}.json".format(index) for index in range(10)]
]
45
46def _raw_data_loader(sample):
47 return dict(
48 data=sample["data"],
49 label=sample["label"]
50 )
51
52
def _loader(context, sample):
    """Wrap ``sample`` through ``make_delayed``, keyed by its filename.

    Parameters
    ----------
    context
        Accepted for interface compatibility but not used here, since the
        database is homogeneous.
    sample
        Mapping that provides at least a ``"filename"`` entry; it is handed
        over to ``make_delayed`` together with ``_raw_data_loader``.

    Returns
    -------
    Whatever ``make_delayed`` returns for this sample.
    """
    # The sample is not unpacked here: ``_raw_data_loader`` is only passed
    # along as the callback, so extraction is left to ``make_delayed``.
    sample_key = sample["filename"]
    return make_delayed(sample, _raw_data_loader, key=sample_key)
57
58
# Module-level dataset instance: every entry listed in the protocol files is
# described by the three fields below and goes through ``_loader``.
dataset = JSONDataset(
    loader=_loader,
    fieldnames=("filename", "label", "data"),
    protocols=_protocols,
)
"""Extended Shenzhen dataset object"""