#!/usr/bin/env python
# coding=utf-8

"""TB-POC dataset for computer-aided diagnosis

* Reference: [TB-POC-2018]_
* Original resolution (height x width or width x height): 2048 x 2500
* Split reference: none
* Stratified kfold protocol:

  * Training samples: 72% of TB and healthy CXR (including labels)
  * Validation samples: 18% of TB and healthy CXR (including labels)
  * Test samples: 10% of TB and healthy CXR (including labels)

"""
16
import os

import bob.extension
import pkg_resources

from ..dataset import JSONDataset
from ..loader import load_pil_baw, make_delayed
24
# Paths of the JSON protocol files resolved relative to this module, one per
# fold of the k-fold protocol (fold_0.json ... fold_9.json), in fold order.
_protocols = [
    pkg_resources.resource_filename(__name__, "fold_%d.json" % fold)
    for fold in range(10)
]
37
def _raw_data_loader(sample):
    """Extract the data/label pair from a single sample entry.

    Parameters
    ----------
    sample : dict
        Mapping that provides at least the ``"data"`` and ``"label"`` keys.
        Any other key (e.g. ``"filename"``) is ignored.

    Returns
    -------
    dict
        A new dictionary with exactly two entries, ``data`` and ``label``,
        holding the very same objects found in ``sample`` (no copy is made).

    Raises
    ------
    KeyError
        If ``sample`` lacks the ``"data"`` or the ``"label"`` key.
    """
    return {"data": sample["data"], "label": sample["label"]}
43
44
def _loader(context, sample):
    """Turn one protocol entry into a delayed sample.

    Parameters
    ----------
    context
        Unused here: the database is homogeneous, so no per-protocol or
        per-subset information is needed to load a sample.
    sample : dict
        Entry providing at least the ``"filename"``, ``"data"`` and
        ``"label"`` keys.

    Returns
    -------
    object
        Whatever ``make_delayed`` returns for ``sample``, using
        ``_raw_data_loader`` as the loading function and the sample's
        ``"filename"`` as its key.
        NOTE(review): presumably a lazily-evaluated sample object -- confirm
        against ``make_delayed`` in the ``loader`` module.

    Raises
    ------
    KeyError
        If ``sample`` has no ``"filename"`` key.
    """
    # Delayed samples are returned to avoid loading all images at once.
    return make_delayed(sample, _raw_data_loader, key=sample["filename"])
49
50
# Module-level dataset instance: binds the per-fold JSON protocol files to the
# field names of each entry and to the delayed-sample loader defined above.
dataset = JSONDataset(
    protocols=_protocols,
    fieldnames=("filename", "label", "data"),
    loader=_loader,
)
"""Extended TB-POC dataset object"""