1#!/usr/bin/env python
2# coding=utf-8
3
4"""HIV-TB dataset for computer-aided diagnosis (only BMP files)
5
6* Reference: [HIV-TB-2019]_
7* Original resolution (height x width or width x height): 2048 x 2500
8* Split reference: none
* Stratified k-fold protocol:
10
11 * Training samples: 72% of TB and healthy CXR (including labels)
12 * Validation samples: 18% of TB and healthy CXR (including labels)
13 * Test samples: 10% of TB and healthy CXR (including labels)
14
15"""
16
17import os
18import pkg_resources
19
20import bob.extension
21
22from ..dataset import JSONDataset
23from ..loader import load_pil_baw, make_delayed
24
# One JSON protocol file per fold (fold_0.json through fold_9.json), each
# resolved to a filesystem path relative to this module's package.
_protocols = [
    pkg_resources.resource_filename(__name__, f"fold_{fold}.json")
    for fold in range(10)
]
37
def _raw_data_loader(sample):
    """Load the image and label described by one sample entry.

    The image path is built by joining ``sample["data"]`` onto the value
    returned by ``bob.extension.rc.get`` for the
    ``bob.med.tb.hivtb.datadir`` key; the resolved current directory is
    passed to that call as the default.

    Parameters
    ----------
    sample
        Mapping providing the ``"data"`` (image path relative to the data
        directory) and ``"label"`` entries.

    Returns
    -------
    dict
        ``data`` holds the result of ``load_pil_baw`` on the image path and
        ``label`` holds ``sample["label"]`` unchanged.
    """
    base_dir = bob.extension.rc.get(
        "bob.med.tb.hivtb.datadir", os.path.realpath(os.curdir)
    )
    image_path = os.path.join(base_dir, sample["data"])
    image = load_pil_baw(image_path)
    return {"data": image, "label": sample["label"]}
46
47
def _loader(context, sample):
    """Wrap ``sample`` so its image is loaded through ``_raw_data_loader``.

    ``context`` is accepted but never used: the database is homogeneous, so
    every sample is handled the same way.  The sample is handed to
    ``make_delayed`` rather than loaded here, to avoid loading all images
    at once.
    """
    delayed_sample = make_delayed(sample, _raw_data_loader)
    return delayed_sample
52
53
# Module-level dataset instance: built from the per-fold JSON protocol files,
# with each entry exposing a "data" and a "label" field and samples produced
# through ``_loader``.
dataset = JSONDataset(
    loader=_loader,
    fieldnames=("data", "label"),
    protocols=_protocols,
)
"""HIV-TB dataset object"""