#!/usr/bin/env python
# coding=utf-8
3
"""Padchest TB dataset for computer-aided diagnosis

A large chest x-ray image dataset with multi-label annotated reports.
This dataset includes more than 160,000 images from 67,000 patients that were
interpreted and reported by radiologists at Hospital San Juan (Spain) from 2009
to 2017, covering six different position views and additional information on
image acquisition and patient demography.

We keep only "PA" images here and only the "Tuberculosis" subset with an
equivalent number of "normal" images.

* Reference: [PADCHEST-2019]_
* Original resolution: variable, original size
* Labels: [PADCHEST-2019]_
* Split reference: 64%/16%/20%
* Protocol ``default``:

  * Training samples: 160
  * Validation samples: 40
  * Test samples: 50

* Protocol ``idiap``:

  * Image paths adapted to Idiap infrastructure

* Labels: DensenetRS predictions
"""
30
import os
import pkg_resources

import bob.extension

from ..dataset import JSONDataset
from ..loader import make_delayed

# Filesystem paths of the JSON protocol definitions, resolved relative to
# this module's package so they are found regardless of the install location.
_protocols = [
    pkg_resources.resource_filename(__name__, "tb_idiap.json"),
]
42
43
44def _raw_data_loader(sample):
45 return dict(
46 data=sample["data"],
47 label=sample["label"]
48 )
49
50
def _loader(context, sample):
    """Wrap ``sample`` into a delayed sample keyed by its filename.

    Parameters
    ----------
    context
        Unused here: the database is homogeneous, so no per-protocol or
        per-subset information is needed to load a sample.
    sample : dict
        Mapping that provides a ``"filename"`` entry (used as the key) plus
        whatever ``_raw_data_loader`` reads from it.

    Returns
    -------
    The object produced by ``make_delayed`` for this sample.
    """
    # "context" is ignored in this case - database is homogeneous
    # we return delayed samples to avoid loading all data at once
    return make_delayed(sample, _raw_data_loader, key=sample["filename"])
55
56
# Each entry of the JSON protocol files is mapped, in order, onto the field
# names below; samples are then materialised through ``_loader``.
dataset = JSONDataset(
    protocols=_protocols,
    fieldnames=("filename", "label", "data"),
    loader=_loader,
)
"""Padchest dataset object"""