1#!/usr/bin/env python
2# coding=utf-8
3
4import os
5import pkg_resources
6import importlib
7import click
8
9from bob.extension import rc
10from bob.extension.scripts.click_helper import (
11 verbosity_option,
12 AliasedGroup,
13)
14
15import logging
16logger = logging.getLogger(__name__)
17
def _get_supported_datasets():
    """Returns a sorted list of supported dataset names

    A dataset is reported as supported when it is a sub-directory of the
    ``data`` directory (looked up one level above the location
    ``pkg_resources`` resolves for this module) that holds an ``__init__.py``
    file.
    """

    basedir = pkg_resources.resource_filename(__name__, '')
    basedir = os.path.join(os.path.dirname(basedir), 'data')

    # os.listdir() yields entries in arbitrary order: sort them so that the
    # returned list (and anything printed from it) is stable across platforms
    # and runs.  Testing for the file directly avoids listing every candidate
    # directory just to look for a single entry.
    return sorted(
        k
        for k in os.listdir(basedir)
        if os.path.isfile(os.path.join(basedir, k, '__init__.py'))
    )
31
def _get_installed_datasets():
    """Returns a list of installed datasets as regular expression matches

    Every key of the ``rc`` configuration of the form
    ``bob.med.tb.<name>.datadir`` contributes one match object to the returned
    list.  From each match object, one can retrieve:

    * group(0): the name of the key for the dataset directory
    * group("name"): the short name for the dataset

    """

    import re
    dataset_re = re.compile(r'^bob\.med\.tb\.(?P<name>[^\.]+)\.datadir$')

    # run the expression only once per key, keeping the successful matches
    matches = (dataset_re.match(k) for k in rc.keys())
    return [m for m in matches if m is not None]
43
# Command group to which the dataset-related sub-commands defined in this
# module attach themselves (via ``@dataset.command``)
@click.group(cls=AliasedGroup)
def dataset():
    """Commands for listing and verifying datasets"""
48
49
# NOTE: click only honours the "\b" (no-rewrap) marker when it sits alone on
# the first line of a paragraph, hence the blank line before each marker in
# the epilog below.
@dataset.command(
    epilog="""Examples:

\b
    1. To install a dataset, set up its data directory ("datadir"). For
       example, to setup access to Montgomery files you downloaded locally at
       the directory "/path/to/montgomery/files", do the following:

\b
       $ bob config set "bob.med.tb.montgomery.datadir" "/path/to/montgomery/files"

       Notice this setting **is** case-sensitive.

    2. List all raw datasets supported (and configured):

       $ bob tb dataset list

""",
)
@verbosity_option()
def list(**kwargs):
    """Lists all supported and configured datasets"""

    # NOTE: this function shadows the ``list`` builtin at module level; the
    # name is kept because it is part of the module interface.

    # maps the short name of each configured dataset to the name of the
    # configuration key that holds its data directory
    installed = {
        k.group("name"): k.group(0) for k in _get_installed_datasets()
    }

    click.echo("Supported datasets:")
    for k in _get_supported_datasets():
        if k in installed:
            # configured: show the key and the directory it currently points to
            click.echo(f"- {k}: {installed[k]} = \"{rc.get(installed[k])}\"")
        else:
            # not configured: show the key the user would need to set
            click.echo(f"* {k}: bob.med.tb.{k}.datadir (not set)")
82
83
@dataset.command(
    epilog="""Examples:

    1. Check if all files of the Montgomery dataset can be loaded:

       $ bob tb dataset check -vv montgomery

    2. Check if all files of multiple installed datasets can be loaded:

       $ bob tb dataset check -vv montgomery shenzhen

    3. Check if all files of all installed datasets can be loaded:

       $ bob tb dataset check
""",
)
@click.argument(
    'dataset',
    nargs=-1,
)
@click.option(
    "--limit",
    "-l",
    help="Limit check to the first N samples in each dataset, making the "
    "check sensibly faster. Set it to zero to check everything.",
    required=True,
    type=click.IntRange(0),
    default=0,
)
@verbosity_option()
def check(dataset, limit, **kwargs):
    """Checks file access on one or more datasets"""

    # start from every configured dataset, then narrow the selection down to
    # the names given on the command line (if any were given)
    selected = _get_installed_datasets()
    if dataset:
        selected = [m for m in selected if m.group("name") in dataset]

    if not selected:
        click.echo("No configured datasets matching specifications")
        click.echo("Try bob tb dataset list --help to get help in "
                   "configuring a dataset")
        return

    failures = 0
    for m in selected:
        name = m.group('name')
        click.echo(f"Checking \"{name}\" dataset...")
        # relative import, resolved against this module's dotted name
        module = importlib.import_module(f"...data.{name}", __name__)
        # the value returned by check() is accumulated as an error count
        failures += module.dataset.check(limit)

    if not failures:
        click.echo("No errors reported")