Coverage for src/deepdraw/script/dataset.py: 91%
56 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
« prev ^ index » next coverage.py v7.3.1, created at 2023-11-30 15:00 +0100
1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
2#
3# SPDX-License-Identifier: GPL-3.0-or-later
5from __future__ import annotations
7import importlib.resources
8import os
10import click
12from clapper.click import AliasedGroup, verbosity_option
13from clapper.logging import setup
# Module-wide logger, obtained from clapper's ``setup()`` for the top-level
# package name (the first dotted component of ``__name__``).  It is handed to
# ``verbosity_option`` on the commands defined in this module.
logger = setup(
    __name__.partition(".")[0],
    format="%(levelname)s: %(message)s",
)
def _get_supported_datasets() -> set[str]:
    """Returns the set of supported dataset names.

    A dataset is reported when the ``data/`` resource directory of the
    top-level package contains a sub-directory holding an ``__init__.py``
    file; the sub-directory name is the dataset name.
    """
    basedir = importlib.resources.files(__name__.split(".", 1)[0]).joinpath(
        "data/"
    )

    # Stay on the Traversable API instead of ``os.listdir(str(candidate))``:
    # the latter only works when resources are real filesystem directories.
    return {
        candidate.name
        for candidate in basedir.iterdir()
        if candidate.is_dir() and candidate.joinpath("__init__.py").is_file()
    }
def _get_installed_datasets() -> dict[str, str]:
    """Returns the datasets configured in the RC file.

    The result is a plain ``dict`` copy of the ``datadir`` section of the
    loaded RC configuration (for instance
    ``{"montgomery": "/path/to/montgomery/files"}``).  It is empty when that
    section is absent.
    """
    # Imported at call time rather than at module import.
    from deepdraw.utils.rc import load_rc

    rc = load_rc()
    datadirs = rc.get("datadir", {})
    return dict(datadirs)
# Command group for dataset-related sub-commands.  The docstring below is
# what click shows as the group's help text, so it is kept short.
@click.group(cls=AliasedGroup)
def dataset() -> None:
    """Commands for listing and verifying datasets."""
@dataset.command(
    epilog="""Examples:

\b
    1. To install a dataset, set up its data directory ("datadir"). For
       example, to setup access to Montgomery files you downloaded locally at
       the directory "/path/to/montgomery/files", edit the RC file (typically
       ``$HOME/.config/deepdraw.toml``), and add a line like the following:

       .. code:: toml

          [datadir]
          montgomery = "/path/to/montgomery/files"

       .. note::

          This setting **is** case-sensitive.

\b
    2. List all raw datasets supported (and configured):

       .. code:: sh

          $ deepdraw dataset list
""",
)
@verbosity_option(logger=logger, expose_value=False)
def list():
    """Lists all supported and configured datasets."""
    # The function name doubles as the click sub-command name, which is why
    # it shadows the ``list`` builtin inside this module.
    known = _get_supported_datasets()
    configured = _get_installed_datasets()

    click.echo("Supported datasets:")
    for name in sorted(known):
        if name not in configured:
            # Supported, but no ``datadir.<name>`` entry in the RC file.
            click.echo(f"* {name}: datadir.{name} (not set)")
            continue
        click.echo(f'- {name}: "{configured[name]}"')
@dataset.command(
    epilog="""Examples:

\b
    1. Check if all files of the Montgomery dataset can be loaded:

       .. code:: sh

          deepdraw dataset check -vv montgomery

\b
    2. Check if all files of multiple installed datasets can be loaded:

       .. code:: sh

          deepdraw dataset check -vv montgomery shenzhen

\b
    3. Check if all files of all installed datasets can be loaded:

       .. code:: sh

          deepdraw dataset check

""",
)
@click.argument(
    "dataset",
    nargs=-1,
)
@click.option(
    "--limit",
    "-l",
    help="Limit check to the first N samples in each dataset, making the "
    "check sensibly faster. Set it to zero to check everything.",
    required=True,
    type=click.IntRange(0),
    default=0,
)
@verbosity_option(logger=logger, expose_value=False)
def check(dataset, limit):
    """Checks file access on one or more datasets."""
    # Parameters (documented here because the docstring is the CLI help):
    #   dataset: names given on the command line; empty means "every
    #            supported dataset that is configured".
    #   limit:   non-negative integer forwarded to each dataset's ``check()``.
    # Raises ``click.UsageError`` when a requested name is not supported.
    import importlib

    to_check = _get_installed_datasets()
    supported = _get_supported_datasets()
    dataset = set(dataset)

    if dataset:
        # Validate user input explicitly: an ``assert`` would be stripped
        # under ``python -O`` and otherwise surface as a raw traceback.
        unsupported = dataset - supported
        if unsupported:
            raise click.UsageError(
                f"Unsupported datasets: {', '.join(sorted(unsupported))}"
            )
    else:
        dataset = supported

    if dataset:
        # Keep only the configured datasets that were selected.
        to_check = {k: v for k, v in to_check.items() if k in dataset}

    if not to_check:
        click.secho(
            "WARNING: No configured datasets matching specifications",
            fg="yellow",
            bold=True,
        )
        click.echo(
            "Try deepdraw dataset list --help to get help in "
            "configuring a dataset"
        )
    else:
        errors = 0
        for k in to_check:
            click.echo(f'Checking "{k}" dataset...')
            # Relative import anchored on this module's dotted name; the three
            # leading dots select ``<top-level package>.data.<k>`` when this
            # module sits two levels below the top-level package.
            module = importlib.import_module(f"...data.{k}", __name__)
            errors += module.dataset.check(limit)
        if not errors:
            click.echo("No errors reported")
165 if not errors:
166 click.echo("No errors reported")