Coverage for src/deepdraw/script/dataset.py: 91%

56 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-11-30 15:00 +0100

1# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> 

2# 

3# SPDX-License-Identifier: GPL-3.0-or-later 

4 

5from __future__ import annotations 

6 

7import importlib.resources 

8import os 

9 

10import click 

11 

12from clapper.click import AliasedGroup, verbosity_option 

13from clapper.logging import setup 

14 

# Logger used by the commands below.  It is created for the top-level package
# (the first component of this module's dotted name) and formats records as
# "<LEVEL>: <message>".
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")

16 

17 

def _get_supported_datasets() -> set[str]:
    """Returns the set of supported dataset names.

    A dataset counts as supported when the ``data/`` resource directory of the
    top-level package contains a sub-directory of that name which itself holds
    an ``__init__.py`` file.

    Returns
    -------
    set[str]
        Names of the sub-directories of ``data/`` that contain an
        ``__init__.py`` file.
    """
    basedir = importlib.resources.files(__name__.split(".", 1)[0]).joinpath(
        "data/"
    )

    # Stay within the ``Traversable`` API rather than converting the entry to
    # a string and calling ``os.listdir()`` on it: package resources are not
    # guaranteed to live on the regular filesystem.
    return {
        candidate.name
        for candidate in basedir.iterdir()
        if candidate.is_dir() and candidate.joinpath("__init__.py").is_file()
    }

30 

31 

def _get_installed_datasets() -> dict[str, str]:
    """Returns the dataset directories found in the user configuration.

    The ``datadir`` entry of the configuration returned by
    :py:func:`deepdraw.utils.rc.load_rc` is copied into a new dictionary.

    Returns
    -------
    dict[str, str]
        A copy of the ``datadir`` configuration entry, or an empty dictionary
        if the configuration has no such entry.  The commands in this module
        look entries up by dataset name and display the value as that
        dataset's data directory.
    """
    # Local import: the RC loader is only needed when this helper runs.
    from deepdraw.utils.rc import load_rc

    configuration = load_rc()
    datadirs = configuration.get("datadir", {})
    return dict(datadirs)

41 

42 

@click.group(cls=AliasedGroup)
def dataset() -> None:
    """Commands for listing and verifying datasets."""
    # Nothing to execute here: this function only declares the command group
    # that the ``@dataset.command`` decorators below attach to.  The docstring
    # above is left as-is because click uses it as the group's help text.

47 

48 

@dataset.command(
    epilog="""Examples:

\b
    1. To install a dataset, set up its data directory ("datadir"). For
       example, to setup access to Montgomery files you downloaded locally at
       the directory "/path/to/montgomery/files", edit the RC file (typically
       ``$HOME/.config/deepdraw.toml``), and add a line like the following:

       .. code:: toml

          [datadir]
          montgomery = "/path/to/montgomery/files"

       .. note::

          This setting **is** case-sensitive.

\b
    2. List all raw datasets supported (and configured):

       .. code:: sh

          $ deepdraw dataset list
""",
)
@verbosity_option(logger=logger, expose_value=False)
def list():
    """Lists all supported and configured datasets."""
    # NOTE: the function name shadows the ``list`` builtin at module level; it
    # is kept because click derives the sub-command name from it.
    available = _get_supported_datasets()
    configured = _get_installed_datasets()

    click.echo("Supported datasets:")
    for name in sorted(available):
        # Supported but absent from the "datadir" configuration: show the
        # configuration key the user would have to set.
        if name not in configured:
            click.echo(f"* {name}: datadir.{name} (not set)")
            continue

        # Supported and configured: show the configured value.
        click.echo(f'- {name}: "{configured[name]}"')

87 

88 

@dataset.command(
    epilog="""Examples:

\b
    1. Check if all files of the Montgomery dataset can be loaded:

       .. code:: sh

          deepdraw dataset check -vv montgomery

\b
    2. Check if all files of multiple installed datasets can be loaded:

       .. code:: sh

          deepdraw dataset check -vv montgomery shenzhen

\b
    3. Check if all files of all installed datasets can be loaded:

       .. code:: sh

          deepdraw dataset check

""",
)
@click.argument(
    "dataset",
    nargs=-1,
)
@click.option(
    "--limit",
    "-l",
    help="Limit check to the first N samples in each dataset, making the "
    "check sensibly faster. Set it to zero to check everything.",
    required=True,
    type=click.IntRange(0),
    default=0,
)
@verbosity_option(logger=logger, expose_value=False)
def check(dataset, limit):
    """Checks file access on one or more datasets."""
    # The docstring above is click's help text for this command, so the
    # details are documented in comments instead:
    #
    # * ``dataset``: tuple of dataset names given on the command line (may be
    #   empty, in which case every supported dataset is considered).
    # * ``limit``: non-negative integer forwarded unchanged to each dataset's
    #   ``check()`` function.
    #
    # Raises ``click.UsageError`` if any requested dataset is not supported.
    import importlib

    to_check = _get_installed_datasets()
    supported = _get_supported_datasets()
    dataset = set(dataset)

    if dataset:
        # Validate user input with a proper click error rather than an
        # ``assert``: assertions are stripped when Python runs with ``-O`` and
        # would otherwise surface to the user as a traceback.
        unsupported = dataset - supported
        if unsupported:
            raise click.UsageError(f"Unsupported datasets: {unsupported}")
    else:
        dataset = supported

    # Keep only the configured datasets that were requested (or, if none were
    # requested, that are supported).  The filter is applied unconditionally
    # so that configured-but-unsupported entries are never imported below.
    to_check = {k: v for k, v in to_check.items() if k in dataset}

    if not to_check:
        click.secho(
            "WARNING: No configured datasets matching specifications",
            fg="yellow",
            bold=True,
        )
        click.echo(
            "Try deepdraw dataset list --help to get help in "
            "configuring a dataset"
        )
    else:
        errors = 0
        for k in to_check:
            click.echo(f'Checking "{k}" dataset...')
            # Relative import anchored at this module's dotted name: the three
            # leading dots climb to the top-level package, so this loads
            # ``<top-level package>.data.<k>``.
            module = importlib.import_module(f"...data.{k}", __name__)
            errors += module.dataset.check(limit)
        # NOTE(review): when errors are reported nothing is printed here and
        # the exit status is not changed -- presumably
        # ``module.dataset.check()`` reports the details itself; confirm.
        if not errors:
            click.echo("No errors reported")