Coverage for /scratch/builds/bob/bob.med.tb/miniconda/conda-bld/bob.med.tb_1637571489937/_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeho/lib/python3.8/site-packages/bob/med/tb/scripts/dataset.py: 81%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

53 statements  

1#!/usr/bin/env python 

2# coding=utf-8 

3 

4import os 

5import pkg_resources 

6import importlib 

7import click 

8 

9from bob.extension import rc 

10from bob.extension.scripts.click_helper import ( 

11 verbosity_option, 

12 AliasedGroup, 

13) 

14 

15import logging 

16logger = logging.getLogger(__name__) 

17 

def _get_supported_datasets():
    """Returns a sorted list of supported dataset names

    The ``data`` directory sitting next to the location that
    ``pkg_resources`` reports for this module is scanned.  Every entry of
    that directory which contains an ``__init__.py`` file counts as a
    supported dataset.

    Returns
    -------
    list of str
        Names of the matching sub-directories, in alphabetical order.
        ``os.listdir`` yields entries in arbitrary order, so the result is
        sorted to keep it identical across filesystems.
    """

    basedir = pkg_resources.resource_filename(__name__, '')
    basedir = os.path.join(os.path.dirname(basedir), 'data')

    # Probing for the marker file directly avoids listing the full contents
    # of every candidate, and implies that the candidate is a directory.
    return sorted(
        k
        for k in os.listdir(basedir)
        if os.path.isfile(os.path.join(basedir, k, '__init__.py'))
    )

31 

def _get_installed_datasets():
    """Returns one regular-expression match per configured dataset

    Every key of ``rc`` is tested against the pattern
    ``bob.med.tb.<name>.datadir``; keys that do not fit are skipped.  For
    each returned match object:

    * group(0): the name of the key for the dataset directory
    * group("name"): the short name for the dataset

    """

    import re

    pattern = re.compile(r'^bob\.med\.tb\.(?P<name>[^\.]+)\.datadir$')

    matches = []
    for key in rc.keys():
        found = pattern.match(key)
        if found:
            matches.append(found)
    return matches

43 

# Root of the "dataset" command group; sub-commands attach themselves to it
# through ``@dataset.command``.  The docstring below is shown as help text.
@click.group(cls=AliasedGroup)
def dataset():
    """Commands for listing and verifying datasets"""

48 

49 

@dataset.command(
    epilog="""Examples:

\b
 1. To install a dataset, set up its data directory ("datadir"). For
 example, to setup access to Montgomery files you downloaded locally at
 the directory "/path/to/montgomery/files", do the following:
\b
 $ bob config set "bob.med.tb.montgomery.datadir" "/path/to/montgomery/files"

 Notice this setting **is** case-sensitive.

 2. List all raw datasets supported (and configured):

 $ bob tb dataset list

""",
)
@verbosity_option()
def list(**kwargs):
    """Lists all supported and configured datasets"""

    # NOTE: the docstring above doubles as the command's help text.
    names = _get_supported_datasets()

    # Maps the short dataset name to the full ``rc`` key that configures it.
    configured = {
        match.group("name"): match.group(0)
        for match in _get_installed_datasets()
    }

    click.echo("Supported datasets:")
    for name in names:
        key = configured.get(name)
        if key is None:
            # Supported, but no data directory has been configured yet.
            click.echo(f"* {name}: bob.med.tb.{name}.datadir (not set)")
            continue
        click.echo(f"- {name}: {key} = \"{rc.get(key)}\"")

82 

83 

@dataset.command(
    epilog="""Examples:

 1. Check if all files of the Montgomery dataset can be loaded:

 $ bob tb dataset check -vv montgomery

 2. Check if all files of multiple installed datasets can be loaded:

 $ bob tb dataset check -vv montgomery shenzhen

 3. Check if all files of all installed datasets can be loaded:

 $ bob tb dataset check
""",
)
@click.argument('dataset', nargs=-1)
@click.option(
    "--limit",
    "-l",
    help="Limit check to the first N samples in each dataset, making the "
    "check sensibly faster. Set it to zero to check everything.",
    required=True,
    type=click.IntRange(0),
    default=0,
)
@verbosity_option()
def check(dataset, limit, **kwargs):
    """Checks file access on one or more datasets"""

    # NOTE: the docstring above doubles as the command's help text.

    # Start from every configured dataset; names given on the command line
    # narrow the selection down.
    selected = _get_installed_datasets()
    if dataset:
        selected = [m for m in selected if m.group("name") in dataset]

    # Nothing to verify: point the user at the configuration help and stop.
    if not selected:
        click.echo("No configured datasets matching specifications")
        click.echo(
            "Try bob tb dataset list --help to get help in "
            "configuring a dataset"
        )
        return

    # The value returned by each dataset's ``dataset.check(limit)`` is
    # accumulated as the running error count.
    failures = 0
    for match in selected:
        name = match.group("name")
        click.echo(f"Checking \"{name}\" dataset...")
        module = importlib.import_module(f"...data.{name}", __name__)
        failures += module.dataset.check(limit)

    if not failures:
        click.echo("No errors reported")