Coverage for src/bob/bio/spear/database/voxforge.py: 56%

43 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-06 22:04 +0100

1#!/usr/bin/env python 

2# @author: Yannick Dayer <yannick.dayer@idiap.ch> 

3# @date: Wed 16 Jun 2021 17:21:47 UTC+02 

4 

5import csv 

6import logging 

7 

8from pathlib import Path 

9 

10import click 

11 

12from clapper.click import verbosity_option 

13from tqdm import tqdm 

14 

15from bob.bio.base.database import CSVDatabase 

16from bob.bio.base.database.utils import download_file, search_and_open 

17from bob.bio.spear.database.utils import create_sample_loader, get_rc 

18 

19logger = logging.getLogger(__name__) 

20 

21 

class VoxforgeDatabase(CSVDatabase):
    """CSV-based database interface for the VoxForge dataset.

    The class attributes identify the dataset (``name``, ``category``) and
    describe the protocol definition archive: its file name, the URLs it can
    be fetched from, and its expected hash.

    Parameters
    ----------
    protocol
        Protocol identifier, forwarded unchanged to ``CSVDatabase``.
    """

    # Identification of the dataset.
    name = "voxforge"
    category = "spear"

    # Protocol definition archive: file name, candidate URLs, expected hash.
    dataset_protocols_name = "voxforge.tar.gz"
    dataset_protocols_urls = [
        "https://www.idiap.ch/software/bob/databases/latest/spear/voxforge-9d4ab3a3.tar.gz",
        "http://www.idiap.ch/software/bob/databases/latest/spear/voxforge-9d4ab3a3.tar.gz",
    ]
    dataset_protocols_hash = "9d4ab3a3"

    def __init__(self, protocol):
        # The data location is read from the user configuration under a key
        # derived from the database name.
        directory_key = f"bob.db.{self.name}.directory"
        data_path = get_rc()[directory_key]

        # The transformer is a sample loader built for that data location.
        sample_loader = create_sample_loader(data_path=data_path)

        super().__init__(
            name=self.name,
            protocol=protocol,
            transformer=sample_loader,
        )

42 

43 

@click.command(
    epilog="""Examples:

\b
    $ bob db download-voxforge ./data/

\b
    $ bob db download-voxforge --protocol-definition bio-spear-voxforge.tar ./data/

""",
)
@click.option(
    "--protocol-definition",
    "-p",
    default=None,
    help=(
        "A path to the protocol definition file of VoxForge. "
        "If omitted, will use the default protocol definition file at "
        "`https://www.idiap.ch/software/bob/databases/latest/spear`."
    ),
)
@click.option(
    "--force-download",
    "-f",
    is_flag=True,
    help="Download a file even if it already exists locally.",
)
@click.argument("destination")
@verbosity_option(logger=logger, expose_value=False)
def download_voxforge(protocol_definition, destination, force_download):
    """Downloads a series of VoxForge data files from their repository and untar them.

    The files will be downloaded and saved in the `destination` folder then extracted.

    The list of URLs is provided in the protocol definition file of Voxforge.
    \f

    Everything below the form feed is developer documentation; click does not
    show it in ``--help``.

    Parameters
    ----------
    protocol_definition
        Path to a protocol definition file, or ``None`` to use the one
        returned by ``VoxforgeDatabase.retrieve_dataset_protocols()``.
    destination
        Directory receiving the downloaded files. It is created, along with
        any missing parent directories, when it does not exist.
    force_download
        When true, files already present in ``destination`` are downloaded
        again instead of being skipped.

    Raises
    ------
    click.ClickException
        If the URL list file cannot be opened from the protocol definition.
    """
    # NOTE(review): the help text above mentions extraction, but no explicit
    # extraction step is visible in this function -- confirm whether
    # `download_file` takes care of it.

    destination = Path(destination)
    # parents=True: a nested destination (e.g. ./a/b/data) must not fail just
    # because intermediate directories are missing.
    destination.mkdir(parents=True, exist_ok=True)

    if protocol_definition is None:
        protocol_definition = VoxforgeDatabase.retrieve_dataset_protocols()

    # Use the `Default` protocol
    protocol = "Default"

    # Open the list file
    list_file = f"{protocol}/data_files_urls.csv"
    open_list_file = search_and_open(protocol_definition, list_file)
    # Defensive guard: presumably the helper may return None when nothing
    # matches -- TODO confirm against `search_and_open`.
    if open_list_file is None:
        raise click.ClickException(
            f"Could not find {list_file} in {protocol_definition}."
        )

    # Parse the whole listing up front so that the stream is closed before the
    # (long, failure-prone) downloads start, and so that the count reflects
    # actual CSV records rather than raw lines.
    try:
        rows = list(csv.DictReader(open_list_file))
    finally:
        open_list_file.close()

    num_files = len(rows)
    logger.info(f"{num_files} files are listed in {list_file}. Downloading...")

    for row in tqdm(rows, total=num_files):
        full_filename = destination / row["filename"]
        # Skip files that are already present unless a re-download is forced.
        if force_download or not full_filename.exists():
            logger.debug(f"Downloading {row['filename']} from {row['url']}")
            download_file(
                urls=[row["url"]],
                destination_directory=full_filename.parent,
                destination_filename=full_filename.name,
            )
            logger.debug(f"Downloaded to {full_filename}")

    logger.info(f"Download of {num_files} files completed.")