Coverage for src/bob/bio/spear/database/voxforge.py: 56%
43 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-06 22:04 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-06 22:04 +0100
1#!/usr/bin/env python
2# @author: Yannick Dayer <yannick.dayer@idiap.ch>
3# @date: Wed 16 Jun 2021 17:21:47 UTC+02
5import csv
6import logging
8from pathlib import Path
10import click
12from clapper.click import verbosity_option
13from tqdm import tqdm
15from bob.bio.base.database import CSVDatabase
16from bob.bio.base.database.utils import download_file, search_and_open
17from bob.bio.spear.database.utils import create_sample_loader, get_rc
19logger = logging.getLogger(__name__)
class VoxforgeDatabase(CSVDatabase):
    """CSV-based definition of the VoxForge database."""

    # Identifier of this database; also used to build the configuration key
    # from which the data directory is read in ``__init__``.
    name = "voxforge"
    category = "spear"

    # Protocol definition archive: local file name, download locations and
    # hash. NOTE(review): presumably consumed by ``CSVDatabase`` when
    # retrieving the protocol definition -- confirm against the base class.
    dataset_protocols_name = "voxforge.tar.gz"
    dataset_protocols_urls = [
        "https://www.idiap.ch/software/bob/databases/latest/spear/voxforge-9d4ab3a3.tar.gz",
        "http://www.idiap.ch/software/bob/databases/latest/spear/voxforge-9d4ab3a3.tar.gz",
    ]
    dataset_protocols_hash = "9d4ab3a3"

    def __init__(self, protocol):
        """Set up the database for the given ``protocol``.

        The data directory is looked up in the configuration returned by
        ``get_rc()`` under the key ``bob.db.<name>.directory`` and handed to
        ``create_sample_loader`` to build the transformer passed to the base
        class.
        """
        rc_key = f"bob.db.{self.name}.directory"
        data_directory = get_rc()[rc_key]
        sample_loader = create_sample_loader(data_path=data_directory)
        super().__init__(
            name=self.name,
            protocol=protocol,
            transformer=sample_loader,
        )
@click.command(
    epilog="""Examples:

\b
    $ bob db download-voxforge ./data/

\b
    $ bob db download-voxforge --protocol-definition bio-spear-voxforge.tar ./data/

""",
)
@click.option(
    "--protocol-definition",
    "-p",
    default=None,
    help=(
        "A path to a the protocol definition file of VoxForge. "
        "If omitted, will use the default protocol definition file at "
        "`https://www.idiap.ch/software/bob/databases/latest/spear`."
    ),
)
@click.option(
    "--force-download",
    "-f",
    is_flag=True,
    help="Download a file even if it already exists locally.",
)
@click.argument("destination")
@verbosity_option(logger=logger, expose_value=False)
def download_voxforge(protocol_definition, destination, force_download):
    """Downloads a series of VoxForge data files from their repository and untar them.

    The files will be downloaded and saved in the `destination` folder then extracted.

    The list of URLs is provided in the protocol definition file of Voxforge.
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it stays user-facing; implementation notes are kept in comments.
    #
    # protocol_definition: path to a protocol definition file, or None to fall
    #     back to the one returned by
    #     ``VoxforgeDatabase.retrieve_dataset_protocols()``.
    # destination: directory in which the listed files are stored.
    # force_download: when true, files already present in ``destination`` are
    #     downloaded again instead of being skipped.

    destination = Path(destination)
    # ``parents=True`` lets a nested destination (e.g. ``./a/b/data``) be
    # created in one go instead of failing when an ancestor is missing.
    destination.mkdir(parents=True, exist_ok=True)

    if protocol_definition is None:
        protocol_definition = VoxforgeDatabase.retrieve_dataset_protocols()

    # Use the `Default` protocol
    protocol = "Default"

    # Open the list file
    list_file = f"{protocol}/data_files_urls.csv"
    open_list_file = search_and_open(protocol_definition, list_file)

    # Close the list file even when a download or a malformed row raises.
    try:
        # Count the rows once for the progress bar (minus the CSV header),
        # never going below zero for an empty file, then rewind for parsing.
        num_files = max(sum(1 for _ in open_list_file) - 1, 0)
        open_list_file.seek(0, 0)
        logger.info(
            "%s files are listed in %s. Downloading...", num_files, list_file
        )

        csv_list_file = csv.DictReader(open_list_file)

        for row in tqdm(csv_list_file, total=num_files):
            full_filename = destination / row["filename"]
            # Skip files that are already present unless a refresh is forced.
            if force_download or not full_filename.exists():
                logger.debug(
                    "Downloading %s from %s", row["filename"], row["url"]
                )
                download_file(
                    urls=[row["url"]],
                    destination_directory=full_filename.parent,
                    destination_filename=full_filename.name,
                )
                logger.debug("Downloaded to %s", full_filename)

        logger.info("Download of %s files completed.", num_files)
    finally:
        open_list_file.close()