Source code for bob.extension.download

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>

import os
import logging
logger = logging.getLogger(__name__)


def _unzip(zip_file, directory):
    """Extracts every member of a zip archive into a directory

    Parameters
    ----------

    zip_file : str
        Path to the zip archive to read.

    directory : str
        Directory the archive members are extracted into.
    """
    from zipfile import ZipFile

    archive = ZipFile(zip_file)
    try:
        archive.extractall(path=directory)
    finally:
        # Mirrors what leaving a ``with`` block would do
        archive.close()


def _untar(tar_file, directory, ext):
    """Extracts a (possibly compressed) tar archive into a directory

    Parameters
    ----------

    tar_file : str
        Path to the tar archive to read.

    directory : str
        Directory the archive members are extracted into.

    ext : str
        Last extension of ``tar_file`` (including the leading dot). It
        selects the decompression mode: ``.bz2``/``.tbz2`` are opened as
        bzip2, ``.gz``/``.tgz`` as gzip, and anything else is opened with
        ``"r"``, which lets :py:mod:`tarfile` detect the compression itself.
    """
    import tarfile

    # NOTE: ``ext in [".bz2" or ".tbz2"]`` only ever tested against ".bz2"
    # because ``"a" or "b"`` evaluates to ``"a"``; use real membership tests.
    if ext in (".bz2", ".tbz2"):
        mode = "r:bz2"
    elif ext in (".gz", ".tgz"):
        mode = "r:gz"
    else:
        mode = "r"

    with tarfile.open(name=tar_file, mode=mode) as t:
        if hasattr(tarfile, "data_filter"):
            # The archive typically comes from a download: when the running
            # Python provides extraction filters, refuse members that would
            # escape ``directory`` (absolute paths, "..", device files, ...).
            t.extractall(directory, filter="data")
        else:
            t.extractall(directory)


def _unbz2(bz2_file):
    """Decompresses a bzip2 file next to the original

    The output path is ``bz2_file`` with its last extension removed (e.g.
    ``data.txt.bz2`` is written to ``data.txt``). The compressed file is
    left in place.

    Parameters
    ----------

    bz2_file : str
        Path to the bzip2-compressed file.
    """
    import bz2
    from shutil import copyfileobj

    out_file = os.path.splitext(bz2_file)[0]

    # Both handles are closed deterministically and the data is streamed in
    # chunks rather than being fully decompressed in memory first.
    with bz2.BZ2File(bz2_file) as compressed, open(out_file, 'wb') as plain:
        copyfileobj(compressed, plain)


def download_file(url, out_file):
    """Downloads a file from a given url

    The response is streamed to ``out_file`` in chunks, and the connection
    is opened *before* the output file is created, so a failed request does
    not leave an empty ``out_file`` behind.

    Parameters
    ----------

    url : str
        The url to download from.

    out_file : str
        Where to save the file.
    """
    from contextlib import closing
    from shutil import copyfileobj

    try:
        # python3 location of urlopen
        from urllib.request import urlopen
    except ImportError:
        # python2 location of urlopen
        from urllib2 import urlopen

    # ``closing`` makes sure the response is released on both Python
    # versions (python2 responses are not context managers themselves).
    with closing(urlopen(url)) as response:
        with open(out_file, 'wb') as f:
            copyfileobj(response, f)


def download_and_unzip(urls, filename):
    """Downloads a file from a list of URLs and uncompresses it if necessary

    The URLs are tried in order and the first successful download wins.
    Depending on the extension of ``filename`` the result is then extracted:

    * ``.zip``: unzipped into the directory containing ``filename``
    * ``.tar``, ``.tar.*``, ``.tgz``, ``.tbz2``: untarred into the directory
      containing ``filename``
    * ``.bz2`` (not a tar): decompressed to ``filename`` without the
      ``.bz2`` suffix

    Any other extension is left untouched. The downloaded archive itself is
    not removed.

    Example::

        download_and_unzip(
            ["https://mytesturl.co/my_file_example.tar.bz2"],
            filename="/path/to/my_file_example.tar.bz2",
        )

    Parameters
    ----------

    urls : list
        List containing all the URLs (a single string is also accepted).
        The function will try to download them in order.

    filename : str
        File name (full path) where the downloaded file will be written and
        uncompressed.

    Raises
    ------

    RuntimeError
        If none of the URLs could be downloaded and ``filename`` does not
        already exist on disk.
    """

    # Just testing if string and wrap it in a list if it's the case
    if isinstance(urls, str):
        urls = [urls]

    for url in urls:
        try:
            logger.info("Downloading from %s ...", url)
            download_file(url, filename)
            break
        except Exception:
            # Best effort: log the failure and move on to the next mirror
            logger.warning(
                "Could not download from the %s url", url, exc_info=True)
    else:  # else is for the for loop: no URL succeeded (or none was given)
        # A file already present at ``filename`` is still used
        if not os.path.isfile(filename):
            raise RuntimeError("Could not download the file.")

    # Uncompressing if it is the case
    header, ext = os.path.splitext(filename)
    header = header.lower()
    ext = ext.lower()
    target_dir = os.path.dirname(filename)

    if ext == ".zip":
        logger.info("Unziping in {0}".format(filename))
        _unzip(filename, target_dir)
    elif header.endswith(".tar") or ext in (".tar", ".tgz", ".tbz2"):
        # ``header`` ends with ".tar" for double extensions like ".tar.gz";
        # a plain ".tar" only shows up in ``ext`` and must be listed there.
        logger.info("Untar/gzip in {0}".format(filename))
        _untar(filename, target_dir, ext)
    elif ext == ".bz2":
        logger.info("Unbz2 in {0}".format(filename))
        _unbz2(filename)