# Source code for bob.bio.spear.extractor.cepstral_extended

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Pavel Korshunov <pavel.korshunov@idiap.ch>
# Fri 6 Nov 17:13:22 CEST 2015
#
# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


from __future__ import print_function

import numpy
import bob
import bob.core

import logging
# Logger registered under the "bob.bio.spear" name; note that importing this
# module forces that logger's level to DEBUG as a side effect.
logger = logging.getLogger("bob.bio.spear")
logger.setLevel(logging.DEBUG)

from bob.bio.base.extractor import Extractor
from .. import utils


class CepstralExtended(Extractor):
    """Extract cepstral features from audio and filter them with VAD labels.

    The features are computed with ``bob.ap.Ceps`` configured from the
    constructor arguments, optionally reduced to their delta/delta-delta part,
    filtered through ``utils.vad_filter_features`` and optionally normalized
    per feature column.

    **Parameters**

    win_length_ms, win_shift_ms, n_filters, n_ceps, f_min, f_max, delta_win, pre_emphasis_coef
        Passed positionally to the ``bob.ap.Ceps`` constructor.
        ``f_max`` should normally be half of the sampling frequency.

    dct_norm, mel_scale, rect_filter, inverse_filter, with_energy, with_delta, with_delta_delta, ssfc_features, scfc_features, scmc_features, normalize_spectrum, log_filter, energy_filter
        Copied onto the attributes of the same name of the ``bob.ap.Ceps``
        object before the features are computed.

    keep_only_deltas
        If true, the first ``n_ceps`` columns of the computed features are
        dropped, so that only the remaining (delta) columns are returned.

    vad_filter
        Name of the filter handed to ``utils.vad_filter_features``.

    normalize_feature_vector
        If true, the filtered features are passed through
        :py:meth:`normalize_features` before being returned.

    kwargs
        Forwarded to the ``Extractor`` base class constructor.
    """

    def __init__(
            self,
            win_length_ms=20.,  # 20 ms
            win_shift_ms=10.,  # 10 ms
            n_filters=40,
            f_min=0.0,  # 0 Hz
            f_max=8000,  # 8 KHz - this is an important value. Normally it should be half of the sampling frequency
            pre_emphasis_coef=1.0,
            mel_scale=True,
            rect_filter=False,
            inverse_filter=False,
            delta_win=2,
            n_ceps=19,  # 0-->18,
            dct_norm=False,
            ssfc_features=False,
            scfc_features=False,
            scmc_features=False,
            with_delta=True,
            with_delta_delta=True,
            with_energy=False,
            normalize_spectrum=False,
            keep_only_deltas=True,
            log_filter=True,
            energy_filter=False,
            vad_filter="no_filter",  # we do not apply any trim filter by default
            normalize_feature_vector=False,
            **kwargs
    ):
        # call base class constructor with its set of parameters
        Extractor.__init__(
            self,
            requires_training=False, split_training_data_by_client=False,
            **kwargs
        )
        # copy parameters
        self.win_length_ms = win_length_ms
        self.win_shift_ms = win_shift_ms
        self.n_filters = n_filters
        self.f_min = f_min
        self.f_max = f_max
        self.pre_emphasis_coef = pre_emphasis_coef
        self.mel_scale = mel_scale
        self.rect_filter = rect_filter
        self.inverse_filter = inverse_filter
        self.delta_win = delta_win
        self.n_ceps = n_ceps
        self.dct_norm = dct_norm
        self.ssfc_features = ssfc_features
        self.scfc_features = scfc_features
        self.scmc_features = scmc_features
        self.with_delta = with_delta
        self.with_delta_delta = with_delta_delta
        self.with_energy = with_energy
        self.normalize_spectrum = normalize_spectrum
        self.keep_only_deltas = keep_only_deltas
        self.log_filter = log_filter
        self.energy_filter = energy_filter
        self.vad_filter = vad_filter
        self.normalize_feature_vector = normalize_feature_vector

        # compute the size of the feature vector returned by __call__;
        # it is used to build the zero-vector fallback
        self.features_len = self.n_ceps
        if self.with_delta:
            self.features_len += self.n_ceps
        if self.with_delta_delta:
            self.features_len += self.n_ceps
        if self.keep_only_deltas:
            # compute_ceps() drops the first n_ceps columns in this mode, so
            # the fallback vector must be shorter by the same amount
            self.features_len -= self.n_ceps
        # NOTE(review): with_energy is not reflected in features_len --
        # confirm against the output width of bob.ap.Ceps when it is enabled.

    def normalize_features(self, features):
        """Normalize each feature column to zero mean and unit deviation.

        The mean and standard deviation are computed along axis 0.  Columns
        with zero standard deviation are only mean-centred (i.e. become all
        zeros) instead of being divided by zero.

        **Parameters**

        features
            Array of features; statistics are taken over its first axis.

        **Returns**

        The normalized array.
        """
        mean = numpy.mean(features, axis=0)
        std = numpy.std(features, axis=0)
        # a constant column (or a single frame) has zero deviation; dividing
        # by it would silently inject NaN/inf values into the features
        std = numpy.where(std == 0, 1.0, std)
        return numpy.divide(features - mean, std)

    def compute_ceps(self, rate, data):
        """Compute the cepstral features of ``data`` with ``bob.ap.Ceps``.

        **Parameters**

        rate
            Sampling rate handed to ``bob.ap.Ceps``.

        data
            The audio signal the ``bob.ap.Ceps`` object is called with.

        **Returns**

        The array produced by ``bob.ap.Ceps``; if ``keep_only_deltas`` is set,
        its first ``n_ceps`` columns are removed.
        """
        # the module only imports ``bob`` and ``bob.core`` at the top; import
        # the sub-package explicitly so ``bob.ap`` is guaranteed to resolve
        import bob.ap

        ceps = bob.ap.Ceps(
            rate, self.win_length_ms, self.win_shift_ms, self.n_filters,
            self.n_ceps, self.f_min, self.f_max, self.delta_win,
            self.pre_emphasis_coef
        )
        ceps.dct_norm = self.dct_norm
        ceps.mel_scale = self.mel_scale
        ceps.rect_filter = self.rect_filter
        ceps.inverse_filter = self.inverse_filter
        ceps.with_energy = self.with_energy
        ceps.with_delta = self.with_delta
        ceps.with_delta_delta = self.with_delta_delta
        ceps.ssfc_features = self.ssfc_features
        ceps.scfc_features = self.scfc_features
        ceps.scmc_features = self.scmc_features
        ceps.normalize_spectrum = self.normalize_spectrum
        ceps.log_filter = self.log_filter
        ceps.energy_filter = self.energy_filter

        cepstral_features = ceps(data)

        if self.keep_only_deltas:
            # do not take the actual coefficients, only delta with delta-delta
            cepstral_features = cepstral_features[:, self.n_ceps:]

        return cepstral_features

    def __call__(self, input_data, annotations=None):
        """Extract the (VAD-filtered) cepstral features of one audio sample.

        **Parameters**

        input_data
            Indexable with three entries:

            * ``input_data[0]`` --> rate
            * ``input_data[1]`` --> signal
            * ``input_data[2]`` --> labels, the results of the VAD preprocessor

        annotations
            Not used by this extractor.

        **Returns**

        The filtered features converted to a ``numpy.float64`` array.  If the
        signal is empty or the filtered features contain NaN values, a single
        row of ``features_len`` zeros is returned instead.
        """
        rate = input_data[0]
        wav_sample = input_data[1]
        labels = input_data[2]  # results of the VAD preprocessor

        # trailing zeros of wav_sample are deliberately NOT trimmed, so that
        # the frames stay aligned with the VAD output
        if not wav_sample.size:
            logger.error("- Extraction: WAV sample is empty")
            return numpy.array([numpy.zeros(self.features_len)])

        cepstral_coeff = self.compute_ceps(rate, wav_sample)

        # SSFC features are a frame shorter than labels,
        # since they are computed using the difference between neighboring frames
        if self.ssfc_features:
            labels = labels[1:]

        logger.info("- Extraction: size of cepstral features %s", str(cepstral_coeff.shape))

        filtered_features = utils.vad_filter_features(labels, cepstral_coeff, self.vad_filter)
        logger.info("- Extraction: size of filtered cepstral features %s", str(filtered_features.shape))

        if numpy.isnan(numpy.sum(filtered_features)):
            logger.error("- Extraction: cepstral coefficients have NaN values, returning zero-vector...")
            return numpy.array([numpy.zeros(self.features_len)])

        if self.normalize_feature_vector:
            filtered_features = self.normalize_features(filtered_features)

        return numpy.asarray(filtered_features, dtype=numpy.float64)


extractor = CepstralExtended()