# Source code for alex.components.vad.power

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import struct
import math


class PowerVAD():
    """Simple power (energy) based voice activity detector.

    It only makes a hard decision about whether an input frame is speech or
    non speech, by comparing the frame energy with an adaptive threshold.

    The following values are read from ``cfg['VAD']['power']``:

    - ``threshold``: initial value of the adaptive power threshold,
    - ``adaptation_frames``: the threshold is adapted only while the number
      of frames seen so far is lower than this value,
    - ``threshold_multiplier``: a frame is classified as speech when its
      energy is greater than this multiple of the adapted threshold.
    """

    def __init__(self, cfg):
        """Store the configuration and initialise the adaptive threshold."""
        self.cfg = cfg
        # Running estimate of the background power; starts at the configured
        # threshold and is updated by decide() during the adaptation period.
        self.power_threshold_adapted = self.cfg['VAD']['power']['threshold']
        # Number of non-empty frames passed to decide() so far.
        self.in_frames = 0

    def decide(self, frame):
        """Return whether the input segment is speech or non speech.

        The returned value is 1.0 for a speech segment and 0.0 for a non
        speech segment.

        ``frame`` is a byte string which is unpacked as a sequence of
        ``struct`` ``'h'`` (signed short) samples, two bytes per sample.
        An empty frame is classified as non speech and does not change the
        detector state.

        Raises ``struct.error`` when the length of a non-empty frame does not
        match a whole number of two-byte samples.
        """
        if not frame:
            # No samples: there is no energy to measure, and dividing by the
            # sample count below would raise ZeroDivisionError.
            return 0.0

        self.in_frames += 1

        # Integer division keeps the sample count an int on Python 2 and 3.
        n_samples = len(frame) // 2
        samples = struct.unpack('%dh' % (n_samples, ), frame)

        # NOTE: this is sqrt(sum of squares) / N, not a true RMS; it is kept
        # unchanged because the configured thresholds are tuned against it.
        energy = math.sqrt(sum(x * x for x in samples)) / len(samples)

        power_cfg = self.cfg['VAD']['power']

        if self.in_frames < power_cfg['adaptation_frames']:
            # Running mean of the frame energies in which the current
            # threshold carries the weight of ``in_frames`` observations.
            self.power_threshold_adapted = (
                self.in_frames * self.power_threshold_adapted + energy
            ) / (self.in_frames + 1)

        if energy > power_cfg['threshold_multiplier'] * self.power_threshold_adapted:
            return 1.0

        return 0.0