Skip to content

Commit f552275

Browse files
authored
Merge pull request spokestack#17 from spokestack/wr-vad-trigger
Added VAD Trigger
2 parents 4248fbf + 403d979 commit f552275

2 files changed

Lines changed: 45 additions & 2 deletions

File tree

spokestack/vad/webrtc.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def __init__(
4646
self._run_length: int = 0
4747

4848
def __call__(self, context: SpeechContext, frame: np.ndarray) -> None:
49-
"""Processes a single frame of audio to detemine if voice is present
49+
""" Processes a single frame of audio to determine if voice is present
5050
5151
Args:
5252
context (SpeechContext): State based information that needs to be shared
@@ -78,3 +78,30 @@ def reset(self) -> None:
7878
def close(self) -> None:
    """ Close interface for use in pipeline

    Releases nothing of its own; it simply delegates to ``reset()``.
    """
    self.reset()
81+
82+
83+
class VoiceActivityTrigger:
    """ Voice Activity Detector trigger pipeline component

    Watches the ``is_speech`` flag on the shared context and sets
    ``is_active`` on the context each time that flag changes to a truthy
    value. The trigger never clears ``is_active`` itself.
    """

    def __init__(self) -> None:
        # value of context.is_speech observed on the most recent call
        self._is_speech = False

    def __call__(self, context: SpeechContext, frame: np.ndarray) -> None:
        """ Activates speech context whenever speech is detected

        Args:
            context (SpeechContext): State based information that needs to be shared
            between pieces of the pipeline
            frame (np.ndarray): Single frame of PCM-16 audio from an input source;
            it is not inspected here, only ``context.is_speech`` is read

        """
        # act only on a change of the speech flag, so a context that gets
        # deactivated while speech continues is not immediately re-activated
        if context.is_speech != self._is_speech:
            if context.is_speech:
                context.is_active = True
            self._is_speech = context.is_speech

    def close(self) -> None:
        """ Close interface for use in pipeline """
        self.reset()

    def reset(self) -> None:
        """ Forgets the last observed speech state """
        self._is_speech = False

tests/vad/test_webrtc.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77

88
from spokestack.context import SpeechContext
9-
from spokestack.vad.webrtc import VoiceActivityDetector
9+
from spokestack.vad.webrtc import VoiceActivityDetector, VoiceActivityTrigger
1010

1111

1212
@patch("webrtcvad.Vad.is_speech", return_value=True)
@@ -79,3 +79,19 @@ def test_vad_fall_untriggered():
7979
detector(context, frame)
8080
assert not context.is_speech
8181
detector.close()
82+
83+
84+
def test_voice_activity_trigger():
    """ Trigger activates the context on each rising edge of is_speech """
    context = SpeechContext()
    trigger = VoiceActivityTrigger()

    frame = np.zeros(160, np.int16)

    # no speech yet: the context must stay inactive
    trigger(context, frame)
    assert not context.is_active

    # rising edge of is_speech activates the context
    context.is_speech = True
    trigger(context, frame)
    assert context.is_active

    # sustained speech must not re-activate a context that was deactivated
    context.is_active = False
    trigger(context, frame)
    assert not context.is_active

    # falling edge alone does not activate
    context.is_speech = False
    trigger(context, frame)
    assert not context.is_active

    # a new rising edge after silence activates again
    context.is_speech = True
    trigger(context, frame)
    assert context.is_active

    # close() resets the remembered speech state
    trigger.close()
    assert not trigger._is_speech

0 commit comments

Comments
 (0)