Skip to content

Commit 89d4866

Browse files
authored
Addition of Pipeline Profiles (spokestack#22)
* Addition of Pipeline Profiles This change introduces two profiles to the Python library. Both profiles contain the full speech pipeline, including ASR; they differ only in the activation trigger. WakewordASR uses the TFLite wakeword model for activation, and VoiceActivityTriggerASR uses the VAD to activate.
1 parent 39e9c4e commit 89d4866

11 files changed

Lines changed: 147 additions & 2 deletions

File tree

spokestack/activation_timeout.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ class ActivationTimeout:
1313
max_active (int): the maximum length of an activation (ms)
1414
"""
1515

16-
def __init__(self, frame_width=20, min_active=500, max_active=5000) -> None:
16+
def __init__(
17+
self, frame_width=20, min_active=500, max_active=5000, **kwargs
18+
) -> None:
1719

1820
self._min_active = min_active / frame_width
1921
self._max_active = max_active / frame_width

spokestack/asr/speech_recognizer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def __init__(
3333
sample_rate: int = 16000,
3434
frame_width: int = 20,
3535
idle_timeout: int = 5000,
36+
**kwargs,
3637
) -> None:
3738

3839
self._client: CloudClient = CloudClient(

spokestack/io/pyaudio.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@ class PyAudioInput:
1515
"""
1616

1717
def __init__(
18-
self, sample_rate: int, frame_width: int, exception_on_overflow: bool = True
18+
self,
19+
sample_rate: int,
20+
frame_width: int,
21+
exception_on_overflow: bool = True,
22+
**kwargs
1923
) -> None:
2024
self._frame_size = int(sample_rate / 1000 * frame_width)
2125
self._exception_on_overflow = exception_on_overflow

spokestack/profile/__init__.py

Whitespace-only changes.
spokestack/profile/vad_trigger_asr.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
2+
Pipeline profile with vad trigger and asr
3+
"""
4+
from spokestack.activation_timeout import ActivationTimeout
5+
from spokestack.asr.speech_recognizer import CloudSpeechRecognizer
6+
from spokestack.io.pyaudio import PyAudioInput
7+
from spokestack.pipeline import SpeechPipeline
8+
from spokestack.vad.webrtc import VoiceActivityDetector, VoiceActivityTrigger
9+
10+
11+
class VoiceActivityTriggerSpokestackASR:
    """Profile: PyAudio input, VAD-based activation, and Spokestack ASR."""

    @staticmethod
    def create(
        spokestack_id: str,
        spokestack_secret: str,
        sample_rate: int = 16000,
        frame_width: int = 20,
        **kwargs
    ) -> SpeechPipeline:
        """Assemble a speech pipeline that uses the VAD as its trigger.

        Args:
            spokestack_id (str): Spokestack API id.
            spokestack_secret (str): Spokestack API secret.
            sample_rate (int): sample rate of the audio (Hz).
            frame_width (int): width of the audio frame: 10, 20, or 30 (ms).
            **kwargs: additional keyword options, forwarded unchanged to the
                audio input, the voice activity detector, the activation
                timeout, and the speech recognizer.

        Returns:
            SpeechPipeline instance with profile configuration.

        """
        # Components are created one by one, input first, then the stages
        # in the order they appear in the pipeline.
        audio_input = PyAudioInput(
            sample_rate=sample_rate, frame_width=frame_width, **kwargs
        )
        detector = VoiceActivityDetector(
            sample_rate=sample_rate, frame_width=frame_width, **kwargs
        )
        trigger = VoiceActivityTrigger()
        timeout = ActivationTimeout(frame_width=frame_width, **kwargs)
        recognizer = CloudSpeechRecognizer(
            spokestack_id=spokestack_id,
            spokestack_secret=spokestack_secret,
            sample_rate=sample_rate,
            frame_width=frame_width,
            **kwargs
        )

        return SpeechPipeline(
            input_source=audio_input,
            stages=[detector, trigger, timeout, recognizer],
        )

spokestack/profile/wakeword_asr.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
2+
Pipeline profile for pyaudio input, vad, wakeword, and asr
3+
"""
4+
from spokestack.activation_timeout import ActivationTimeout
5+
from spokestack.asr.speech_recognizer import CloudSpeechRecognizer
6+
from spokestack.io.pyaudio import PyAudioInput
7+
from spokestack.pipeline import SpeechPipeline
8+
from spokestack.vad.webrtc import VoiceActivityDetector
9+
from spokestack.wakeword.tflite import WakewordTrigger
10+
11+
12+
class WakewordSpokestackASR:
    """ TFLite wakeword with Spokestack speech recognition. """

    @staticmethod
    def create(
        spokestack_id: str,
        spokestack_secret: str,
        sample_rate: int = 16000,
        frame_width: int = 20,
        model_dir: str = "",
        **kwargs,
    ) -> SpeechPipeline:
        """ Creates a speech pipeline instance from profile

        Args:
            spokestack_id (str): spokestack API id.
            spokestack_secret (str): Spokestack API secret.
            sample_rate (int): sample rate of the audio (Hz).
            frame_width (int): width of the audio frame: 10, 20, or 30 (ms).
            model_dir (str): Directory containing the tflite wakeword models.
            **kwargs: additional keyword options, forwarded unchanged to the
                audio input, the voice activity detector, the wakeword
                trigger, the activation timeout, and the speech recognizer.

        Returns:
            SpeechPipeline instance with profile configuration.

        """
        pipeline = SpeechPipeline(
            input_source=PyAudioInput(
                frame_width=frame_width, sample_rate=sample_rate, **kwargs
            ),
            stages=[
                VoiceActivityDetector(
                    frame_width=frame_width, sample_rate=sample_rate, **kwargs,
                ),
                WakewordTrigger(model_dir=model_dir, **kwargs),
                ActivationTimeout(frame_width=frame_width, **kwargs),
                CloudSpeechRecognizer(
                    spokestack_secret=spokestack_secret,
                    spokestack_id=spokestack_id,
                    # Forward the audio format explicitly so the recognizer
                    # uses the same settings as the input and VAD instead of
                    # silently falling back to its own defaults.
                    sample_rate=sample_rate,
                    frame_width=frame_width,
                    **kwargs,
                ),
            ],
        )
        return pipeline

spokestack/vad/webrtc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def __init__(
3737
vad_rise_delay: int = 0,
3838
vad_fall_delay: int = 0,
3939
mode: int = QUALITY,
40+
**kwargs
4041
) -> None:
4142

4243
self._sample_rate: int = sample_rate

spokestack/wakeword/tflite.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def __init__(
3636
fft_hop_length: int = 10,
3737
model_dir: str = "",
3838
posterior_threshold: float = 0.5,
39+
**kwargs,
3940
) -> None:
4041

4142
self.pre_emphasis: float = pre_emphasis
tests/profile/test_vad_trigger_asr.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""
2+
This module contains the tests for vad_trigger_asr profile
3+
"""
4+
from unittest import mock
5+
6+
from spokestack.profile.vad_trigger_asr import VoiceActivityTriggerSpokestackASR
7+
8+
9+
@mock.patch("spokestack.profile.vad_trigger_asr.PyAudioInput")
@mock.patch("spokestack.profile.vad_trigger_asr.SpeechPipeline")
def test_activate(*args):
    """Smoke test: create the VAD-trigger profile, then start and run it."""
    # PyAudioInput and SpeechPipeline are replaced by mocks while this runs.
    profile_pipeline = VoiceActivityTriggerSpokestackASR.create("", "")
    profile_pipeline.start()
    profile_pipeline.run()

tests/profile/test_wakeword_asr.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""
2+
This module contains the tests for the wakeword asr profile.
3+
"""
4+
from unittest import mock
5+
6+
from spokestack.profile.wakeword_asr import WakewordSpokestackASR
7+
8+
9+
@mock.patch("spokestack.profile.wakeword_asr.PyAudioInput")
@mock.patch("spokestack.profile.wakeword_asr.WakewordTrigger")
@mock.patch("spokestack.profile.wakeword_asr.SpeechPipeline")
def test_activate(*args):
    """Smoke test: create the wakeword profile, then start and run it."""
    # PyAudioInput, WakewordTrigger and SpeechPipeline are replaced by mocks
    # while this runs.
    profile_pipeline = WakewordSpokestackASR.create("", "")
    profile_pipeline.start()
    profile_pipeline.run()

0 commit comments

Comments
 (0)