|
| 1 | +""" |
| 2 | +Pipeline profile for pyaudio input, vad, wakeword, and asr |
| 3 | +""" |
| 4 | +from spokestack.activation_timeout import ActivationTimeout |
| 5 | +from spokestack.asr.speech_recognizer import CloudSpeechRecognizer |
| 6 | +from spokestack.io.pyaudio import PyAudioInput |
| 7 | +from spokestack.pipeline import SpeechPipeline |
| 8 | +from spokestack.vad.webrtc import VoiceActivityDetector |
| 9 | +from spokestack.wakeword.tflite import WakewordTrigger |
| 10 | + |
| 11 | + |
class WakewordSpokestackASR:
    """Profile: PyAudio input, VAD, TFLite wakeword, and Spokestack ASR."""

    @staticmethod
    def create(
        spokestack_id: str,
        spokestack_secret: str,
        sample_rate: int = 16000,
        frame_width: int = 20,
        model_dir: str = "",
        **kwargs,
    ) -> SpeechPipeline:
        """Build the speech pipeline described by this profile.

        Any extra keyword arguments are forwarded unchanged to every
        component constructed here: the input source and each stage.

        Args:
            spokestack_id (str): Spokestack API id.
            spokestack_secret (str): Spokestack API secret.
            sample_rate (int): sample rate of the audio (Hz).
            frame_width (int): width of the audio frame: 10, 20, or 30 (ms).
            model_dir (str): directory containing the tflite wakeword models.
            **kwargs: additional options passed to every component.

        Returns:
            SpeechPipeline: pipeline with a PyAudioInput source and the
            stages VoiceActivityDetector, WakewordTrigger,
            ActivationTimeout, and CloudSpeechRecognizer, in that order.
        """
        # The input source is created first, then the stages in list order.
        audio_input = PyAudioInput(
            sample_rate=sample_rate, frame_width=frame_width, **kwargs
        )

        vad = VoiceActivityDetector(
            sample_rate=sample_rate, frame_width=frame_width, **kwargs
        )
        wakeword = WakewordTrigger(model_dir=model_dir, **kwargs)
        timeout = ActivationTimeout(frame_width=frame_width, **kwargs)
        recognizer = CloudSpeechRecognizer(
            spokestack_id=spokestack_id,
            spokestack_secret=spokestack_secret,
            **kwargs,
        )

        return SpeechPipeline(
            input_source=audio_input,
            stages=[vad, wakeword, timeout, recognizer],
        )
0 commit comments