Skip to content

Commit 18ea450

Browse files
authored
Addition of module-level logging. (spokestack#19)
* Addition of module-level logging. This change mirrors the Android library by adding logging in the same modules.
1 parent 2ffed00 commit 18ea450

5 files changed

Lines changed: 42 additions & 7 deletions

File tree

spokestack/asr/speech_recognizer.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,17 @@
22
This module contains the recognizer for cloud based ASR in
33
the speech pipeline
44
"""
5+
import logging
56

67
import numpy as np # type: ignore
78

89
from spokestack.asr.cloud_client import CloudClient
910
from spokestack.context import SpeechContext
1011

1112

13+
_LOG = logging.getLogger(__name__)
14+
15+
1216
class CloudSpeechRecognizer:
1317
""" Speech recognizer for use in the speech pipeline
1418
@@ -52,11 +56,14 @@ def __call__(self, context: SpeechContext, frame: np.ndarray) -> None:
5256
if context.is_active and not self._is_active:
5357
self._begin()
5458
self._send(frame)
59+
_LOG.debug("ready for speech")
5560
elif context.is_active:
5661
self._send(frame)
5762
self._receive(context)
63+
_LOG.debug("begin speech")
5864
elif self._is_active:
5965
self._commit()
66+
_LOG.debug("end speech")
6067
elif not self._client.is_final:
6168
self._receive(context)
6269
elif self._client.idle_count < self._client.idle_timeout:
@@ -86,8 +93,10 @@ def _receive(self, context):
8693
if self._client.is_final:
8794
if context.transcript:
8895
context.event("recognize")
96+
_LOG.debug("recognize event")
8997
else:
9098
context.event("timeout")
99+
_LOG.debug("timeout event")
91100

92101
def _commit(self) -> None:
93102
self._is_active = False

spokestack/context.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
This module contains a context class to manage
33
state between members of the processing pipeline
44
"""
5+
import logging
56
from typing import Callable
67

78

8-
class SpeechContext:
9-
"""Class for managing context of the speech pipeline.
9+
_LOG = logging.getLogger(__name__)
10+
1011

11-
Args:
12-
**kwargs
13-
"""
12+
class SpeechContext:
13+
""" Class for managing context of the speech pipeline. """
1414

1515
def __init__(self) -> None:
1616
self._is_speech: bool = False
@@ -79,8 +79,10 @@ def is_active(self, value: bool) -> None:
7979
self._is_active = value
8080
if value and not is_active:
8181
self.event("activate")
82+
_LOG.info("activate event")
8283
elif not value and is_active:
8384
self.event("deactivate")
85+
_LOG.info("deactivate event")
8486

8587
@property
8688
def transcript(self) -> str:

spokestack/nlu/tflite.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
any slots that are associated with that intent.
55
"""
66
import json
7+
import logging
78
import os
89
from importlib import import_module
910
from typing import List, Tuple
@@ -16,6 +17,9 @@
1617
from spokestack.nlu.result import Result
1718

1819

20+
_LOG = logging.getLogger(__name__)
21+
22+
1923
class TFLiteNLU:
2024
""" Abstraction for using TFLite NLU models
2125
@@ -60,12 +64,16 @@ def __call__(self, utterance: str) -> Result:
6064

6165
# slice off special tokens: [CLS], [SEP]
6266
tags = tags[: len(input_ids) - 2]
67+
_LOG.debug(f"{tags}")
6368
input_ids = input_ids[1:-1]
69+
_LOG.debug(f"{input_ids}")
6470
# retrieve slots from the tagged positions and decode slots back
6571
# into original values
6672
slots = [
6773
(token_id, tag[2:]) for token_id, tag in zip(input_ids, tags) if tag != "o"
6874
]
75+
_LOG.debug(f"{slots}")
76+
6977
slot_map: dict = {}
7078
for (token, tag) in slots:
7179
if tag in slot_map:
@@ -86,6 +94,7 @@ def __call__(self, utterance: str) -> Result:
8694
"parsed_value": parsed,
8795
"raw_value": slot_map[key],
8896
}
97+
_LOG.debug(f"parsed slots: {parsed_slots}")
8998
return Result(
9099
utterance=utterance,
91100
intent=intent,
@@ -123,6 +132,9 @@ def _decode(self, outputs) -> Tuple[str, List[str], float]:
123132
intent_posterior, tag_posterior = outputs
124133
intents, confidence = self._decode_intent(intent_posterior)
125134
tags = self._decode_tags(tag_posterior)
135+
_LOG.debug(f"decoded tags: {tags}")
136+
_LOG.debug(f"decoded intent: {intents}")
137+
_LOG.debug(f"confidence: {confidence}")
126138
return intents, tags, confidence
127139

128140
def _decode_tags(self, posterior):

spokestack/vad/webrtc.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
This module contains the webrtc component for
33
voice activity detection (vad)
44
"""
5+
import logging
56

67
import numpy as np # type: ignore
78
import webrtcvad # type: ignore
@@ -14,6 +15,8 @@
1415
AGGRESSIVE = 2
1516
VERY_AGGRESSIVE = 3
1617

18+
_LOG = logging.getLogger(__name__)
19+
1720

1821
class VoiceActivityDetector:
1922
"""This class detects the presence of voice in a frame of audio.
@@ -67,8 +70,10 @@ def __call__(self, context: SpeechContext, frame: np.ndarray) -> None:
6770
if self._run_value != context.is_speech:
6871
if self._run_value and self._run_length >= self._rise_length:
6972
context.is_speech = True
73+
_LOG.info("vad: true")
7074
if not self._run_value and self._run_length >= self._fall_length:
7175
context.is_speech = False
76+
_LOG.info("vad: false")
7277

7378
def reset(self) -> None:
7479
""" Resets the current state """

spokestack/wakeword/tflite.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
This module contains the class for detecting
33
the presence of keywords in an audio stream
44
"""
5+
import logging
56
import os
67

78
import numpy as np # type: ignore
@@ -11,6 +12,9 @@
1112
from spokestack.wakeword.ring_buffer import RingBuffer
1213

1314

15+
_LOG = logging.getLogger(__name__)
16+
17+
1418
class WakewordTrigger:
1519
""" Detects the presence of a wakeword in the audio input
1620
@@ -109,6 +113,8 @@ def __call__(self, context: SpeechContext, frame) -> None:
109113

110114
# reset on vad fall deactivation
111115
if vad_fall:
116+
if not context.is_active:
117+
_LOG.info(f"wake: {self._posterior_max}")
112118
self.reset()
113119

114120
def _sample(self, context: SpeechContext, frame) -> None:
@@ -176,10 +182,11 @@ def _detect(self, context: SpeechContext) -> None:
176182
frame = np.expand_dims(frame, 0)
177183
posterior = self.detect_model(frame)[0][0][0]
178184

179-
if posterior > self._posterior_threshold:
180-
context.is_active = True
181185
if posterior > self._posterior_max:
182186
self._posterior_max = posterior
187+
if posterior > self._posterior_threshold:
188+
context.is_active = True
189+
_LOG.info(f"wake: {self._posterior_max}")
183190

184191
def reset(self) -> None:
185192
""" Resets the current WakewordDetector state """

0 commit comments

Comments
 (0)