Addition of module-level logging. (spokestack#19)

will-rice · web-flow · commit 18ea450a452a · 2020-10-12T10:56:50.000-04:00
* Addition of module-level logging

This change mirrors the Android library by adding logging to the same modules.
diff --git a/spokestack/asr/speech_recognizer.py b/spokestack/asr/speech_recognizer.py
@@ -2,13 +2,17 @@
 This module contains the recognizer for cloud based ASR in
 the speech pipeline
 """
+import logging
 
 import numpy as np  # type: ignore
 
 from spokestack.asr.cloud_client import CloudClient
 from spokestack.context import SpeechContext
 
 
+_LOG = logging.getLogger(__name__)
+
+
 class CloudSpeechRecognizer:
     """ Speech recognizer for use in the speech pipeline
 
@@ -52,11 +56,14 @@ def __call__(self, context: SpeechContext, frame: np.ndarray) -> None:
         if context.is_active and not self._is_active:
             self._begin()
             self._send(frame)
+            _LOG.debug("ready for speech")
         elif context.is_active:
             self._send(frame)
             self._receive(context)
+            _LOG.debug("begin speech")
         elif self._is_active:
             self._commit()
+            _LOG.debug("end speech")
         elif not self._client.is_final:
             self._receive(context)
         elif self._client.idle_count < self._client.idle_timeout:
@@ -86,8 +93,10 @@ def _receive(self, context):
         if self._client.is_final:
             if context.transcript:
                 context.event("recognize")
+                _LOG.debug("recognize event")
             else:
                 context.event("timeout")
+                _LOG.debug("timeout event")
 
     def _commit(self) -> None:
         self._is_active = False
diff --git a/spokestack/context.py b/spokestack/context.py
@@ -2,15 +2,15 @@
 This module contains a context class to manage
 state between members of the processing pipeline
 """
+import logging
 from typing import Callable
 
 
-class SpeechContext:
-    """Class for managing context of the speech pipeline.
+_LOG = logging.getLogger(__name__)
+
 
-    Args:
-        **kwargs
-    """
+class SpeechContext:
+    """ Class for managing context of the speech pipeline. """
 
     def __init__(self) -> None:
         self._is_speech: bool = False
@@ -79,8 +79,10 @@ def is_active(self, value: bool) -> None:
         self._is_active = value
         if value and not is_active:
             self.event("activate")
+            _LOG.info("activate event")
         elif not value and is_active:
             self.event("deactivate")
+            _LOG.info("deactivate event")
 
     @property
     def transcript(self) -> str:
diff --git a/spokestack/nlu/tflite.py b/spokestack/nlu/tflite.py
@@ -4,6 +4,7 @@
 any slots that are associated with that intent.
 """
 import json
+import logging
 import os
 from importlib import import_module
 from typing import List, Tuple
@@ -16,6 +17,9 @@
 from spokestack.nlu.result import Result
 
 
+_LOG = logging.getLogger(__name__)
+
+
 class TFLiteNLU:
     """ Abstraction for using TFLite NLU models
 
@@ -60,12 +64,16 @@ def __call__(self, utterance: str) -> Result:
 
         # slice off special tokens: [CLS], [SEP]
         tags = tags[: len(input_ids) - 2]
+        _LOG.debug(f"{tags}")
         input_ids = input_ids[1:-1]
+        _LOG.debug(f"{input_ids}")
         # retrieve slots from the tagged positions and decode slots back
         # into original values
         slots = [
             (token_id, tag[2:]) for token_id, tag in zip(input_ids, tags) if tag != "o"
         ]
+        _LOG.debug(f"{slots}")
+
         slot_map: dict = {}
         for (token, tag) in slots:
             if tag in slot_map:
@@ -86,6 +94,7 @@ def __call__(self, utterance: str) -> Result:
                 "parsed_value": parsed,
                 "raw_value": slot_map[key],
             }
+        _LOG.debug(f"parsed slots: {parsed_slots}")
         return Result(
             utterance=utterance,
             intent=intent,
@@ -123,6 +132,9 @@ def _decode(self, outputs) -> Tuple[str, List[str], float]:
         intent_posterior, tag_posterior = outputs
         intents, confidence = self._decode_intent(intent_posterior)
         tags = self._decode_tags(tag_posterior)
+        _LOG.debug(f"decoded tags: {tags}")
+        _LOG.debug(f"decoded intent: {intents}")
+        _LOG.debug(f"confidence: {confidence}")
         return intents, tags, confidence
 
     def _decode_tags(self, posterior):
diff --git a/spokestack/vad/webrtc.py b/spokestack/vad/webrtc.py
@@ -2,6 +2,7 @@
 This module contains the webrtc component for
 voice activity detection (vad)
 """
+import logging
 
 import numpy as np  # type: ignore
 import webrtcvad  # type: ignore
@@ -14,6 +15,8 @@
 AGGRESSIVE = 2
 VERY_AGGRESSIVE = 3
 
+_LOG = logging.getLogger(__name__)
+
 
 class VoiceActivityDetector:
     """This class detects the presence of voice in a frame of audio.
@@ -67,8 +70,10 @@ def __call__(self, context: SpeechContext, frame: np.ndarray) -> None:
         if self._run_value != context.is_speech:
             if self._run_value and self._run_length >= self._rise_length:
                 context.is_speech = True
+                _LOG.info("vad: true")
             if not self._run_value and self._run_length >= self._fall_length:
                 context.is_speech = False
+                _LOG.info("vad: false")
 
     def reset(self) -> None:
         """ Resets the current state """
diff --git a/spokestack/wakeword/tflite.py b/spokestack/wakeword/tflite.py
@@ -2,6 +2,7 @@
 This module contains the class for detecting
 the presence of keywords in an audio stream
 """
+import logging
 import os
 
 import numpy as np  # type: ignore
@@ -11,6 +12,9 @@
 from spokestack.wakeword.ring_buffer import RingBuffer
 
 
+_LOG = logging.getLogger(__name__)
+
+
 class WakewordTrigger:
     """ Detects the presence of a wakeword in the audio input
 
@@ -109,6 +113,8 @@ def __call__(self, context: SpeechContext, frame) -> None:
 
         # reset on vad fall deactivation
         if vad_fall:
+            if not context.is_active:
+                _LOG.info(f"wake: {self._posterior_max}")
             self.reset()
 
     def _sample(self, context: SpeechContext, frame) -> None:
@@ -176,10 +182,11 @@ def _detect(self, context: SpeechContext) -> None:
         frame = np.expand_dims(frame, 0)
         posterior = self.detect_model(frame)[0][0][0]
 
-        if posterior > self._posterior_threshold:
-            context.is_active = True
         if posterior > self._posterior_max:
             self._posterior_max = posterior
+        if posterior > self._posterior_threshold:
+            context.is_active = True
+            _LOG.info(f"wake: {self._posterior_max}")
 
     def reset(self) -> None:
         """ Resets the currect WakewordDetector state """