@@ -7,16 +7,12 @@ The class employs the faster_whisper library to transcribe the recorded audio
 into text using machine learning models, which can be run either on a GPU or
 CPU. Voice activity detection (VAD) is built in, meaning the software can
 automatically start or stop recording based on the presence or absence of
-speech. It integrates wake word detection through the pvporcupine library,
-allowing the software to initiate recording when a specific word or phrase
-is spoken. The system provides real-time feedback and can be further
+speech. The system provides real-time feedback and can be further
 customized.

 Features:
 - Voice Activity Detection: Automatically starts/stops recording when speech
   is detected or when speech ends.
-- Wake Word Detection: Starts recording when a specified wake word (or words)
-  is detected.
 - Event Callbacks: Customizable callbacks for when recording starts
   or finishes.
 - Fast Transcription: Returns the transcribed text from the audio as fast
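With wake word support removed, recording is driven purely by voice activity detection. A minimal usage sketch of that VAD-driven loop, assuming the class is imported from the RealtimeSTT package and that text() blocks until one utterance has been captured and transcribed (as the docstrings in this diff suggest); the model size is illustrative.

# Minimal VAD-driven transcription loop (sketch).
from RealtimeSTT import AudioToTextRecorder

if __name__ == '__main__':
    recorder = AudioToTextRecorder(model="tiny")  # "tiny" is the documented default size
    print("Speak now...")
    while True:
        # Waits for speech to start and end, then returns the transcript.
        print(recorder.text())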
@@ -30,15 +26,12 @@ from typing import Iterable, List, Optional, Union
 import torch.multiprocessing as mp
 import torch
 from ctypes import c_bool
-from openwakeword.model import Model
 from scipy.signal import resample
 from scipy import signal
 import signal as system_signal
 import faster_whisper
-import openwakeword
 import collections
 import numpy as np
-import pvporcupine
 import traceback
 import threading
 import webrtcvad
@@ -69,11 +62,7 @@ INIT_WEBRTC_SENSITIVITY = 3
 INIT_POST_SPEECH_SILENCE_DURATION = 0.6
 INIT_MIN_LENGTH_OF_RECORDING = 0.5
 INIT_MIN_GAP_BETWEEN_RECORDINGS = 0
-INIT_WAKE_WORDS_SENSITIVITY = 0.6
 INIT_PRE_RECORDING_BUFFER_DURATION = 1.0
-INIT_WAKE_WORD_ACTIVATION_DELAY = 0.0
-INIT_WAKE_WORD_TIMEOUT = 5.0
-INIT_WAKE_WORD_BUFFER_DURATION = 0.1
 ALLOWED_LATENCY_LIMIT = 100

 TIME_SLEEP = 0.02
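The remaining INIT_* values are the recorder's tuning defaults for voice activity handling. A sketch of overriding them at construction time; apart from post_speech_silence_duration, which appears in __init__ later in this diff, the keyword names here are assumed to mirror the constants and should be checked against the actual signature.

from RealtimeSTT import AudioToTextRecorder

recorder = AudioToTextRecorder(
    webrtc_sensitivity=3,                # assumed name, see INIT_WEBRTC_SENSITIVITY
    post_speech_silence_duration=0.6,    # INIT_POST_SPEECH_SILENCE_DURATION
    min_length_of_recording=0.5,         # assumed name, see INIT_MIN_LENGTH_OF_RECORDING
    min_gap_between_recordings=0,        # assumed name, see INIT_MIN_GAP_BETWEEN_RECORDINGS
    pre_recording_buffer_duration=1.0,   # assumed name, see INIT_PRE_RECORDING_BUFFER_DURATION
)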
@@ -238,22 +227,6 @@ class AudioToTextRecorder:
                  ),
                  on_vad_detect_start=None,
                  on_vad_detect_stop=None,
-
-                 # Wake word parameters
-                 wakeword_backend: str = "pvporcupine",
-                 openwakeword_model_paths: str = None,
-                 openwakeword_inference_framework: str = "onnx",
-                 wake_words: str = "",
-                 wake_words_sensitivity: float = INIT_WAKE_WORDS_SENSITIVITY,
-                 wake_word_activation_delay: float = (
-                     INIT_WAKE_WORD_ACTIVATION_DELAY
-                 ),
-                 wake_word_timeout: float = INIT_WAKE_WORD_TIMEOUT,
-                 wake_word_buffer_duration: float = INIT_WAKE_WORD_BUFFER_DURATION,
-                 on_wakeword_detected=None,
-                 on_wakeword_timeout=None,
-                 on_wakeword_detection_start=None,
-                 on_wakeword_detection_end=None,
                  on_recorded_chunk=None,
                  debug_mode=False,
                  handle_buffer_overflow: bool = INIT_HANDLE_BUFFER_OVERFLOW,
@@ -270,8 +243,7 @@ class AudioToTextRecorder:
                  use_extended_logging: bool = False,
                  ):
         """
-        Initializes an audio recorder and transcription
-        and wake word detection.
+        Initializes an audio recorder and transcription.

         Args:
         - model (str, default="tiny"): Specifies the size of the transcription
@@ -384,54 +356,6 @@ class AudioToTextRecorder:
             be called when the system listens for voice activity.
         - on_vad_detect_stop (callable, default=None): Callback function to be
             called when the system stops listening for voice activity.
-        - wakeword_backend (str, default="pvporcupine"): Specifies the backend
-            library to use for wake word detection. Supported options include
-            'pvporcupine' for using the Porcupine wake word engine or 'oww' for
-            using the OpenWakeWord engine.
-        - openwakeword_model_paths (str, default=None): Comma-separated paths
-            to model files for the openwakeword library. These paths point to
-            custom models that can be used for wake word detection when the
-            openwakeword library is selected as the wakeword_backend.
-        - openwakeword_inference_framework (str, default="onnx"): Specifies
-            the inference framework to use with the openwakeword library.
-            Can be either 'onnx' for Open Neural Network Exchange format
-            or 'tflite' for TensorFlow Lite.
-        - wake_words (str, default=""): Comma-separated string of wake words to
-            initiate recording when using the 'pvporcupine' wakeword backend.
-            Supported wake words include: 'alexa', 'americano', 'blueberry',
-            'bumblebee', 'computer', 'grapefruits', 'grasshopper', 'hey google',
-            'hey siri', 'jarvis', 'ok google', 'picovoice', 'porcupine',
-            'terminator'. For the 'openwakeword' backend, wake words are
-            automatically extracted from the provided model files, so specifying
-            them here is not necessary.
-        - wake_words_sensitivity (float, default=0.5): Sensitivity for wake
-            word detection, ranging from 0 (least sensitive) to 1 (most
-            sensitive). Default is 0.5.
-        - wake_word_activation_delay (float, default=0): Duration in seconds
-            after the start of monitoring before the system switches to wake
-            word activation if no voice is initially detected. If set to
-            zero, the system uses wake word activation immediately.
-        - wake_word_timeout (float, default=5): Duration in seconds after a
-            wake word is recognized. If no subsequent voice activity is
-            detected within this window, the system transitions back to an
-            inactive state, awaiting the next wake word or voice activation.
-        - wake_word_buffer_duration (float, default=0.1): Duration in seconds
-            to buffer audio data during wake word detection. This helps in
-            cutting out the wake word from the recording buffer so it does not
-            falsely get detected along with the following spoken text, ensuring
-            cleaner and more accurate transcription start triggers.
-            Increase this if parts of the wake word get detected as text.
-        - on_wakeword_detected (callable, default=None): Callback function to
-            be called when a wake word is detected.
-        - on_wakeword_timeout (callable, default=None): Callback function to
-            be called when the system goes back to an inactive state after when
-            no speech was detected after wake word activation
-        - on_wakeword_detection_start (callable, default=None): Callback
-            function to be called when the system starts to listen for wake
-            words
-        - on_wakeword_detection_end (callable, default=None): Callback
-            function to be called when the system stops to listen for
-            wake words (e.g. because of timeout or wake word detected)
         - on_recorded_chunk (callable, default=None): Callback function to be
             called when a chunk of audio is recorded. The function is called
             with the recorded audio chunk as its argument.
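The callbacks documented above are plain callables. A sketch of wiring the ones kept by this change (on_vad_detect_start/stop and on_recorded_chunk); the chunk is assumed to be raw 16-bit mono PCM bytes, which matches how the recording worker later in this diff treats frames.

import numpy as np
from RealtimeSTT import AudioToTextRecorder

def vad_started():
    print("listening for voice activity...")

def vad_stopped():
    print("stopped listening for voice activity")

def chunk_recorded(chunk):
    # chunk: raw audio bytes, assumed 16-bit mono PCM
    samples = np.frombuffer(chunk, dtype=np.int16)
    print(f"chunk: {len(samples)} samples, peak amplitude {np.abs(samples).max()}")

recorder = AudioToTextRecorder(
    on_vad_detect_start=vad_started,
    on_vad_detect_stop=vad_stopped,
    on_recorded_chunk=chunk_recorded,
)
print(recorder.text())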
@@ -475,7 +399,7 @@ class AudioToTextRecorder:

         Raises:
             Exception: Errors related to initializing transcription
-            model, wake word detection, or audio recording.
+            model or audio recording.
         """

         self.language = language
@@ -483,10 +407,6 @@ class AudioToTextRecorder:
         self.input_device_index = input_device_index
         self.gpu_device_index = gpu_device_index
         self.device = device
-        self.wake_words = wake_words
-        self.wake_word_activation_delay = wake_word_activation_delay
-        self.wake_word_timeout = wake_word_timeout
-        self.wake_word_buffer_duration = wake_word_buffer_duration
         self.ensure_sentence_starting_uppercase = (
             ensure_sentence_starting_uppercase
         )
@@ -500,12 +420,8 @@ class AudioToTextRecorder:
         self.post_speech_silence_duration = post_speech_silence_duration
         self.on_recording_start = on_recording_start
         self.on_recording_stop = on_recording_stop
-        self.on_wakeword_detected = on_wakeword_detected
-        self.on_wakeword_timeout = on_wakeword_timeout
         self.on_vad_detect_start = on_vad_detect_start
         self.on_vad_detect_stop = on_vad_detect_stop
-        self.on_wakeword_detection_start = on_wakeword_detection_start
-        self.on_wakeword_detection_end = on_wakeword_detection_end
         self.on_recorded_chunk = on_recorded_chunk
         self.on_transcription_start = on_transcription_start
         self.enable_realtime_transcription = enable_realtime_transcription
@@ -532,7 +448,6 @@ class AudioToTextRecorder:
         self.sample_rate = sample_rate
         self.recording_start_time = 0
         self.recording_stop_time = 0
-        self.wake_word_detect_time = 0
         self.silero_check_time = 0
         self.silero_working = False
         self.speech_end_silence_start = 0
@@ -542,7 +457,6 @@ class AudioToTextRecorder:
         self.spinner = spinner
         self.halo = None
         self.state = "inactive"
-        self.wakeword_detected = False
         self.text_storage = []
         self.realtime_stabilized_text = ""
         self.realtime_stabilized_safetext = ""
@@ -559,7 +473,6 @@ class AudioToTextRecorder:
         self.last_transcription_bytes_b64 = None
         self.initial_prompt = initial_prompt
         self.suppress_tokens = suppress_tokens
-        self.use_wake_words = wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}
         self.detected_language = None
         self.detected_language_probability = 0
         self.detected_realtime_language = None
@@ -689,86 +602,6 @@ class AudioToTextRecorder:
             logging.debug("Faster_whisper realtime speech to text "
                           "transcription model initialized successfully")

-        # Setup wake word detection
-        if wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
-            self.wakeword_backend = wakeword_backend
-
-            self.wake_words_list = [
-                word.strip() for word in wake_words.lower().split(',')
-            ]
-            self.wake_words_sensitivity = wake_words_sensitivity
-            self.wake_words_sensitivities = [
-                float(wake_words_sensitivity)
-                for _ in range(len(self.wake_words_list))
-            ]
-
-            if self.wakeword_backend in {'pvp', 'pvporcupine'}:
-
-                try:
-                    self.porcupine = pvporcupine.create(
-                        keywords=self.wake_words_list,
-                        sensitivities=self.wake_words_sensitivities
-                    )
-                    self.buffer_size = self.porcupine.frame_length
-                    self.sample_rate = self.porcupine.sample_rate
-
-                except Exception as e:
-                    logging.exception(
-                        "Error initializing porcupine "
-                        f"wake word detection engine: {e}"
-                    )
-                    raise
-
-                logging.debug(
-                    "Porcupine wake word detection engine initialized successfully"
-                )
-
-            elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
-
-                openwakeword.utils.download_models()
-
-                try:
-                    if openwakeword_model_paths:
-                        model_paths = openwakeword_model_paths.split(',')
-                        self.owwModel = Model(
-                            wakeword_models=model_paths,
-                            inference_framework=openwakeword_inference_framework
-                        )
-                        logging.info(
-                            "Successfully loaded wakeword model(s): "
-                            f"{openwakeword_model_paths}"
-                        )
-                    else:
-                        self.owwModel = Model(
-                            inference_framework=openwakeword_inference_framework)
-
-                    self.oww_n_models = len(self.owwModel.models.keys())
-                    if not self.oww_n_models:
-                        logging.error(
-                            "No wake word models loaded."
-                        )
-
-                    for model_key in self.owwModel.models.keys():
-                        logging.info(
-                            "Successfully loaded openwakeword model: "
-                            f"{model_key}"
-                        )
-
-                except Exception as e:
-                    logging.exception(
-                        "Error initializing openwakeword "
-                        f"wake word detection engine: {e}"
-                    )
-                    raise
-
-                logging.debug(
-                    "Open wake word detection engine initialized successfully"
-                )
-
-            else:
-                logging.exception(f"Wakeword engine {self.wakeword_backend} unknown/unsupported. Please specify one of: pvporcupine, openwakeword.")
-
-
         # Setup voice activity detection model WebRTC
         try:
             logging.info("Initializing WebRTC voice with "
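With the wake word engines gone, WebRTC VAD (together with Silero elsewhere in the file) is what gates the start and stop of a recording. A standalone sketch of the webrtcvad library this setup relies on; the frame length and aggressiveness value are illustrative, not copied from the recorder.

import webrtcvad

SAMPLE_RATE = 16000                 # webrtcvad accepts 8000/16000/32000/48000 Hz
FRAME_MS = 30                       # frames must be 10, 20 or 30 ms of 16-bit mono PCM
FRAME_BYTES = SAMPLE_RATE * FRAME_MS // 1000 * 2

vad = webrtcvad.Vad(3)              # 0 = most permissive, 3 = most aggressive

def frame_has_speech(frame: bytes) -> bool:
    # frame: one 30 ms chunk of raw 16-bit mono PCM audio
    assert len(frame) == FRAME_BYTES
    return vad.is_speech(frame, SAMPLE_RATE)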
@@ -1208,12 +1041,6 @@ class AudioToTextRecorder:
             if audio_interface:
                 audio_interface.terminate()

-    def wakeup(self):
-        """
-        If in wake work modus, wake up as if a wake word was spoken.
-        """
-        self.listen_start = time.time()
-
     def abort(self):
         self.start_recording_on_voice_activity = False
         self.stop_recording_on_voice_deactivity = False
@@ -1347,46 +1174,6 @@ class AudioToTextRecorder:
             logging.error(f"Error during transcription: {str(e)}")
             raise e

-    def _process_wakeword(self, data):
-        """
-        Processes audio data to detect wake words.
-        """
-        if self.wakeword_backend in {'pvp', 'pvporcupine'}:
-            pcm = struct.unpack_from(
-                "h" * self.buffer_size,
-                data
-            )
-            porcupine_index = self.porcupine.process(pcm)
-            if self.debug_mode:
-                logging.info(f"wake words porcupine_index: {porcupine_index}")
-            return self.porcupine.process(pcm)
-
-        elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}:
-            pcm = np.frombuffer(data, dtype=np.int16)
-            prediction = self.owwModel.predict(pcm)
-            max_score = -1
-            max_index = -1
-            wake_words_in_prediction = len(self.owwModel.prediction_buffer.keys())
-            self.wake_words_sensitivities
-            if wake_words_in_prediction:
-                for idx, mdl in enumerate(self.owwModel.prediction_buffer.keys()):
-                    scores = list(self.owwModel.prediction_buffer[mdl])
-                    if scores[-1] >= self.wake_words_sensitivity and scores[-1] > max_score:
-                        max_score = scores[-1]
-                        max_index = idx
-                if self.debug_mode:
-                    logging.info(f"wake words oww max_index, max_score: {max_index} {max_score}")
-                return max_index
-            else:
-                if self.debug_mode:
-                    logging.info(f"wake words oww_index: -1")
-                return -1
-
-        if self.debug_mode:
-            logging.info("wake words no match")
-
-        return -1
-
     def text(self,
              on_transcription_finished=None,
              ):
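text() is the main entry point left after this change. A sketch of both call styles, assuming (as the signature above suggests) that passing on_transcription_finished hands the result to the callback instead of returning it directly.

from RealtimeSTT import AudioToTextRecorder

recorder = AudioToTextRecorder(model="tiny")

# Blocking style: wait for one utterance and get the transcript back.
print(recorder.text())

# Callback style (assumed): the transcript is delivered to the callable
# once transcription finishes.
def handle_transcript(transcript):
    print("got:", transcript)

recorder.text(on_transcription_finished=handle_transcript)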
@@ -1451,8 +1238,6 @@ class AudioToTextRecorder:
         self.text_storage = []
         self.realtime_stabilized_text = ""
         self.realtime_stabilized_safetext = ""
-        self.wakeword_detected = False
-        self.wake_word_detect_time = 0
         self.frames = []
         self.new_frames.set()
         self.is_recording = True
@@ -1497,12 +1282,10 @@ class AudioToTextRecorder:

     def listen(self):
         """
-        Puts recorder in immediate "listen" state.
-        This is the state after a wake word detection, for example.
+        Puts recorder in "listen" state.
         The recorder now "listens" for voice activation.
         Once voice is detected we enter "recording" state.
         """
-        self.listen_start = time.time()
         self._set_state("listening")
         self.start_recording_on_voice_activity = True

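listen() is now the only way to arm the recorder ahead of time: it sets the state to "listening" and turns on start_recording_on_voice_activity, so the next detected voice starts a recording. A sketch of driving that flow by hand, using the recording callbacks and the abort() method shown earlier in this diff.

from RealtimeSTT import AudioToTextRecorder

recorder = AudioToTextRecorder(
    on_recording_start=lambda: print("recording started"),
    on_recording_stop=lambda: print("recording stopped"),
)

recorder.listen()           # arm: state -> "listening", record on next voice activity
try:
    print(recorder.text())  # blocks until an utterance is captured and transcribed
except KeyboardInterrupt:
    recorder.abort()        # stop listening/recording without transcribing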
@@ -1626,9 +1409,6 @@ class AudioToTextRecorder:
             logging.debug('Debug: Initializing variables')
         time_since_last_buffer_message = 0
         was_recording = False
-        delay_was_passed = False
-        wakeword_detected_time = None
-        wakeword_samples_to_remove = None
         self.allowed_to_early_transcribe = True

         if self.use_extended_logging:
@@ -1717,88 +1497,17 @@ class AudioToTextRecorder:
                 if not self.is_recording:
                     if self.use_extended_logging:
                         logging.debug('Debug: Handling not recording state')
-                    # Handle not recording state
-                    time_since_listen_start = (time.time() - self.listen_start
-                                               if self.listen_start else 0)
-
-                    wake_word_activation_delay_passed = (
-                        time_since_listen_start >
-                        self.wake_word_activation_delay
-                    )
-
-                    if self.use_extended_logging:
-                        logging.debug('Debug: Handling wake-word timeout callback')
-                    # Handle wake-word timeout callback
-                    if wake_word_activation_delay_passed \
-                            and not delay_was_passed:
-
-                        if self.use_wake_words and self.wake_word_activation_delay:
-                            if self.on_wakeword_timeout:
-                                if self.use_extended_logging:
-                                    logging.debug('Debug: Calling on_wakeword_timeout')
-                                self.on_wakeword_timeout()
-                    delay_was_passed = wake_word_activation_delay_passed
+

                     if self.use_extended_logging:
                         logging.debug('Debug: Setting state and spinner text')
                     # Set state and spinner text
-                    if not self.recording_stop_time:
-                        if self.use_wake_words \
-                                and wake_word_activation_delay_passed \
-                                and not self.wakeword_detected:
-                            if self.use_extended_logging:
-                                logging.debug('Debug: Setting state to "wakeword"')
-                            self._set_state("wakeword")
-                        else:
-                            if self.listen_start:
-                                if self.use_extended_logging:
-                                    logging.debug('Debug: Setting state to "listening"')
-                                self._set_state("listening")
-                            else:
-                                if self.use_extended_logging:
-                                    logging.debug('Debug: Setting state to "inactive"')
-                                self._set_state("inactive")
-
-                    if self.use_extended_logging:
-                        logging.debug('Debug: Checking wake word conditions')
-                    if self.use_wake_words and wake_word_activation_delay_passed:
-                        try:
-                            if self.use_extended_logging:
-                                logging.debug('Debug: Processing wakeword')
-                            wakeword_index = self._process_wakeword(data)
-
-                        except struct.error:
-                            logging.error("Error unpacking audio data "
-                                          "for wake word processing.")
-                            continue
-
-                        except Exception as e:
-                            logging.error(f"Wake word processing error: {e}")
-                            continue
-
-                        if self.use_extended_logging:
-                            logging.debug('Debug: Checking if wake word detected')
-                        # If a wake word is detected
-                        if wakeword_index >= 0:
-                            if self.use_extended_logging:
-                                logging.debug('Debug: Wake word detected, updating variables')
-                            self.wake_word_detect_time = time.time()
-                            wakeword_detected_time = time.time()
-                            wakeword_samples_to_remove = int(self.sample_rate * self.wake_word_buffer_duration)
-                            self.wakeword_detected = True
-                            if self.on_wakeword_detected:
-                                if self.use_extended_logging:
-                                    logging.debug('Debug: Calling on_wakeword_detected')
-                                self.on_wakeword_detected()

                     if self.use_extended_logging:
                         logging.debug('Debug: Checking voice activity conditions')
                     # Check for voice activity to
                     # trigger the start of recording
-                    if ((not self.use_wake_words
-                         or not wake_word_activation_delay_passed)
-                            and self.start_recording_on_voice_activity) \
-                            or self.wakeword_detected:
+                    if self.start_recording_on_voice_activity:

                         if self.use_extended_logging:
                             logging.debug('Debug: Checking if voice is active')
@@ -1837,25 +1546,6 @@ class AudioToTextRecorder:
                 else:
                     if self.use_extended_logging:
                         logging.debug('Debug: Handling recording state')
-                    # If we are currently recording
-                    if wakeword_samples_to_remove and wakeword_samples_to_remove > 0:
-                        if self.use_extended_logging:
-                            logging.debug('Debug: Removing wakeword samples')
-                        # Remove samples from the beginning of self.frames
-                        samples_removed = 0
-                        while wakeword_samples_to_remove > 0 and self.frames:
-                            frame = self.frames[0]
-                            frame_samples = len(frame) // 2  # Assuming 16-bit audio
-                            if wakeword_samples_to_remove >= frame_samples:
-                                self.frames.pop(0)
-                                samples_removed += frame_samples
-                                wakeword_samples_to_remove -= frame_samples
-                            else:
-                                self.frames[0] = frame[wakeword_samples_to_remove * 2:]
-                                samples_removed += wakeword_samples_to_remove
-                                samples_to_remove = 0
-
-                        wakeword_samples_to_remove = 0

                     if self.use_extended_logging:
                         logging.debug('Debug: Checking if stop_recording_on_voice_deactivity is True')
@@ -1946,8 +1636,6 @@ class AudioToTextRecorder:
                                     logging.debug('Debug: Resetting speech_end_silence_start')
                                 self.speech_end_silence_start = 0

-                                if self.use_extended_logging:
-                                    logging.debug('Debug: Handling non-wake word scenario')
                             else:
                                 if self.use_extended_logging:
                                     logging.debug('Debug: Setting failed_stop_attempt to True')
@@ -1966,20 +1654,6 @@ class AudioToTextRecorder:
                 if time.time() - self.silero_check_time > 0.1:
                     self.silero_check_time = 0

-                if self.use_extended_logging:
-                    logging.debug('Debug: Handling wake word timeout')
-                # Handle wake word timeout (waited to long initiating
-                # speech after wake word detection)
-                if self.wake_word_detect_time and time.time() - \
-                        self.wake_word_detect_time > self.wake_word_timeout:
-
-                    self.wake_word_detect_time = 0
-                    if self.wakeword_detected and self.on_wakeword_timeout:
-                        if self.use_extended_logging:
-                            logging.debug('Debug: Calling on_wakeword_timeout')
-                        self.on_wakeword_timeout()
-                    self.wakeword_detected = False
-
                 if self.use_extended_logging:
                     logging.debug('Debug: Updating was_recording')
                 was_recording = self.is_recording
@@ -2337,9 +2011,6 @@ class AudioToTextRecorder:
         if old_state == "listening":
             if self.on_vad_detect_stop:
                 self.on_vad_detect_stop()
-        elif old_state == "wakeword":
-            if self.on_wakeword_detection_end:
-                self.on_wakeword_detection_end()

         # Execute callbacks based on transitioning TO a particular state
         if new_state == "listening":
@@ -2348,12 +2019,6 @@ class AudioToTextRecorder:
             self._set_spinner("speak now")
             if self.spinner and self.halo:
                 self.halo._interval = 250
-        elif new_state == "wakeword":
-            if self.on_wakeword_detection_start:
-                self.on_wakeword_detection_start()
-            self._set_spinner(f"say {self.wake_words}")
-            if self.spinner and self.halo:
-                self.halo._interval = 500
         elif new_state == "transcribing":
             if self.on_transcription_start:
                 self.on_transcription_start()
@@ -2369,6 +2034,7 @@ class AudioToTextRecorder:
                 self.halo.stop()
                 self.halo = None

+
     def _set_spinner(self, text):
         """
         Update the spinner's text or create a new
|