@@ -1553,9 +1553,12 @@ class AudioToTextRecorder:
         input for voice activity and accordingly starts/stops the recording.
         """
 
+        logging.debug('Debug: Starting _recording_worker method')
         logging.debug('Starting recording worker')
 
+        logging.debug('Debug: Entering try block')
         try:
+            logging.debug('Debug: Initializing variables')
             time_since_last_buffer_message = 0
             was_recording = False
             delay_was_passed = False
@@ -1563,24 +1566,35 @@ class AudioToTextRecorder:
             wakeword_samples_to_remove = None
             self.allowed_to_early_transcribe = True
 
+            logging.debug('Debug: Starting main loop')
             # Continuously monitor audio for voice activity
             while self.is_running:
 
+                logging.debug('Debug: Entering inner try block')
                 try:
+                    logging.debug('Debug: Trying to get data from audio queue')
                     try:
                         data = self.audio_queue.get(timeout=0.1)
                     except queue.Empty:
+                        logging.debug('Debug: Queue is empty, checking if still running')
                         if not self.is_running:
+                            logging.debug('Debug: Not running, breaking loop')
                             break
+                        logging.debug('Debug: Continuing to next iteration')
                         continue
 
+                    logging.debug('Debug: Checking for on_recorded_chunk callback')
                    if self.on_recorded_chunk:
+                        logging.debug('Debug: Calling on_recorded_chunk')
                        self.on_recorded_chunk(data)
 
+                    logging.debug('Debug: Checking if handle_buffer_overflow is True')
                    if self.handle_buffer_overflow:
+                        logging.debug('Debug: Handling buffer overflow')
                        # Handle queue overflow
                        if (self.audio_queue.qsize() >
                                self.allowed_latency_limit):
+                            logging.debug('Debug: Queue size exceeds limit, logging warnings')
                            logging.warning("!!! ### !!! ### !!!")
                            logging.warning("Audio queue size exceeds "
                                            "latency limit. Current size: "
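
The hunk above also shows the worker's shutdown-aware polling pattern: audio_queue.get(timeout=0.1) wakes the loop every 100 ms, so a cleared is_running flag is noticed promptly instead of blocking forever on an empty queue. A minimal standalone sketch of the same pattern, under the assumption that a threading.Event and an on_chunk callback stand in for the recorder's flag and handler (neither is the recorder's actual API):

    import queue
    import threading
    from typing import Callable

    def consume(audio_queue: "queue.Queue[bytes]",
                running: threading.Event,
                on_chunk: Callable[[bytes], None]) -> None:
        # Poll with a short timeout so shutdown is noticed within ~100 ms.
        while running.is_set():
            try:
                data = audio_queue.get(timeout=0.1)
            except queue.Empty:
                continue  # nothing queued yet; re-check the running flag
            on_chunk(data)
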
@@ -1589,16 +1603,19 @@ class AudioToTextRecorder:
                                             )
                             logging.warning("!!! ### !!! ### !!!")
 
+                        logging.debug('Debug: Discarding old chunks if necessary')
                         while (self.audio_queue.qsize() >
                                 self.allowed_latency_limit):
 
                             data = self.audio_queue.get()
 
                 except BrokenPipeError:
+                    logging.debug('Debug: Caught BrokenPipeError')
                     logging.error("BrokenPipeError _recording_worker")
                     self.is_running = False
                     break
 
+                logging.debug('Debug: Updating time_since_last_buffer_message')
                 # Feed the extracted data to the audio_queue
                 if time_since_last_buffer_message:
                     time_passed = time.time() - time_since_last_buffer_message
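
This hunk completes the overflow guard: once the queue backlog exceeds allowed_latency_limit, the oldest chunks are discarded so worst-case latency stays bounded rather than growing without limit. A sketch of that drop-oldest policy in isolation (the function name and return value are illustrative, not part of the recorder):

    import queue

    def drain_to_limit(audio_queue: "queue.Queue[bytes]",
                       allowed_latency_limit: int) -> int:
        # Discard the oldest chunks until the backlog fits the latency budget.
        dropped = 0
        while audio_queue.qsize() > allowed_latency_limit:
            try:
                audio_queue.get_nowait()
                dropped += 1
            except queue.Empty:
                break  # another consumer emptied the queue first
        return dropped
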
@@ -1608,90 +1625,116 @@ class AudioToTextRecorder:
                 else:
                     time_since_last_buffer_message = time.time()
 
+                logging.debug('Debug: Initializing failed_stop_attempt')
                 failed_stop_attempt = False
 
+                logging.debug('Debug: Checking if not recording')
                 if not self.is_recording:
+                    logging.debug('Debug: Handling not recording state')
                     # Handle not recording state
                     time_since_listen_start = (time.time() - self.listen_start
-                                               if self.listen_start else 0)
+                                               if self.listen_start else 0)
 
                     wake_word_activation_delay_passed = (
                         time_since_listen_start >
                         self.wake_word_activation_delay
                     )
 
+                    logging.debug('Debug: Handling wake-word timeout callback')
                     # Handle wake-word timeout callback
                     if wake_word_activation_delay_passed \
                             and not delay_was_passed:
 
                         if self.use_wake_words and self.wake_word_activation_delay:
                             if self.on_wakeword_timeout:
+                                logging.debug('Debug: Calling on_wakeword_timeout')
                                 self.on_wakeword_timeout()
                         delay_was_passed = wake_word_activation_delay_passed
 
+                    logging.debug('Debug: Setting state and spinner text')
                     # Set state and spinner text
                     if not self.recording_stop_time:
                         if self.use_wake_words \
                                 and wake_word_activation_delay_passed \
                                 and not self.wakeword_detected:
+                            logging.debug('Debug: Setting state to "wakeword"')
                             self._set_state("wakeword")
                         else:
                             if self.listen_start:
+                                logging.debug('Debug: Setting state to "listening"')
                                 self._set_state("listening")
                             else:
+                                logging.debug('Debug: Setting state to "inactive"')
                                 self._set_state("inactive")
 
+                    logging.debug('Debug: Checking wake word conditions')
                     if self.use_wake_words and wake_word_activation_delay_passed:
                         try:
+                            logging.debug('Debug: Processing wakeword')
                             wakeword_index = self._process_wakeword(data)
 
                         except struct.error:
+                            logging.debug('Debug: Caught struct.error')
                             logging.error("Error unpacking audio data "
-                                          "for wake word processing.")
+                                          "for wake word processing.")
                             continue
 
                         except Exception as e:
+                            logging.debug('Debug: Caught general exception')
                             logging.error(f"Wake word processing error: {e}")
                             continue
 
+                        logging.debug('Debug: Checking if wake word detected')
                         # If a wake word is detected
                         if wakeword_index >= 0:
+                            logging.debug('Debug: Wake word detected, updating variables')
                             self.wake_word_detect_time = time.time()
                             wakeword_detected_time = time.time()
                             wakeword_samples_to_remove = int(self.sample_rate * self.wake_word_buffer_duration)
                             self.wakeword_detected = True
                             if self.on_wakeword_detected:
+                                logging.debug('Debug: Calling on_wakeword_detected')
                                 self.on_wakeword_detected()
 
+                    logging.debug('Debug: Checking voice activity conditions')
                     # Check for voice activity to
                     # trigger the start of recording
                     if ((not self.use_wake_words
-                         or not wake_word_activation_delay_passed)
+                         or not wake_word_activation_delay_passed)
                             and self.start_recording_on_voice_activity) \
                             or self.wakeword_detected:
 
+                        logging.debug('Debug: Checking if voice is active')
                         if self._is_voice_active():
+                            logging.debug('Debug: Voice activity detected')
                             logging.info("voice activity detected")
 
+                            logging.debug('Debug: Starting recording')
                             self.start()
 
                             self.start_recording_on_voice_activity = False
 
+                            logging.debug('Debug: Adding buffered audio to frames')
                             # Add the buffered audio
                             # to the recording frames
                             self.frames.extend(list(self.audio_buffer))
                             self.audio_buffer.clear()
 
+                            logging.debug('Debug: Resetting Silero VAD model states')
                             self.silero_vad_model.reset_states()
                         else:
+                            logging.debug('Debug: Checking voice activity')
                             data_copy = data[:]
                             self._check_voice_activity(data_copy)
 
+                    logging.debug('Debug: Resetting speech_end_silence_start')
                     self.speech_end_silence_start = 0
 
                 else:
+                    logging.debug('Debug: Handling recording state')
                     # If we are currently recording
                     if wakeword_samples_to_remove and wakeword_samples_to_remove > 0:
+                        logging.debug('Debug: Removing wakeword samples')
                         # Remove samples from the beginning of self.frames
                         samples_removed = 0
                         while wakeword_samples_to_remove > 0 and self.frames:
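
When voice activity finally triggers self.start(), the hunk above prepends self.audio_buffer to self.frames, so the onset of the utterance captured before the VAD fired is not lost. A self-contained sketch of that pre-roll pattern; the 16 kHz rate, 512-sample chunk size, and one-second window are illustrative assumptions, not the recorder's configured values:

    import collections

    SAMPLE_RATE = 16000       # assumed sample rate
    CHUNK_SAMPLES = 512       # assumed samples per chunk
    PRE_ROLL_SECONDS = 1.0    # assumed pre-roll window

    # Rolling buffer holding roughly the last second of audio while idle.
    maxlen = int(PRE_ROLL_SECONDS * SAMPLE_RATE / CHUNK_SAMPLES)
    audio_buffer: "collections.deque[bytes]" = collections.deque(maxlen=maxlen)
    frames: "list[bytes]" = []

    def on_chunk(data: bytes, recording: bool) -> None:
        if recording:
            frames.append(data)        # recording: keep everything
        else:
            audio_buffer.append(data)  # idle: keep only the pre-roll window

    def start_recording() -> None:
        # Seed the recording with the buffered pre-roll, then clear it.
        frames.extend(audio_buffer)
        audio_buffer.clear()
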
@@ -1708,20 +1751,25 @@ class AudioToTextRecorder:
 
                         wakeword_samples_to_remove = 0
 
+                    logging.debug('Debug: Checking if stop_recording_on_voice_deactivity is True')
                     # Stop the recording if silence is detected after speech
                     if self.stop_recording_on_voice_deactivity:
+                        logging.debug('Debug: Determining if speech is detected')
                         is_speech = (
                             self._is_silero_speech(data) if self.silero_deactivity_detection
                             else self._is_webrtc_speech(data, True)
                         )
 
+                        logging.debug('Debug: Formatting speech_end_silence_start')
                         if not self.speech_end_silence_start:
                             str_speech_end_silence_start = "0"
                         else:
                             str_speech_end_silence_start = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
                         logging.debug(f"is_speech: {is_speech}, str_speech_end_silence_start: {str_speech_end_silence_start}")
 
+                        logging.debug('Debug: Checking if speech is not detected')
                         if not is_speech:
+                            logging.debug('Debug: Handling voice deactivity')
                             # Voice deactivity was detected, so we start
                             # measuring silence time before stopping recording
                             if self.speech_end_silence_start == 0 and \
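
Both this hunk and a later one format wall-clock timestamps as HH:MM:SS.mmm via strftime('%H:%M:%S.%f')[:-3]: %f renders microseconds as six digits, and slicing off the last three leaves milliseconds. The same trick in isolation:

    import datetime
    import time

    def fmt_hms_ms(ts: float) -> str:
        # %f is microseconds (6 digits); dropping the last 3 yields milliseconds.
        return datetime.datetime.fromtimestamp(ts).strftime('%H:%M:%S.%f')[:-3]

    print(fmt_hms_ms(time.time()))  # e.g. 14:03:07.412
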
@@ -1729,6 +1777,7 @@ class AudioToTextRecorder:
 
                                 self.speech_end_silence_start = time.time()
 
+                            logging.debug('Debug: Checking early transcription conditions')
                             if self.speech_end_silence_start and self.early_transcription_on_silence and len(self.frames) > 0 and \
                                 (time.time() - self.speech_end_silence_start > self.early_transcription_on_silence) and \
                                 self.allowed_to_early_transcribe:
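
When this early-transcription branch fires, the buffered frames are handed to the transcriber ahead of the final stop. Per the commented-out copy of the method further down in this diff, the raw 16-bit PCM chunks are joined and normalized to float32 before being sent; a sketch of that conversion (INT16_MAX_ABS_VALUE mirrors the module-level constant):

    import numpy as np

    INT16_MAX_ABS_VALUE = 32768.0  # mirrors the module-level constant

    def frames_to_float32(frames: "list[bytes]") -> np.ndarray:
        # Join raw 16-bit PCM chunks and normalize to [-1.0, 1.0) float32.
        audio_array = np.frombuffer(b''.join(frames), dtype=np.int16)
        return audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
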
@@ -1740,45 +1789,57 @@ class AudioToTextRecorder:
                                 self.allowed_to_early_transcribe = False
 
                         else:
+                            logging.debug('Debug: Handling speech detection')
                             if self.speech_end_silence_start:
                                 logging.info("Resetting self.speech_end_silence_start")
                                 self.speech_end_silence_start = 0
                                 self.allowed_to_early_transcribe = True
 
-
+                        logging.debug('Debug: Checking if silence duration exceeds threshold')
                         # Wait for silence to stop recording after speech
                         if self.speech_end_silence_start and time.time() - \
                                 self.speech_end_silence_start >= \
                                 self.post_speech_silence_duration:
 
+                            logging.debug('Debug: Formatting silence start time')
                             # Get time in desired format (HH:MM:SS.nnn)
                             silence_start_time = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
 
+                            logging.debug('Debug: Calculating time difference')
                             # Calculate time difference
                             time_diff = time.time() - self.speech_end_silence_start
 
+                            logging.debug('Debug: Logging voice deactivity detection')
                             logging.info(f"voice deactivity detected at {silence_start_time}, "
                                          f"time since silence start: {time_diff:.3f} seconds")
 
+                            logging.debug('Debug: Appending data to frames and stopping recording')
                             self.frames.append(data)
                             self.stop()
                             if not self.is_recording:
+                                logging.debug('Debug: Resetting speech_end_silence_start')
                                 self.speech_end_silence_start = 0
 
+                                logging.debug('Debug: Handling non-wake word scenario')
                                 if not self.use_wake_words:
                                     self.listen_start = time.time()
                                     self._set_state("listening")
                                     self.start_recording_on_voice_activity = True
                             else:
+                                logging.debug('Debug: Setting failed_stop_attempt to True')
                                 failed_stop_attempt = True
 
+                logging.debug('Debug: Checking if recording stopped')
                 if not self.is_recording and was_recording:
+                    logging.debug('Debug: Resetting after stopping recording')
                     # Reset after stopping recording to ensure clean state
                     self.stop_recording_on_voice_deactivity = False
 
+                logging.debug('Debug: Checking Silero time')
                 if time.time() - self.silero_check_time > 0.1:
                     self.silero_check_time = 0
 
+                logging.debug('Debug: Handling wake word timeout')
                 # Handle wake word timeout (waited to long initiating
                 # speech after wake word detection)
                 if self.wake_word_detect_time and time.time() - \
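
The stop condition in this hunk is a simple re-armable timer: the first non-speech chunk after speech records speech_end_silence_start, renewed speech clears it, and recording stops once the measured silence reaches post_speech_silence_duration. The same logic isolated in a small helper (class and method names are illustrative):

    import time

    class SilenceTimer:
        """Tracks trailing silence after speech; update() returns True when it is time to stop."""

        def __init__(self, post_speech_silence_duration: float) -> None:
            self.post_speech_silence_duration = post_speech_silence_duration
            self.silence_start = 0.0

        def update(self, is_speech: bool) -> bool:
            if is_speech:
                self.silence_start = 0.0  # speech resumed; disarm the timer
                return False
            if not self.silence_start:
                self.silence_start = time.time()  # first silent chunk arms the timer
            return time.time() - self.silence_start >= self.post_speech_silence_duration
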
@@ -1786,22 +1847,286 @@ class AudioToTextRecorder:
                     self.wake_word_detect_time = 0
                     if self.wakeword_detected and self.on_wakeword_timeout:
+                        logging.debug('Debug: Calling on_wakeword_timeout')
                         self.on_wakeword_timeout()
                     self.wakeword_detected = False
 
+                logging.debug('Debug: Updating was_recording')
                 was_recording = self.is_recording
 
+                logging.debug('Debug: Checking if recording and not failed stop attempt')
                 if self.is_recording and not failed_stop_attempt:
+                    logging.debug('Debug: Appending data to frames')
                     self.frames.append(data)
 
+                logging.debug('Debug: Checking if not recording or speech end silence start')
                 if not self.is_recording or self.speech_end_silence_start:
+                    logging.debug('Debug: Appending data to audio buffer')
                     self.audio_buffer.append(data)
 
         except Exception as e:
+            logging.debug('Debug: Caught exception in main try block')
             if not self.interrupt_stop_event.is_set():
                 logging.error(f"Unhandled exeption in _recording_worker: {e}")
                 raise
 
+        logging.debug('Debug: Exiting _recording_worker method')
+
+    # def _recording_worker(self):
+    #     """
+    #     The main worker method which constantly monitors the audio
+    #     input for voice activity and accordingly starts/stops the recording.
+    #     """
+
+    #     logging.debug('Starting recording worker')
+
+    #     try:
+    #         time_since_last_buffer_message = 0
+    #         was_recording = False
+    #         delay_was_passed = False
+    #         wakeword_detected_time = None
+    #         wakeword_samples_to_remove = None
+    #         self.allowed_to_early_transcribe = True
+
+    #         # Continuously monitor audio for voice activity
+    #         while self.is_running:
+
+    #             try:
+    #                 try:
+    #                     data = self.audio_queue.get(timeout=0.1)
+    #                 except queue.Empty:
+    #                     if not self.is_running:
+    #                         break
+    #                     continue
+
+    #                 if self.on_recorded_chunk:
+    #                     self.on_recorded_chunk(data)
+
+    #                 if self.handle_buffer_overflow:
+    #                     # Handle queue overflow
+    #                     if (self.audio_queue.qsize() >
+    #                             self.allowed_latency_limit):
+    #                         logging.warning("!!! ### !!! ### !!!")
+    #                         logging.warning("Audio queue size exceeds "
+    #                                         "latency limit. Current size: "
+    #                                         f"{self.audio_queue.qsize()}. "
+    #                                         "Discarding old audio chunks."
+    #                                         )
+    #                         logging.warning("!!! ### !!! ### !!!")
+
+    #                     while (self.audio_queue.qsize() >
+    #                             self.allowed_latency_limit):
+
+    #                         data = self.audio_queue.get()
+
+    #             except BrokenPipeError:
+    #                 logging.error("BrokenPipeError _recording_worker")
+    #                 self.is_running = False
+    #                 break
+
+    #             # Feed the extracted data to the audio_queue
+    #             if time_since_last_buffer_message:
+    #                 time_passed = time.time() - time_since_last_buffer_message
+    #                 if time_passed > 1:
+    #                     logging.debug("_recording_worker processing audio data")
+    #                     time_since_last_buffer_message = time.time()
+    #             else:
+    #                 time_since_last_buffer_message = time.time()
+
+    #             failed_stop_attempt = False
+
+    #             if not self.is_recording:
+    #                 # Handle not recording state
+    #                 time_since_listen_start = (time.time() - self.listen_start
+    #                                            if self.listen_start else 0)
+
+    #                 wake_word_activation_delay_passed = (
+    #                     time_since_listen_start >
+    #                     self.wake_word_activation_delay
+    #                 )
+
+    #                 # Handle wake-word timeout callback
+    #                 if wake_word_activation_delay_passed \
+    #                         and not delay_was_passed:
+
+    #                     if self.use_wake_words and self.wake_word_activation_delay:
+    #                         if self.on_wakeword_timeout:
+    #                             self.on_wakeword_timeout()
+    #                     delay_was_passed = wake_word_activation_delay_passed
+
+    #                 # Set state and spinner text
+    #                 if not self.recording_stop_time:
+    #                     if self.use_wake_words \
+    #                             and wake_word_activation_delay_passed \
+    #                             and not self.wakeword_detected:
+    #                         self._set_state("wakeword")
+    #                     else:
+    #                         if self.listen_start:
+    #                             self._set_state("listening")
+    #                         else:
+    #                             self._set_state("inactive")
+
+    #                 if self.use_wake_words and wake_word_activation_delay_passed:
+    #                     try:
+    #                         wakeword_index = self._process_wakeword(data)
+
+    #                     except struct.error:
+    #                         logging.error("Error unpacking audio data "
+    #                                       "for wake word processing.")
+    #                         continue
+
+    #                     except Exception as e:
+    #                         logging.error(f"Wake word processing error: {e}")
+    #                         continue
+
+    #                     # If a wake word is detected
+    #                     if wakeword_index >= 0:
+    #                         self.wake_word_detect_time = time.time()
+    #                         wakeword_detected_time = time.time()
+    #                         wakeword_samples_to_remove = int(self.sample_rate * self.wake_word_buffer_duration)
+    #                         self.wakeword_detected = True
+    #                         if self.on_wakeword_detected:
+    #                             self.on_wakeword_detected()
+
+    #                 # Check for voice activity to
+    #                 # trigger the start of recording
+    #                 if ((not self.use_wake_words
+    #                      or not wake_word_activation_delay_passed)
+    #                         and self.start_recording_on_voice_activity) \
+    #                         or self.wakeword_detected:
+
+    #                     if self._is_voice_active():
+    #                         logging.info("voice activity detected")
+
+    #                         self.start()
+
+    #                         self.start_recording_on_voice_activity = False
+
+    #                         # Add the buffered audio
+    #                         # to the recording frames
+    #                         self.frames.extend(list(self.audio_buffer))
+    #                         self.audio_buffer.clear()
+
+    #                         self.silero_vad_model.reset_states()
+    #                     else:
+    #                         data_copy = data[:]
+    #                         self._check_voice_activity(data_copy)
+
+    #                 self.speech_end_silence_start = 0
+
+    #             else:
+    #                 # If we are currently recording
+    #                 if wakeword_samples_to_remove and wakeword_samples_to_remove > 0:
+    #                     # Remove samples from the beginning of self.frames
+    #                     samples_removed = 0
+    #                     while wakeword_samples_to_remove > 0 and self.frames:
+    #                         frame = self.frames[0]
+    #                         frame_samples = len(frame) // 2  # Assuming 16-bit audio
+    #                         if wakeword_samples_to_remove >= frame_samples:
+    #                             self.frames.pop(0)
+    #                             samples_removed += frame_samples
+    #                             wakeword_samples_to_remove -= frame_samples
+    #                         else:
+    #                             self.frames[0] = frame[wakeword_samples_to_remove * 2:]
+    #                             samples_removed += wakeword_samples_to_remove
+    #                             samples_to_remove = 0
+
+    #                     wakeword_samples_to_remove = 0
+
+    #                 # Stop the recording if silence is detected after speech
+    #                 if self.stop_recording_on_voice_deactivity:
+    #                     is_speech = (
+    #                         self._is_silero_speech(data) if self.silero_deactivity_detection
+    #                         else self._is_webrtc_speech(data, True)
+    #                     )
+
+    #                     if not self.speech_end_silence_start:
+    #                         str_speech_end_silence_start = "0"
+    #                     else:
+    #                         str_speech_end_silence_start = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
+    #                     logging.debug(f"is_speech: {is_speech}, str_speech_end_silence_start: {str_speech_end_silence_start}")
+
+    #                     if not is_speech:
+    #                         # Voice deactivity was detected, so we start
+    #                         # measuring silence time before stopping recording
+    #                         if self.speech_end_silence_start == 0 and \
+    #                             (time.time() - self.recording_start_time > self.min_length_of_recording):
+
+    #                             self.speech_end_silence_start = time.time()
+
+    #                         if self.speech_end_silence_start and self.early_transcription_on_silence and len(self.frames) > 0 and \
+    #                             (time.time() - self.speech_end_silence_start > self.early_transcription_on_silence) and \
+    #                             self.allowed_to_early_transcribe:
+    #                             logging.debug("Adding early transcription request")
+    #                             self.transcribe_count += 1
+    #                             audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
+    #                             audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
+    #                             self.parent_transcription_pipe.send((audio, self.language))
+    #                             self.allowed_to_early_transcribe = False
+
+    #                     else:
+    #                         if self.speech_end_silence_start:
+    #                             logging.info("Resetting self.speech_end_silence_start")
+    #                             self.speech_end_silence_start = 0
+    #                             self.allowed_to_early_transcribe = True
+
+
+    #                     # Wait for silence to stop recording after speech
+    #                     if self.speech_end_silence_start and time.time() - \
+    #                             self.speech_end_silence_start >= \
+    #                             self.post_speech_silence_duration:
+
+    #                         # Get time in desired format (HH:MM:SS.nnn)
+    #                         silence_start_time = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
+
+    #                         # Calculate time difference
+    #                         time_diff = time.time() - self.speech_end_silence_start
+
+    #                         logging.info(f"voice deactivity detected at {silence_start_time}, "
+    #                                      f"time since silence start: {time_diff:.3f} seconds")
+
+    #                         self.frames.append(data)
+    #                         self.stop()
+    #                         if not self.is_recording:
+    #                             self.speech_end_silence_start = 0
+
+    #                             if not self.use_wake_words:
+    #                                 self.listen_start = time.time()
+    #                                 self._set_state("listening")
+    #                                 self.start_recording_on_voice_activity = True
+    #                         else:
+    #                             failed_stop_attempt = True
+
+    #             if not self.is_recording and was_recording:
+    #                 # Reset after stopping recording to ensure clean state
+    #                 self.stop_recording_on_voice_deactivity = False
+
+    #             if time.time() - self.silero_check_time > 0.1:
+    #                 self.silero_check_time = 0
+
+    #             # Handle wake word timeout (waited to long initiating
+    #             # speech after wake word detection)
+    #             if self.wake_word_detect_time and time.time() - \
+    #                     self.wake_word_detect_time > self.wake_word_timeout:
+
+    #                 self.wake_word_detect_time = 0
+    #                 if self.wakeword_detected and self.on_wakeword_timeout:
+    #                     self.on_wakeword_timeout()
+    #                 self.wakeword_detected = False
+
+    #             was_recording = self.is_recording
+
+    #             if self.is_recording and not failed_stop_attempt:
+    #                 self.frames.append(data)
+
+    #             if not self.is_recording or self.speech_end_silence_start:
+    #                 self.audio_buffer.append(data)
+
+    #     except Exception as e:
+    #         if not self.interrupt_stop_event.is_set():
+    #             logging.error(f"Unhandled exeption in _recording_worker: {e}")
+    #             raise
+
 
     def _realtime_worker(self):
         """