@@ -256,7 +256,8 @@ class AudioToTextRecorder:
                  print_transcription_time: bool = False,
                  early_transcription_on_silence: int = 0,
                  allowed_latency_limit: int = ALLOWED_LATENCY_LIMIT,
-                 no_log_file: bool = False
+                 no_log_file: bool = False,
+                 use_extended_logging: bool = False,
                  ):
         """
         Initializes an audio recorder and transcription
@@ -453,6 +454,9 @@ class AudioToTextRecorder:
         - allowed_latency_limit (int, default=100): Maximal amount of chunks
           that can be unprocessed in queue before discarding chunks.
         - no_log_file (bool, default=False): Skips writing of debug log file.
+        - use_extended_logging (bool, default=False): Writes extensive
+          log messages for the recording worker, which processes the audio
+          chunks.

         Raises:
             Exception: Errors related to initializing transcription
@@ -543,9 +547,11 @@ class AudioToTextRecorder:
         self.detected_realtime_language = None
         self.detected_realtime_language_probability = 0
         self.transcription_lock = threading.Lock()
+        self.shutdown_lock = threading.Lock()
         self.transcribe_count = 0
         self.print_transcription_time = print_transcription_time
         self.early_transcription_on_silence = early_transcription_on_silence
+        self.use_extended_logging = use_extended_logging

         # Initialize the logging configuration with the specified level
         log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s'
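The two flags wired up above combine naturally: `no_log_file` suppresses the debug log file while `use_extended_logging` turns on per-chunk tracing in the recording worker. A minimal usage sketch (assumes the published RealtimeSTT package API; `text()` and `shutdown()` already exist on this class):

    from RealtimeSTT import AudioToTextRecorder

    # Verbose recording-worker traces via the logging module,
    # but no debug log file on disk.
    recorder = AudioToTextRecorder(
        use_extended_logging=True,  # new flag introduced in this diff
        no_log_file=True,
    )
    print(recorder.text())  # blocks until a transcription is ready
    recorder.shutdown()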
@@ -854,136 +860,6 @@ class AudioToTextRecorder:
     def _transcription_worker(*args, **kwargs):
         worker = TranscriptionWorker(*args, **kwargs)
         worker.run()

-    # @staticmethod
-    # def _transcription_worker(conn,
-    #                           stdout_pipe,
-    #                           model_path,
-    #                           compute_type,
-    #                           gpu_device_index,
-    #                           device,
-    #                           ready_event,
-    #                           shutdown_event,
-    #                           interrupt_stop_event,
-    #                           beam_size,
-    #                           initial_prompt,
-    #                           suppress_tokens
-    #                           ):
-    #     """
-    #     Worker method that handles the continuous
-    #     process of transcribing audio data.
-
-    #     This method runs in a separate process and is responsible for:
-    #     - Initializing the `faster_whisper` model used for transcription.
-    #     - Receiving audio data sent through a pipe and using the model
-    #       to transcribe it.
-    #     - Sending transcription results back through the pipe.
-    #     - Continuously checking for a shutdown event to gracefully
-    #       terminate the transcription process.
-
-    #     Args:
-    #         conn (multiprocessing.Connection): The connection endpoint used
-    #           for receiving audio data and sending transcription results.
-    #         model_path (str): The path to the pre-trained faster_whisper model
-    #           for transcription.
-    #         compute_type (str): Specifies the type of computation to be used
-    #           for transcription.
-    #         gpu_device_index (int): Device ID to use.
-    #         device (str): Device for model to use.
-    #         ready_event (threading.Event): An event that is set when the
-    #           transcription model is successfully initialized and ready.
-    #         shutdown_event (threading.Event): An event that, when set,
-    #           signals this worker method to terminate.
-    #         interrupt_stop_event (threading.Event): An event that, when set,
-    #           signals this worker method to stop processing audio data.
-    #         beam_size (int): The beam size to use for beam search decoding.
-    #         initial_prompt (str or iterable of int): Initial prompt to be fed
-    #           to the transcription model.
-    #         suppress_tokens (list of int): Tokens to be suppressed from the
-    #           transcription output.
-    #     Raises:
-    #         Exception: If there is an error while initializing the
-    #           transcription model.
-    #     """
-
-    #     system_signal.signal(system_signal.SIGINT, system_signal.SIG_IGN)
-
-    #     def custom_print(*args, **kwargs):
-    #         message = ' '.join(map(str, args))
-    #         try:
-    #             stdout_pipe.send(message)
-    #         except (BrokenPipeError, EOFError, OSError):
-    #             # The pipe probably has been closed, so we ignore the error
-    #             pass
-
-    #     # Replace the built-in print function with our custom one
-    #     __builtins__['print'] = custom_print
-
-    #     logging.info("Initializing faster_whisper "
-    #                  f"main transcription model {model_path}"
-    #                  )
-
-    #     try:
-    #         model = faster_whisper.WhisperModel(
-    #             model_size_or_path=model_path,
-    #             device=device,
-    #             compute_type=compute_type,
-    #             device_index=gpu_device_index,
-    #         )
-
-    #     except Exception as e:
-    #         logging.exception("Error initializing main "
-    #                           f"faster_whisper transcription model: {e}"
-    #                           )
-    #         raise
-
-    #     ready_event.set()
-
-    #     logging.debug("Faster_whisper main speech to text "
-    #                   "transcription model initialized successfully"
-    #                   )
-
-    #     try:
-    #         while not shutdown_event.is_set():
-    #             try:
-    #                 if conn.poll(0.01):
-    #                     logging.debug("Receive from _transcription_worker pipe")
-    #                     audio, language = conn.recv()
-    #                     try:
-    #                         segments, info = model.transcribe(
-    #                             audio,
-    #                             language=language if language else None,
-    #                             beam_size=beam_size,
-    #                             initial_prompt=initial_prompt,
-    #                             suppress_tokens=suppress_tokens
-    #                         )
-    #                         transcription = " ".join(seg.text for seg in segments)
-    #                         transcription = transcription.strip()
-    #                         logging.debug(f"Final text detected with main model: {transcription}")
-    #                         conn.send(('success', (transcription, info)))
-    #                     except Exception as e:
-    #                         logging.error(f"General error in _transcription_worker in transcription: {e}")
-    #                         conn.send(('error', str(e)))
-    #                 else:
-    #                     time.sleep(TIME_SLEEP)
-
-    #     except KeyboardInterrupt:
-    #         interrupt_stop_event.set()
-
-    #         logging.debug("Transcription worker process "
-    #                       "finished due to KeyboardInterrupt"
-    #                       )
-    #         stdout_pipe.close()
-    #         break
-
-    #     except Exception as e:
-    #         logging.error(f"General error in _transcription_worker in accessing pipe: {e}")
-
-    #     finally:
-    #         __builtins__['print'] = print  # Restore the original print function
-    #         conn.close()
-    #         stdout_pipe.close()

     @staticmethod
     def _audio_data_worker(audio_queue,
@@ -1438,46 +1314,6 @@ class AudioToTextRecorder:
         else:
             return self.transcribe()

-    # def text(self,
-    #          on_transcription_finished=None,
-    #          ):
-    #     """
-    #     Transcribes audio captured by this class instance
-    #     using the `faster_whisper` model.
-
-    #     - Automatically starts recording upon voice activity if not manually
-    #       started using `recorder.start()`.
-    #     - Automatically stops recording upon voice deactivity if not manually
-    #       stopped with `recorder.stop()`.
-    #     - Processes the recorded audio to generate transcription.
-
-    #     Args:
-    #         on_transcription_finished (callable, optional): Callback function
-    #           to be executed when transcription is ready.
-    #           If provided, transcription will be performed asynchronously, and
-    #           the callback will receive the transcription as its argument.
-    #           If omitted, the transcription will be performed synchronously,
-    #           and the result will be returned.
-
-    #     Returns (if not callback is set):
-    #         str: The transcription of the recorded audio
-    #     """
-
-    #     self.interrupt_stop_event.clear()
-    #     self.was_interrupted.clear()
-
-    #     self.wait_audio()
-
-    #     if self.is_shut_down or self.interrupt_stop_event.is_set():
-    #         if self.interrupt_stop_event.is_set():
-    #             self.was_interrupted.set()
-    #         return ""
-
-    #     if on_transcription_finished:
-    #         threading.Thread(target=on_transcription_finished,
-    #                          args=(self.transcribe(),)).start()
-    #     else:
-    #         return self.transcribe()

     def start(self):
         """
@@ -1594,54 +1430,58 @@ class AudioToTextRecorder:
         recording worker and closing the audio stream.
         """

-        print("RealtimeSTT shutting down")
-        logging.debug("RealtimeSTT shutting down")
+        with self.shutdown_lock:
+            if self.is_shut_down:
+                return

-        # Force wait_audio() and text() to exit
-        self.is_shut_down = True
-        self.start_recording_event.set()
-        self.stop_recording_event.set()
+            print("\033[91mRealtimeSTT shutting down\033[0m")
+            # logging.debug("RealtimeSTT shutting down")

-        self.shutdown_event.set()
-        self.is_recording = False
-        self.is_running = False
+            # Force wait_audio() and text() to exit
+            self.is_shut_down = True
+            self.start_recording_event.set()
+            self.stop_recording_event.set()

-        logging.debug('Finishing recording thread')
-        if self.recording_thread:
-            self.recording_thread.join()
+            self.shutdown_event.set()
+            self.is_recording = False
+            self.is_running = False

-        logging.debug('Terminating reader process')
+            logging.debug('Finishing recording thread')
+            if self.recording_thread:
+                self.recording_thread.join()

-        # Give it some time to finish the loop and cleanup.
-        if self.use_microphone:
-            self.reader_process.join(timeout=10)
+            logging.debug('Terminating reader process')

-            if self.reader_process.is_alive():
-                logging.warning("Reader process did not terminate "
-                                "in time. Terminating forcefully."
-                                )
-                self.reader_process.terminate()
+            # Give it some time to finish the loop and cleanup.
+            if self.use_microphone:
+                self.reader_process.join(timeout=10)

-        logging.debug('Terminating transcription process')
-        self.transcript_process.join(timeout=10)
+                if self.reader_process.is_alive():
+                    logging.warning("Reader process did not terminate "
+                                    "in time. Terminating forcefully."
+                                    )
+                    self.reader_process.terminate()

-        if self.transcript_process.is_alive():
-            logging.warning("Transcript process did not terminate "
-                            "in time. Terminating forcefully."
-                            )
-            self.transcript_process.terminate()
+            logging.debug('Terminating transcription process')
+            self.transcript_process.join(timeout=10)
+
+            if self.transcript_process.is_alive():
+                logging.warning("Transcript process did not terminate "
+                                "in time. Terminating forcefully."
+                                )
+                self.transcript_process.terminate()

-        self.parent_transcription_pipe.close()
+            self.parent_transcription_pipe.close()

-        logging.debug('Finishing realtime thread')
-        if self.realtime_thread:
-            self.realtime_thread.join()
+            logging.debug('Finishing realtime thread')
+            if self.realtime_thread:
+                self.realtime_thread.join()

-        if self.enable_realtime_transcription:
-            if self.realtime_model_type:
-                del self.realtime_model_type
-                self.realtime_model_type = None
-            gc.collect()
+            if self.enable_realtime_transcription:
+                if self.realtime_model_type:
+                    del self.realtime_model_type
+                    self.realtime_model_type = None
+                gc.collect()

     def _recording_worker(self):
         """
@@ -1649,14 +1489,13 @@ class AudioToTextRecorder:
         input for voice activity and accordingly starts/stops the recording.
         """

-        logging.debug('Debug: Starting _recording_worker method')
-        logging.debug('Starting recording worker')
-
-        logging.debug('Debug: Entering try block')
+        if self.use_extended_logging:
+            logging.debug('Debug: Entering try block')

         last_inner_try_time = 0
         try:
-            logging.debug('Debug: Initializing variables')
+            if self.use_extended_logging:
+                logging.debug('Debug: Initializing variables')
             time_since_last_buffer_message = 0
             was_recording = False
             delay_was_passed = False
@@ -1664,76 +1503,91 @@ class AudioToTextRecorder:
             wakeword_samples_to_remove = None
             self.allowed_to_early_transcribe = True

-            logging.debug('Debug: Starting main loop')
+            if self.use_extended_logging:
+                logging.debug('Debug: Starting main loop')
             # Continuously monitor audio for voice activity
             while self.is_running:

-                logging.debug('Debug: Entering inner try block')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Entering inner try block')
                 if last_inner_try_time:
                     last_processing_time = time.time() - last_inner_try_time
-                    if last_processing_time > 0.05:
-                        logging.warning('### WARNING: PROCESSING TOOK TOO LONG')
+                    if last_processing_time > 0.1:
+                        if self.use_extended_logging:
+                            logging.warning('### WARNING: PROCESSING TOOK TOO LONG')
                 last_inner_try_time = time.time()
                 try:
-                    logging.debug('Debug: Trying to get data from audio queue')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Trying to get data from audio queue')
                     try:
-                        data = self.audio_queue.get(timeout=0.02)
+                        data = self.audio_queue.get(timeout=0.01)
                     except queue.Empty:
-                        logging.debug('Debug: Queue is empty, checking if still running')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Queue is empty, checking if still running')
                         if not self.is_running:
-                            logging.debug('Debug: Not running, breaking loop')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Not running, breaking loop')
                             break
-                        logging.debug('Debug: Continuing to next iteration')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Continuing to next iteration')
                         continue

-                    logging.debug('Debug: Checking for on_recorded_chunk callback')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking for on_recorded_chunk callback')
                     if self.on_recorded_chunk:
-                        logging.debug('Debug: Calling on_recorded_chunk')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Calling on_recorded_chunk')
                         self.on_recorded_chunk(data)

-                    logging.debug('Debug: Checking if handle_buffer_overflow is True')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking if handle_buffer_overflow is True')
                     if self.handle_buffer_overflow:
-                        logging.debug('Debug: Handling buffer overflow')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Handling buffer overflow')
                         # Handle queue overflow
                         if (self.audio_queue.qsize() >
                                 self.allowed_latency_limit):
-                            logging.debug('Debug: Queue size exceeds limit, logging warnings')
-                            logging.warning("!!! ### !!! ### !!!")
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Queue size exceeds limit, logging warnings')
                             logging.warning("Audio queue size exceeds "
                                             "latency limit. Current size: "
                                             f"{self.audio_queue.qsize()}. "
                                             "Discarding old audio chunks."
                                             )
-                            logging.warning("!!! ### !!! ### !!!")

-                        logging.debug('Debug: Discarding old chunks if necessary')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Discarding old chunks if necessary')
                         while (self.audio_queue.qsize() >
                                 self.allowed_latency_limit):

                             data = self.audio_queue.get()

                 except BrokenPipeError:
-                    logging.debug('Debug: Caught BrokenPipeError')
                     logging.error("BrokenPipeError _recording_worker")
                     self.is_running = False
                     break

-                logging.debug('Debug: Updating time_since_last_buffer_message')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Updating time_since_last_buffer_message')
                 # Feed the extracted data to the audio_queue
                 if time_since_last_buffer_message:
                     time_passed = time.time() - time_since_last_buffer_message
                     if time_passed > 1:
-                        logging.debug("_recording_worker processing audio data")
+                        if self.use_extended_logging:
+                            logging.debug("_recording_worker processing audio data")
                         time_since_last_buffer_message = time.time()
                 else:
                     time_since_last_buffer_message = time.time()

-                logging.debug('Debug: Initializing failed_stop_attempt')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Initializing failed_stop_attempt')
                 failed_stop_attempt = False

-                logging.debug('Debug: Checking if not recording')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking if not recording')
                 if not self.is_recording:
-                    logging.debug('Debug: Handling not recording state')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Handling not recording state')
                     # Handle not recording state
                     time_since_listen_start = (time.time() - self.listen_start
                                                if self.listen_start else 0)
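From here on, every per-chunk `logging.debug(...)` is wrapped in the same `if self.use_extended_logging:` guard, so the hot loop pays only a boolean check when extended logging is off. The repetition could also be folded into a tiny helper; a sketch of that alternative (not what this diff does, just the same behavior factored out):

    import logging

    class ExtendedLoggingMixin:
        use_extended_logging: bool = False

        def _ext_debug(self, msg: str) -> None:
            # Same cost as the inline guard: one attribute lookup and a
            # boolean check when extended logging is disabled.
            if self.use_extended_logging:
                logging.debug(msg)

Call sites in the worker loop would then read `self._ext_debug('Debug: Entering inner try block')`.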
@@ -1743,63 +1597,73 @@ class AudioToTextRecorder:
                         self.wake_word_activation_delay
                     )

-                    logging.debug('Debug: Handling wake-word timeout callback')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Handling wake-word timeout callback')
                     # Handle wake-word timeout callback
                     if wake_word_activation_delay_passed \
                             and not delay_was_passed:

                         if self.use_wake_words and self.wake_word_activation_delay:
                             if self.on_wakeword_timeout:
-                                logging.debug('Debug: Calling on_wakeword_timeout')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Calling on_wakeword_timeout')
                                 self.on_wakeword_timeout()
                         delay_was_passed = wake_word_activation_delay_passed

-                    logging.debug('Debug: Setting state and spinner text')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Setting state and spinner text')
                     # Set state and spinner text
                     if not self.recording_stop_time:
                         if self.use_wake_words \
                                 and wake_word_activation_delay_passed \
                                 and not self.wakeword_detected:
-                            logging.debug('Debug: Setting state to "wakeword"')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Setting state to "wakeword"')
                             self._set_state("wakeword")
                         else:
                             if self.listen_start:
-                                logging.debug('Debug: Setting state to "listening"')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Setting state to "listening"')
                                 self._set_state("listening")
                             else:
-                                logging.debug('Debug: Setting state to "inactive"')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Setting state to "inactive"')
                                 self._set_state("inactive")

-                    logging.debug('Debug: Checking wake word conditions')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking wake word conditions')
                     if self.use_wake_words and wake_word_activation_delay_passed:
                         try:
-                            logging.debug('Debug: Processing wakeword')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Processing wakeword')
                             wakeword_index = self._process_wakeword(data)

                         except struct.error:
-                            logging.debug('Debug: Caught struct.error')
                             logging.error("Error unpacking audio data "
                                           "for wake word processing.")
                             continue

                         except Exception as e:
-                            logging.debug('Debug: Caught general exception')
                             logging.error(f"Wake word processing error: {e}")
                             continue

-                        logging.debug('Debug: Checking if wake word detected')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Checking if wake word detected')
                         # If a wake word is detected
                         if wakeword_index >= 0:
-                            logging.debug('Debug: Wake word detected, updating variables')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Wake word detected, updating variables')
                             self.wake_word_detect_time = time.time()
                             wakeword_detected_time = time.time()
                             wakeword_samples_to_remove = int(self.sample_rate * self.wake_word_buffer_duration)
                             self.wakeword_detected = True
                             if self.on_wakeword_detected:
-                                logging.debug('Debug: Calling on_wakeword_detected')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Calling on_wakeword_detected')
                                 self.on_wakeword_detected()

-                    logging.debug('Debug: Checking voice activity conditions')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking voice activity conditions')
                     # Check for voice activity to
                     # trigger the start of recording
                     if ((not self.use_wake_words
@@ -1807,37 +1671,46 @@ class AudioToTextRecorder:
                          and self.start_recording_on_voice_activity) \
                             or self.wakeword_detected:

-                        logging.debug('Debug: Checking if voice is active')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Checking if voice is active')
                         if self._is_voice_active():
-                            logging.debug('Debug: Voice activity detected')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Voice activity detected')
                             logging.info("voice activity detected")

-                            logging.debug('Debug: Starting recording')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Starting recording')
                             self.start()

                             self.start_recording_on_voice_activity = False

-                            logging.debug('Debug: Adding buffered audio to frames')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Adding buffered audio to frames')
                             # Add the buffered audio
                             # to the recording frames
                             self.frames.extend(list(self.audio_buffer))
                             self.audio_buffer.clear()

-                            logging.debug('Debug: Resetting Silero VAD model states')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Resetting Silero VAD model states')
                             self.silero_vad_model.reset_states()
                         else:
-                            logging.debug('Debug: Checking voice activity')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Checking voice activity')
                             data_copy = data[:]
                             self._check_voice_activity(data_copy)

-                        logging.debug('Debug: Resetting speech_end_silence_start')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Resetting speech_end_silence_start')
                         self.speech_end_silence_start = 0

                 else:
-                    logging.debug('Debug: Handling recording state')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Handling recording state')
                     # If we are currently recording
                     if wakeword_samples_to_remove and wakeword_samples_to_remove > 0:
-                        logging.debug('Debug: Removing wakeword samples')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Removing wakeword samples')
                         # Remove samples from the beginning of self.frames
                         samples_removed = 0
                         while wakeword_samples_to_remove > 0 and self.frames:
@@ -1854,25 +1727,31 @@ class AudioToTextRecorder:

                         wakeword_samples_to_remove = 0

-                    logging.debug('Debug: Checking if stop_recording_on_voice_deactivity is True')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking if stop_recording_on_voice_deactivity is True')
                     # Stop the recording if silence is detected after speech
                     if self.stop_recording_on_voice_deactivity:
-                        logging.debug('Debug: Determining if speech is detected')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Determining if speech is detected')
                         is_speech = (
                             self._is_silero_speech(data) if self.silero_deactivity_detection
                             else self._is_webrtc_speech(data, True)
                         )

-                        logging.debug('Debug: Formatting speech_end_silence_start')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Formatting speech_end_silence_start')
                         if not self.speech_end_silence_start:
                             str_speech_end_silence_start = "0"
                         else:
                             str_speech_end_silence_start = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
-                        logging.debug(f"is_speech: {is_speech}, str_speech_end_silence_start: {str_speech_end_silence_start}")
+                        if self.use_extended_logging:
+                            logging.debug(f"is_speech: {is_speech}, str_speech_end_silence_start: {str_speech_end_silence_start}")

-                        logging.debug('Debug: Checking if speech is not detected')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Checking if speech is not detected')
                         if not is_speech:
-                            logging.debug('Debug: Handling voice deactivity')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Handling voice deactivity')
                             # Voice deactivity was detected, so we start
                             # measuring silence time before stopping recording
                             if self.speech_end_silence_start == 0 and \
@@ -1880,71 +1759,89 @@ class AudioToTextRecorder:

                                 self.speech_end_silence_start = time.time()

-                            logging.debug('Debug: Checking early transcription conditions')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Checking early transcription conditions')
                             if self.speech_end_silence_start and self.early_transcription_on_silence and len(self.frames) > 0 and \
                                 (time.time() - self.speech_end_silence_start > self.early_transcription_on_silence) and \
                                 self.allowed_to_early_transcribe:
-                                    logging.debug("Debug:Adding early transcription request")
+                                    if self.use_extended_logging:
+                                        logging.debug("Debug: Adding early transcription request")
                                     self.transcribe_count += 1
                                     audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
                                     audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
-                                    logging.debug("Debug: early transcription request pipe send")
+
+                                    if self.use_extended_logging:
+                                        logging.debug("Debug: early transcription request pipe send")
                                     self.parent_transcription_pipe.send((audio, self.language))
-                                    logging.debug("Debug: early transcription request pipe send return")
+                                    if self.use_extended_logging:
+                                        logging.debug("Debug: early transcription request pipe send return")
                                     self.allowed_to_early_transcribe = False

                         else:
-                            logging.debug('Debug: Handling speech detection')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Handling speech detection')
                             if self.speech_end_silence_start:
-                                logging.info("Resetting self.speech_end_silence_start")
+                                if self.use_extended_logging:
+                                    logging.info("Resetting self.speech_end_silence_start")
                                 self.speech_end_silence_start = 0
                                 self.allowed_to_early_transcribe = True

-                        logging.debug('Debug: Checking if silence duration exceeds threshold')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Checking if silence duration exceeds threshold')
                         # Wait for silence to stop recording after speech
                         if self.speech_end_silence_start and time.time() - \
                                 self.speech_end_silence_start >= \
                                 self.post_speech_silence_duration:

-                            logging.debug('Debug: Formatting silence start time')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Formatting silence start time')
                             # Get time in desired format (HH:MM:SS.nnn)
                             silence_start_time = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]

-                            logging.debug('Debug: Calculating time difference')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Calculating time difference')
                             # Calculate time difference
                             time_diff = time.time() - self.speech_end_silence_start

-                            logging.debug('Debug: Logging voice deactivity detection')
-                            logging.info(f"voice deactivity detected at {silence_start_time}, "
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Logging voice deactivity detection')
+                                logging.info(f"voice deactivity detected at {silence_start_time}, "
                                          f"time since silence start: {time_diff:.3f} seconds")

-                            logging.debug('Debug: Appending data to frames and stopping recording')
+                            logging.debug('Debug: Appending data to frames and stopping recording')
                             self.frames.append(data)
                             self.stop()
                             if not self.is_recording:
-                                logging.debug('Debug: Resetting speech_end_silence_start')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Resetting speech_end_silence_start')
                                 self.speech_end_silence_start = 0

-                                logging.debug('Debug: Handling non-wake word scenario')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Handling non-wake word scenario')
                                 if not self.use_wake_words:
                                     self.listen_start = time.time()
                                     self._set_state("listening")
                                     self.start_recording_on_voice_activity = True
                             else:
-                                logging.debug('Debug: Setting failed_stop_attempt to True')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Setting failed_stop_attempt to True')
                                 failed_stop_attempt = True

-                logging.debug('Debug: Checking if recording stopped')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking if recording stopped')
                 if not self.is_recording and was_recording:
-                    logging.debug('Debug: Resetting after stopping recording')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Resetting after stopping recording')
                     # Reset after stopping recording to ensure clean state
                     self.stop_recording_on_voice_deactivity = False

-                logging.debug('Debug: Checking Silero time')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking Silero time')
                 if time.time() - self.silero_check_time > 0.1:
                     self.silero_check_time = 0

-                logging.debug('Debug: Handling wake word timeout')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Handling wake word timeout')
                 # Handle wake word timeout (waited to long initiating
                 # speech after wake word detection)
                 if self.wake_word_detect_time and time.time() - \
@@ -1952,21 +1849,27 @@ class AudioToTextRecorder:
                     self.wake_word_detect_time = 0
                     if self.wakeword_detected and self.on_wakeword_timeout:
-                        logging.debug('Debug: Calling on_wakeword_timeout')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Calling on_wakeword_timeout')
                         self.on_wakeword_timeout()
                     self.wakeword_detected = False

-                logging.debug('Debug: Updating was_recording')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Updating was_recording')
                 was_recording = self.is_recording

-                logging.debug('Debug: Checking if recording and not failed stop attempt')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking if recording and not failed stop attempt')
                 if self.is_recording and not failed_stop_attempt:
-                    logging.debug('Debug: Appending data to frames')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Appending data to frames')
                     self.frames.append(data)

-                logging.debug('Debug: Checking if not recording or speech end silence start')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking if not recording or speech end silence start')
                 if not self.is_recording or self.speech_end_silence_start:
-                    logging.debug('Debug: Appending data to audio buffer')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Appending data to audio buffer')
                     self.audio_buffer.append(data)

         except Exception as e:
@@ -1975,262 +1878,8 @@ class AudioToTextRecorder:
                 logging.error(f"Unhandled exeption in _recording_worker: {e}")
                 raise

-        logging.debug('Debug: Exiting _recording_worker method')
-
-    # def _recording_worker(self):
-    #     """
-    #     The main worker method which constantly monitors the audio
-    #     input for voice activity and accordingly starts/stops the recording.
-    #     """
-
-    #     logging.debug('Starting recording worker')
-
-    #     try:
-    #         time_since_last_buffer_message = 0
-    #         was_recording = False
-    #         delay_was_passed = False
-    #         wakeword_detected_time = None
-    #         wakeword_samples_to_remove = None
-    #         self.allowed_to_early_transcribe = True
-
-    #         # Continuously monitor audio for voice activity
-    #         while self.is_running:
-
-    #             try:
-    #                 try:
-    #                     data = self.audio_queue.get(timeout=0.1)
-    #                 except queue.Empty:
-    #                     if not self.is_running:
-    #                         break
-    #                     continue
-
-    #                 if self.on_recorded_chunk:
-    #                     self.on_recorded_chunk(data)
-
-    #                 if self.handle_buffer_overflow:
-    #                     # Handle queue overflow
-    #                     if (self.audio_queue.qsize() >
-    #                             self.allowed_latency_limit):
-    #                         logging.warning("!!! ### !!! ### !!!")
-    #                         logging.warning("Audio queue size exceeds "
-    #                                         "latency limit. Current size: "
-    #                                         f"{self.audio_queue.qsize()}. "
-    #                                         "Discarding old audio chunks."
-    #                                         )
-    #                         logging.warning("!!! ### !!! ### !!!")
-
-    #                     while (self.audio_queue.qsize() >
-    #                             self.allowed_latency_limit):
-
-    #                         data = self.audio_queue.get()
-
-    #             except BrokenPipeError:
-    #                 logging.error("BrokenPipeError _recording_worker")
-    #                 self.is_running = False
-    #                 break
-
-    #             # Feed the extracted data to the audio_queue
-    #             if time_since_last_buffer_message:
-    #                 time_passed = time.time() - time_since_last_buffer_message
-    #                 if time_passed > 1:
-    #                     logging.debug("_recording_worker processing audio data")
-    #                     time_since_last_buffer_message = time.time()
-    #             else:
-    #                 time_since_last_buffer_message = time.time()
-
-    #             failed_stop_attempt = False
-
-    #             if not self.is_recording:
-    #                 # Handle not recording state
-    #                 time_since_listen_start = (time.time() - self.listen_start
-    #                                            if self.listen_start else 0)
-
-    #                 wake_word_activation_delay_passed = (
-    #                     time_since_listen_start >
-    #                     self.wake_word_activation_delay
-    #                 )
-
-    #                 # Handle wake-word timeout callback
-    #                 if wake_word_activation_delay_passed \
-    #                         and not delay_was_passed:
-
-    #                     if self.use_wake_words and self.wake_word_activation_delay:
-    #                         if self.on_wakeword_timeout:
-    #                             self.on_wakeword_timeout()
-    #                     delay_was_passed = wake_word_activation_delay_passed
-
-    #                 # Set state and spinner text
-    #                 if not self.recording_stop_time:
-    #                     if self.use_wake_words \
-    #                             and wake_word_activation_delay_passed \
-    #                             and not self.wakeword_detected:
-    #                         self._set_state("wakeword")
-    #                     else:
-    #                         if self.listen_start:
-    #                             self._set_state("listening")
-    #                         else:
-    #                             self._set_state("inactive")
-
-    #                 if self.use_wake_words and wake_word_activation_delay_passed:
-    #                     try:
-    #                         wakeword_index = self._process_wakeword(data)
-
-    #                     except struct.error:
-    #                         logging.error("Error unpacking audio data "
-    #                                       "for wake word processing.")
-    #                         continue
-
-    #                     except Exception as e:
-    #                         logging.error(f"Wake word processing error: {e}")
-    #                         continue
-
-    #                     # If a wake word is detected
-    #                     if wakeword_index >= 0:
-    #                         self.wake_word_detect_time = time.time()
-    #                         wakeword_detected_time = time.time()
-    #                         wakeword_samples_to_remove = int(self.sample_rate * self.wake_word_buffer_duration)
-    #                         self.wakeword_detected = True
-    #                         if self.on_wakeword_detected:
-    #                             self.on_wakeword_detected()
-
-    #                 # Check for voice activity to
-    #                 # trigger the start of recording
-    #                 if ((not self.use_wake_words
-    #                      or not wake_word_activation_delay_passed)
-    #                         and self.start_recording_on_voice_activity) \
-    #                         or self.wakeword_detected:
-
-    #                     if self._is_voice_active():
-    #                         logging.info("voice activity detected")
-
-    #                         self.start()
-
-    #                         self.start_recording_on_voice_activity = False
-
-    #                         # Add the buffered audio
-    #                         # to the recording frames
-    #                         self.frames.extend(list(self.audio_buffer))
-    #                         self.audio_buffer.clear()
-
-    #                         self.silero_vad_model.reset_states()
-    #                     else:
-    #                         data_copy = data[:]
-    #                         self._check_voice_activity(data_copy)
-
-    #                     self.speech_end_silence_start = 0
-
-    #             else:
-    #                 # If we are currently recording
-    #                 if wakeword_samples_to_remove and wakeword_samples_to_remove > 0:
-    #                     # Remove samples from the beginning of self.frames
-    #                     samples_removed = 0
-    #                     while wakeword_samples_to_remove > 0 and self.frames:
-    #                         frame = self.frames[0]
-    #                         frame_samples = len(frame) // 2  # Assuming 16-bit audio
-    #                         if wakeword_samples_to_remove >= frame_samples:
-    #                             self.frames.pop(0)
-    #                             samples_removed += frame_samples
-    #                             wakeword_samples_to_remove -= frame_samples
-    #                         else:
-    #                             self.frames[0] = frame[wakeword_samples_to_remove * 2:]
-    #                             samples_removed += wakeword_samples_to_remove
-    #                             samples_to_remove = 0
-
-    #                     wakeword_samples_to_remove = 0
-
-    #                 # Stop the recording if silence is detected after speech
-    #                 if self.stop_recording_on_voice_deactivity:
-    #                     is_speech = (
-    #                         self._is_silero_speech(data) if self.silero_deactivity_detection
-    #                         else self._is_webrtc_speech(data, True)
-    #                     )
-
-    #                     if not self.speech_end_silence_start:
-    #                         str_speech_end_silence_start = "0"
-    #                     else:
-    #                         str_speech_end_silence_start = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
-    #                     logging.debug(f"is_speech: {is_speech}, str_speech_end_silence_start: {str_speech_end_silence_start}")
-
-    #                     if not is_speech:
-    #                         # Voice deactivity was detected, so we start
-    #                         # measuring silence time before stopping recording
-    #                         if self.speech_end_silence_start == 0 and \
-    #                             (time.time() - self.recording_start_time > self.min_length_of_recording):
-
-    #                             self.speech_end_silence_start = time.time()
-
-    #                         if self.speech_end_silence_start and self.early_transcription_on_silence and len(self.frames) > 0 and \
-    #                             (time.time() - self.speech_end_silence_start > self.early_transcription_on_silence) and \
-    #                             self.allowed_to_early_transcribe:
-    #                             logging.debug("Adding early transcription request")
-    #                             self.transcribe_count += 1
-    #                             audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
-    #                             audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
-    #                             self.parent_transcription_pipe.send((audio, self.language))
-    #                             self.allowed_to_early_transcribe = False
-
-    #                     else:
-    #                         if self.speech_end_silence_start:
-    #                             logging.info("Resetting self.speech_end_silence_start")
-    #                             self.speech_end_silence_start = 0
-    #                             self.allowed_to_early_transcribe = True
-
-    #                     # Wait for silence to stop recording after speech
-    #                     if self.speech_end_silence_start and time.time() - \
-    #                             self.speech_end_silence_start >= \
-    #                             self.post_speech_silence_duration:
-
-    #                         # Get time in desired format (HH:MM:SS.nnn)
-    #                         silence_start_time = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
-
-    #                         # Calculate time difference
-    #                         time_diff = time.time() - self.speech_end_silence_start
-
-    #                         logging.info(f"voice deactivity detected at {silence_start_time}, "
-    #                                      f"time since silence start: {time_diff:.3f} seconds")
-
-    #                         self.frames.append(data)
-    #                         self.stop()
-    #                         if not self.is_recording:
-    #                             self.speech_end_silence_start = 0
-
-    #                             if not self.use_wake_words:
-    #                                 self.listen_start = time.time()
-    #                                 self._set_state("listening")
-    #                                 self.start_recording_on_voice_activity = True
-    #                         else:
-    #                             failed_stop_attempt = True
-
-    #             if not self.is_recording and was_recording:
-    #                 # Reset after stopping recording to ensure clean state
-    #                 self.stop_recording_on_voice_deactivity = False
-
-    #             if time.time() - self.silero_check_time > 0.1:
-    #                 self.silero_check_time = 0
-
-    #             # Handle wake word timeout (waited to long initiating
-    #             # speech after wake word detection)
-    #             if self.wake_word_detect_time and time.time() - \
-    #                     self.wake_word_detect_time > self.wake_word_timeout:
-
-    #                 self.wake_word_detect_time = 0
-    #                 if self.wakeword_detected and self.on_wakeword_timeout:
-    #                     self.on_wakeword_timeout()
-    #                 self.wakeword_detected = False
-
-    #             was_recording = self.is_recording
-
-    #             if self.is_recording and not failed_stop_attempt:
-    #                 self.frames.append(data)
-
-    #             if not self.is_recording or self.speech_end_silence_start:
-    #                 self.audio_buffer.append(data)
-
-    #         except Exception as e:
-    #             if not self.interrupt_stop_event.is_set():
-    #                 logging.error(f"Unhandled exeption in _recording_worker: {e}")
-    #                 raise
+        if self.use_extended_logging:
+            logging.debug('Debug: Exiting _recording_worker method')

     def _realtime_worker(self):
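One timing relationship worth keeping in mind when reading the loop above: `early_transcription_on_silence` (the default of 0 disables it) fires a transcription request as soon as that many seconds of silence follow speech, while the recording itself only stops once `post_speech_silence_duration` has elapsed. A standalone sketch of that ordering (illustrative constants, not library defaults):

    import time

    early_transcription_on_silence = 0.2  # request transcription this early
    post_speech_silence_duration = 0.7    # stop recording after this much silence

    speech_end_silence_start = time.time()  # set when silence first follows speech

    def should_early_transcribe(now, already_sent):
        # Fires once per silence stretch, before the recording stops.
        return (not already_sent and
                now - speech_end_silence_start > early_transcription_on_silence)

    def should_stop_recording(now):
        return now - speech_end_silence_start >= post_speech_silence_duration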