KoljaB — 8 months ago
Parent commit: 6855db98a6
1 file changed, 195 insertions(+), 546 deletions(-)
    RealtimeSTT/audio_recorder.py

RealtimeSTT/audio_recorder.py (+195 / −546)

@@ -256,7 +256,8 @@ class AudioToTextRecorder:
                  print_transcription_time: bool = False,
                  early_transcription_on_silence: int = 0,
                  allowed_latency_limit: int = ALLOWED_LATENCY_LIMIT,
-                 no_log_file: bool = False
+                 no_log_file: bool = False,
+                 use_extended_logging: bool = False,
                  ):
         """
         Initializes an audio recorder and  transcription
@@ -453,6 +454,9 @@ class AudioToTextRecorder:
         - allowed_latency_limit (int, default=100): Maximal amount of chunks
             that can be unprocessed in queue before discarding chunks.
         - no_log_file (bool, default=False): Skips writing of debug log file.
+        - use_extended_logging (bool, default=False): Writes extensive
+            log messages for the recording worker, that processes the audio
+            chunks.
 
         Raises:
             Exception: Errors related to initializing transcription
@@ -543,9 +547,11 @@ class AudioToTextRecorder:
         self.detected_realtime_language = None
         self.detected_realtime_language_probability = 0
         self.transcription_lock = threading.Lock()
+        self.shutdown_lock = threading.Lock()
         self.transcribe_count = 0
         self.print_transcription_time = print_transcription_time
         self.early_transcription_on_silence = early_transcription_on_silence
+        self.use_extended_logging = use_extended_logging
 
         # Initialize the logging configuration with the specified level
         log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s'
@@ -854,136 +860,6 @@ class AudioToTextRecorder:
     def _transcription_worker(*args, **kwargs):
         worker = TranscriptionWorker(*args, **kwargs)
         worker.run()
-    # @staticmethod
-    # def _transcription_worker(conn,
-    #                           stdout_pipe,
-    #                           model_path,
-    #                           compute_type,
-    #                           gpu_device_index,
-    #                           device,
-    #                           ready_event,
-    #                           shutdown_event,
-    #                           interrupt_stop_event,
-    #                           beam_size,
-    #                           initial_prompt,
-    #                           suppress_tokens
-    #                           ):
-    #     """
-    #     Worker method that handles the continuous
-    #     process of transcribing audio data.
-
-    #     This method runs in a separate process and is responsible for:
-    #     - Initializing the `faster_whisper` model used for transcription.
-    #     - Receiving audio data sent through a pipe and using the model
-    #       to transcribe it.
-    #     - Sending transcription results back through the pipe.
-    #     - Continuously checking for a shutdown event to gracefully
-    #       terminate the transcription process.
-
-    #     Args:
-    #         conn (multiprocessing.Connection): The connection endpoint used
-    #           for receiving audio data and sending transcription results.
-    #         model_path (str): The path to the pre-trained faster_whisper model
-    #           for transcription.
-    #         compute_type (str): Specifies the type of computation to be used
-    #             for transcription.
-    #         gpu_device_index (int): Device ID to use.
-    #         device (str): Device for model to use.
-    #         ready_event (threading.Event): An event that is set when the
-    #           transcription model is successfully initialized and ready.
-    #         shutdown_event (threading.Event): An event that, when set,
-    #           signals this worker method to terminate.
-    #         interrupt_stop_event (threading.Event): An event that, when set,
-    #             signals this worker method to stop processing audio data.
-    #         beam_size (int): The beam size to use for beam search decoding.
-    #         initial_prompt (str or iterable of int): Initial prompt to be fed
-    #             to the transcription model.
-    #         suppress_tokens (list of int): Tokens to be suppressed from the
-    #             transcription output.
-    #     Raises:
-    #         Exception: If there is an error while initializing the
-    #         transcription model.
-    #     """
-
-    #     system_signal.signal(system_signal.SIGINT, system_signal.SIG_IGN)
-
-    #     def custom_print(*args, **kwargs):
-    #         message = ' '.join(map(str, args))
-    #         try:
-    #             stdout_pipe.send(message)
-    #         except (BrokenPipeError, EOFError, OSError):
-    #             # The pipe probably has been closed, so we ignore the error
-    #             pass
-
-    #     # Replace the built-in print function with our custom one
-    #     __builtins__['print'] = custom_print
-
-    #     logging.info("Initializing faster_whisper "
-    #                  f"main transcription model {model_path}"
-    #                  )
-
-    #     try:
-    #         model = faster_whisper.WhisperModel(
-    #             model_size_or_path=model_path,
-    #             device=device,
-    #             compute_type=compute_type,
-    #             device_index=gpu_device_index,
-    #         )
-
-    #     except Exception as e:
-    #         logging.exception("Error initializing main "
-    #                           f"faster_whisper transcription model: {e}"
-    #                           )
-    #         raise
-
-    #     ready_event.set()
-
-    #     logging.debug("Faster_whisper main speech to text "
-    #                   "transcription model initialized successfully"
-    #                   )
-
-    #     try:
-    #         while not shutdown_event.is_set():
-    #             try:
-    #                 if conn.poll(0.01):
-    #                     logging.debug("Receive from _transcription_worker  pipe")
-    #                     audio, language = conn.recv()
-    #                     try:
-    #                         segments, info = model.transcribe(
-    #                             audio,
-    #                             language=language if language else None,
-    #                             beam_size=beam_size,
-    #                             initial_prompt=initial_prompt,
-    #                             suppress_tokens=suppress_tokens
-    #                         )
-    #                         transcription = " ".join(seg.text for seg in segments)
-    #                         transcription = transcription.strip()
-    #                         logging.debug(f"Final text detected with main model: {transcription}")
-    #                         conn.send(('success', (transcription, info)))
-    #                     except Exception as e:
-    #                         logging.error(f"General error in _transcription_worker in transcription: {e}")
-    #                         conn.send(('error', str(e)))
-    #                 else:
-    #                     time.sleep(TIME_SLEEP)
-
-
-
-    #             except KeyboardInterrupt:
-    #                 interrupt_stop_event.set()
-                    
-    #                 logging.debug("Transcription worker process "
-    #                                 "finished due to KeyboardInterrupt"
-    #                                 )
-    #                 stdout_pipe.close()
-    #                 break
-
-    #             except Exception as e:
-    #                 logging.error(f"General error in _transcription_worker in accessing pipe: {e}")
-
-    #     finally:
-    #         __builtins__['print'] = print  # Restore the original print function            
-    #         conn.close()
-    #         stdout_pipe.close()
 
     @staticmethod
     def _audio_data_worker(audio_queue,
@@ -1438,46 +1314,6 @@ class AudioToTextRecorder:
         else:
             return self.transcribe()
 
-    # def text(self,
-    #          on_transcription_finished=None,
-    #          ):
-    #     """
-    #     Transcribes audio captured by this class instance
-    #     using the `faster_whisper` model.
-
-    #     - Automatically starts recording upon voice activity if not manually
-    #       started using `recorder.start()`.
-    #     - Automatically stops recording upon voice deactivity if not manually
-    #       stopped with `recorder.stop()`.
-    #     - Processes the recorded audio to generate transcription.
-
-    #     Args:
-    #         on_transcription_finished (callable, optional): Callback function
-    #           to be executed when transcription is ready.
-    #         If provided, transcription will be performed asynchronously, and
-    #           the callback will receive the transcription as its argument.
-    #           If omitted, the transcription will be performed synchronously,
-    #           and the result will be returned.
-
-    #     Returns (if not callback is set):
-    #         str: The transcription of the recorded audio
-    #     """
-
-    #     self.interrupt_stop_event.clear()
-    #     self.was_interrupted.clear()
-
-    #     self.wait_audio()
-
-    #     if self.is_shut_down or self.interrupt_stop_event.is_set():
-    #         if self.interrupt_stop_event.is_set():
-    #             self.was_interrupted.set()
-    #         return ""
-
-    #     if on_transcription_finished:
-    #         threading.Thread(target=on_transcription_finished,
-    #                          args=(self.transcribe(),)).start()
-    #     else:
-    #         return self.transcribe()
 
     def start(self):
         """
@@ -1594,54 +1430,58 @@ class AudioToTextRecorder:
         recording worker and closing the audio stream.
         """
 
-        print("RealtimeSTT shutting down")
-        logging.debug("RealtimeSTT shutting down")
+        with self.shutdown_lock:
+            if self.is_shut_down:
+                return
 
-        # Force wait_audio() and text() to exit
-        self.is_shut_down = True
-        self.start_recording_event.set()
-        self.stop_recording_event.set()
+            print("\033[91mRealtimeSTT shutting down\033[0m")
+            # logging.debug("RealtimeSTT shutting down")
 
-        self.shutdown_event.set()
-        self.is_recording = False
-        self.is_running = False
+            # Force wait_audio() and text() to exit
+            self.is_shut_down = True
+            self.start_recording_event.set()
+            self.stop_recording_event.set()
 
-        logging.debug('Finishing recording thread')
-        if self.recording_thread:
-            self.recording_thread.join()
+            self.shutdown_event.set()
+            self.is_recording = False
+            self.is_running = False
 
-        logging.debug('Terminating reader process')
+            logging.debug('Finishing recording thread')
+            if self.recording_thread:
+                self.recording_thread.join()
 
-        # Give it some time to finish the loop and cleanup.
-        if self.use_microphone:
-            self.reader_process.join(timeout=10)
+            logging.debug('Terminating reader process')
 
-        if self.reader_process.is_alive():
-            logging.warning("Reader process did not terminate "
-                            "in time. Terminating forcefully."
-                            )
-            self.reader_process.terminate()
+            # Give it some time to finish the loop and cleanup.
+            if self.use_microphone:
+                self.reader_process.join(timeout=10)
 
-        logging.debug('Terminating transcription process')
-        self.transcript_process.join(timeout=10)
+            if self.reader_process.is_alive():
+                logging.warning("Reader process did not terminate "
+                                "in time. Terminating forcefully."
+                                )
+                self.reader_process.terminate()
 
-        if self.transcript_process.is_alive():
-            logging.warning("Transcript process did not terminate "
-                            "in time. Terminating forcefully."
-                            )
-            self.transcript_process.terminate()
+            logging.debug('Terminating transcription process')
+            self.transcript_process.join(timeout=10)
+
+            if self.transcript_process.is_alive():
+                logging.warning("Transcript process did not terminate "
+                                "in time. Terminating forcefully."
+                                )
+                self.transcript_process.terminate()
 
-        self.parent_transcription_pipe.close()
+            self.parent_transcription_pipe.close()
 
-        logging.debug('Finishing realtime thread')
-        if self.realtime_thread:
-            self.realtime_thread.join()
+            logging.debug('Finishing realtime thread')
+            if self.realtime_thread:
+                self.realtime_thread.join()
 
-        if self.enable_realtime_transcription:
-            if self.realtime_model_type:
-                del self.realtime_model_type
-                self.realtime_model_type = None
-        gc.collect()
+            if self.enable_realtime_transcription:
+                if self.realtime_model_type:
+                    del self.realtime_model_type
+                    self.realtime_model_type = None
+            gc.collect()
 
     def _recording_worker(self):
         """
@@ -1649,14 +1489,13 @@ class AudioToTextRecorder:
         input for voice activity and accordingly starts/stops the recording.
         """
 
-        logging.debug('Debug: Starting _recording_worker method')
-        logging.debug('Starting recording worker')
-
-        logging.debug('Debug: Entering try block')
+        if self.use_extended_logging:
+            logging.debug('Debug: Entering try block')
 
         last_inner_try_time = 0
         try:
-            logging.debug('Debug: Initializing variables')
+            if self.use_extended_logging:
+                logging.debug('Debug: Initializing variables')
             time_since_last_buffer_message = 0
             was_recording = False
             delay_was_passed = False
@@ -1664,76 +1503,91 @@ class AudioToTextRecorder:
             wakeword_samples_to_remove = None
             self.allowed_to_early_transcribe = True
 
-            logging.debug('Debug: Starting main loop')
+            if self.use_extended_logging:
+                logging.debug('Debug: Starting main loop')
             # Continuously monitor audio for voice activity
             while self.is_running:
 
-                logging.debug('Debug: Entering inner try block')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Entering inner try block')
                 if last_inner_try_time:
                     last_processing_time = time.time() - last_inner_try_time
-                    if last_processing_time > 0.05:
-                        logging.warning('### WARNING: PROCESSING TOOK TOO LONG')
+                    if last_processing_time > 0.1:
+                        if self.use_extended_logging:
+                            logging.warning('### WARNING: PROCESSING TOOK TOO LONG')
                 last_inner_try_time = time.time()
                 try:
-                    logging.debug('Debug: Trying to get data from audio queue')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Trying to get data from audio queue')
                     try:
-                        data = self.audio_queue.get(timeout=0.02)
+                        data = self.audio_queue.get(timeout=0.01)
                     except queue.Empty:
-                        logging.debug('Debug: Queue is empty, checking if still running')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Queue is empty, checking if still running')
                         if not self.is_running:
-                            logging.debug('Debug: Not running, breaking loop')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Not running, breaking loop')
                             break
-                        logging.debug('Debug: Continuing to next iteration')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Continuing to next iteration')
                         continue
 
-                    logging.debug('Debug: Checking for on_recorded_chunk callback')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking for on_recorded_chunk callback')
                     if self.on_recorded_chunk:
-                        logging.debug('Debug: Calling on_recorded_chunk')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Calling on_recorded_chunk')
                         self.on_recorded_chunk(data)
 
-                    logging.debug('Debug: Checking if handle_buffer_overflow is True')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking if handle_buffer_overflow is True')
                     if self.handle_buffer_overflow:
-                        logging.debug('Debug: Handling buffer overflow')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Handling buffer overflow')
                         # Handle queue overflow
                         if (self.audio_queue.qsize() >
                                 self.allowed_latency_limit):
-                            logging.debug('Debug: Queue size exceeds limit, logging warnings')
-                            logging.warning("!!! ### !!! ### !!!")
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Queue size exceeds limit, logging warnings')
                             logging.warning("Audio queue size exceeds "
                                             "latency limit. Current size: "
                                             f"{self.audio_queue.qsize()}. "
                                             "Discarding old audio chunks."
                                             )
-                            logging.warning("!!! ### !!! ### !!!")
 
-                        logging.debug('Debug: Discarding old chunks if necessary')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Discarding old chunks if necessary')
                         while (self.audio_queue.qsize() >
                                 self.allowed_latency_limit):
 
                             data = self.audio_queue.get()
 
                 except BrokenPipeError:
-                    logging.debug('Debug: Caught BrokenPipeError')
                     logging.error("BrokenPipeError _recording_worker")
                     self.is_running = False
                     break
 
-                logging.debug('Debug: Updating time_since_last_buffer_message')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Updating time_since_last_buffer_message')
                 # Feed the extracted data to the audio_queue
                 if time_since_last_buffer_message:
                     time_passed = time.time() - time_since_last_buffer_message
                     if time_passed > 1:
-                        logging.debug("_recording_worker processing audio data")
+                        if self.use_extended_logging:
+                            logging.debug("_recording_worker processing audio data")
                         time_since_last_buffer_message = time.time()
                 else:
                     time_since_last_buffer_message = time.time()
 
-                logging.debug('Debug: Initializing failed_stop_attempt')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Initializing failed_stop_attempt')
                 failed_stop_attempt = False
 
-                logging.debug('Debug: Checking if not recording')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking if not recording')
                 if not self.is_recording:
-                    logging.debug('Debug: Handling not recording state')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Handling not recording state')
                     # Handle not recording state
                     time_since_listen_start = (time.time() - self.listen_start
                                             if self.listen_start else 0)
@@ -1743,63 +1597,73 @@ class AudioToTextRecorder:
                         self.wake_word_activation_delay
                     )
 
-                    logging.debug('Debug: Handling wake-word timeout callback')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Handling wake-word timeout callback')
                     # Handle wake-word timeout callback
                     if wake_word_activation_delay_passed \
                             and not delay_was_passed:
 
                         if self.use_wake_words and self.wake_word_activation_delay:
                             if self.on_wakeword_timeout:
-                                logging.debug('Debug: Calling on_wakeword_timeout')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Calling on_wakeword_timeout')
                                 self.on_wakeword_timeout()
                     delay_was_passed = wake_word_activation_delay_passed
 
-                    logging.debug('Debug: Setting state and spinner text')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Setting state and spinner text')
                     # Set state and spinner text
                     if not self.recording_stop_time:
                         if self.use_wake_words \
                                 and wake_word_activation_delay_passed \
                                 and not self.wakeword_detected:
-                            logging.debug('Debug: Setting state to "wakeword"')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Setting state to "wakeword"')
                             self._set_state("wakeword")
                         else:
                             if self.listen_start:
-                                logging.debug('Debug: Setting state to "listening"')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Setting state to "listening"')
                                 self._set_state("listening")
                             else:
-                                logging.debug('Debug: Setting state to "inactive"')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Setting state to "inactive"')
                                 self._set_state("inactive")
 
-                    logging.debug('Debug: Checking wake word conditions')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking wake word conditions')
                     if self.use_wake_words and wake_word_activation_delay_passed:
                         try:
-                            logging.debug('Debug: Processing wakeword')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Processing wakeword')
                             wakeword_index = self._process_wakeword(data)
 
                         except struct.error:
-                            logging.debug('Debug: Caught struct.error')
                             logging.error("Error unpacking audio data "
                                         "for wake word processing.")
                             continue
 
                         except Exception as e:
-                            logging.debug('Debug: Caught general exception')
                             logging.error(f"Wake word processing error: {e}")
                             continue
 
-                        logging.debug('Debug: Checking if wake word detected')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Checking if wake word detected')
                         # If a wake word is detected                        
                         if wakeword_index >= 0:
-                            logging.debug('Debug: Wake word detected, updating variables')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Wake word detected, updating variables')
                             self.wake_word_detect_time = time.time()
                             wakeword_detected_time = time.time()
                             wakeword_samples_to_remove = int(self.sample_rate * self.wake_word_buffer_duration)
                             self.wakeword_detected = True
                             if self.on_wakeword_detected:
-                                logging.debug('Debug: Calling on_wakeword_detected')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Calling on_wakeword_detected')
                                 self.on_wakeword_detected()
 
-                    logging.debug('Debug: Checking voice activity conditions')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking voice activity conditions')
                     # Check for voice activity to
                     # trigger the start of recording
                     if ((not self.use_wake_words
@@ -1807,37 +1671,46 @@ class AudioToTextRecorder:
                             and self.start_recording_on_voice_activity) \
                             or self.wakeword_detected:
 
-                        logging.debug('Debug: Checking if voice is active')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Checking if voice is active')
                         if self._is_voice_active():
-                            logging.debug('Debug: Voice activity detected')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Voice activity detected')
                             logging.info("voice activity detected")
 
-                            logging.debug('Debug: Starting recording')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Starting recording')
                             self.start()
 
                             self.start_recording_on_voice_activity = False
 
-                            logging.debug('Debug: Adding buffered audio to frames')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Adding buffered audio to frames')
                             # Add the buffered audio
                             # to the recording frames
                             self.frames.extend(list(self.audio_buffer))
                             self.audio_buffer.clear()
 
-                            logging.debug('Debug: Resetting Silero VAD model states')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Resetting Silero VAD model states')
                             self.silero_vad_model.reset_states()
                         else:
-                            logging.debug('Debug: Checking voice activity')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Checking voice activity')
                             data_copy = data[:]
                             self._check_voice_activity(data_copy)
 
-                    logging.debug('Debug: Resetting speech_end_silence_start')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Resetting speech_end_silence_start')
                     self.speech_end_silence_start = 0
 
                 else:
-                    logging.debug('Debug: Handling recording state')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Handling recording state')
                     # If we are currently recording
                     if wakeword_samples_to_remove and wakeword_samples_to_remove > 0:
-                        logging.debug('Debug: Removing wakeword samples')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Removing wakeword samples')
                         # Remove samples from the beginning of self.frames
                         samples_removed = 0
                         while wakeword_samples_to_remove > 0 and self.frames:
@@ -1854,25 +1727,31 @@ class AudioToTextRecorder:
                         
                         wakeword_samples_to_remove = 0
 
-                    logging.debug('Debug: Checking if stop_recording_on_voice_deactivity is True')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Checking if stop_recording_on_voice_deactivity is True')
                     # Stop the recording if silence is detected after speech
                     if self.stop_recording_on_voice_deactivity:
-                        logging.debug('Debug: Determining if speech is detected')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Determining if speech is detected')
                         is_speech = (
                             self._is_silero_speech(data) if self.silero_deactivity_detection
                             else self._is_webrtc_speech(data, True)
                         )
 
-                        logging.debug('Debug: Formatting speech_end_silence_start')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Formatting speech_end_silence_start')
                         if not self.speech_end_silence_start:
                             str_speech_end_silence_start = "0"
                         else:
                             str_speech_end_silence_start = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
-                        logging.debug(f"is_speech: {is_speech}, str_speech_end_silence_start: {str_speech_end_silence_start}")
+                        if self.use_extended_logging:
+                            logging.debug(f"is_speech: {is_speech}, str_speech_end_silence_start: {str_speech_end_silence_start}")
 
-                        logging.debug('Debug: Checking if speech is not detected')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Checking if speech is not detected')
                         if not is_speech:
-                            logging.debug('Debug: Handling voice deactivity')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Handling voice deactivity')
                             # Voice deactivity was detected, so we start
                             # measuring silence time before stopping recording
                             if self.speech_end_silence_start == 0 and \
@@ -1880,71 +1759,89 @@ class AudioToTextRecorder:
 
                                 self.speech_end_silence_start = time.time()
 
-                            logging.debug('Debug: Checking early transcription conditions')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Checking early transcription conditions')
                             if self.speech_end_silence_start and self.early_transcription_on_silence and len(self.frames) > 0 and \
                                 (time.time() - self.speech_end_silence_start > self.early_transcription_on_silence) and \
                                 self.allowed_to_early_transcribe:
-                                    logging.debug("Debug:Adding early transcription request")
+                                    if self.use_extended_logging:
+                                        logging.debug("Debug:Adding early transcription request")
                                     self.transcribe_count += 1
                                     audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
                                     audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
-                                    logging.debug("Debug: early transcription request pipe send")
+
+                                    if self.use_extended_logging:
+                                        logging.debug("Debug: early transcription request pipe send")
                                     self.parent_transcription_pipe.send((audio, self.language))
-                                    logging.debug("Debug: early transcription request pipe send return")
+                                    if self.use_extended_logging:
+                                        logging.debug("Debug: early transcription request pipe send return")
                                     self.allowed_to_early_transcribe = False
 
                         else:
-                            logging.debug('Debug: Handling speech detection')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Handling speech detection')
                             if self.speech_end_silence_start:
-                                logging.info("Resetting self.speech_end_silence_start")
+                                if self.use_extended_logging:
+                                    logging.info("Resetting self.speech_end_silence_start")
                                 self.speech_end_silence_start = 0
                                 self.allowed_to_early_transcribe = True
 
-                        logging.debug('Debug: Checking if silence duration exceeds threshold')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Checking if silence duration exceeds threshold')
                         # Wait for silence to stop recording after speech
                         if self.speech_end_silence_start and time.time() - \
                                 self.speech_end_silence_start >= \
                                 self.post_speech_silence_duration:
 
-                            logging.debug('Debug: Formatting silence start time')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Formatting silence start time')
                             # Get time in desired format (HH:MM:SS.nnn)
                             silence_start_time = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
 
-                            logging.debug('Debug: Calculating time difference')
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Calculating time difference')
                             # Calculate time difference
                             time_diff = time.time() - self.speech_end_silence_start
 
-                            logging.debug('Debug: Logging voice deactivity detection')
-                            logging.info(f"voice deactivity detected at {silence_start_time}, "
+                            if self.use_extended_logging:
+                                logging.debug('Debug: Logging voice deactivity detection')
+                                logging.info(f"voice deactivity detected at {silence_start_time}, "
                                         f"time since silence start: {time_diff:.3f} seconds")
 
-                            logging.debug('Debug: Appending data to frames and stopping recording')
+                                logging.debug('Debug: Appending data to frames and stopping recording')
                             self.frames.append(data)
                             self.stop()
                             if not self.is_recording:
-                                logging.debug('Debug: Resetting speech_end_silence_start')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Resetting speech_end_silence_start')
                                 self.speech_end_silence_start = 0
 
-                                logging.debug('Debug: Handling non-wake word scenario')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Handling non-wake word scenario')
                                 if not self.use_wake_words:
                                     self.listen_start = time.time()
                                     self._set_state("listening")
                                     self.start_recording_on_voice_activity = True
                             else:
-                                logging.debug('Debug: Setting failed_stop_attempt to True')
+                                if self.use_extended_logging:
+                                    logging.debug('Debug: Setting failed_stop_attempt to True')
                                 failed_stop_attempt = True
 
-                logging.debug('Debug: Checking if recording stopped')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking if recording stopped')
                 if not self.is_recording and was_recording:
-                    logging.debug('Debug: Resetting after stopping recording')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Resetting after stopping recording')
                     # Reset after stopping recording to ensure clean state
                     self.stop_recording_on_voice_deactivity = False
 
-                logging.debug('Debug: Checking Silero time')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking Silero time')
                 if time.time() - self.silero_check_time > 0.1:
                     self.silero_check_time = 0
 
-                logging.debug('Debug: Handling wake word timeout')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Handling wake word timeout')
                 # Handle wake word timeout (waited to long initiating
                 # speech after wake word detection)
                 if self.wake_word_detect_time and time.time() - \
@@ -1952,21 +1849,27 @@ class AudioToTextRecorder:
 
                     self.wake_word_detect_time = 0
                     if self.wakeword_detected and self.on_wakeword_timeout:
-                        logging.debug('Debug: Calling on_wakeword_timeout')
+                        if self.use_extended_logging:
+                            logging.debug('Debug: Calling on_wakeword_timeout')
                         self.on_wakeword_timeout()
                     self.wakeword_detected = False
 
-                logging.debug('Debug: Updating was_recording')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Updating was_recording')
                 was_recording = self.is_recording
 
-                logging.debug('Debug: Checking if recording and not failed stop attempt')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking if recording and not failed stop attempt')
                 if self.is_recording and not failed_stop_attempt:
-                    logging.debug('Debug: Appending data to frames')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Appending data to frames')
                     self.frames.append(data)
 
-                logging.debug('Debug: Checking if not recording or speech end silence start')
+                if self.use_extended_logging:
+                    logging.debug('Debug: Checking if not recording or speech end silence start')
                 if not self.is_recording or self.speech_end_silence_start:
-                    logging.debug('Debug: Appending data to audio buffer')
+                    if self.use_extended_logging:
+                        logging.debug('Debug: Appending data to audio buffer')
                     self.audio_buffer.append(data)
 
         except Exception as e:
@@ -1975,262 +1878,8 @@ class AudioToTextRecorder:
                 logging.error(f"Unhandled exeption in _recording_worker: {e}")
                 raise
 
-        logging.debug('Debug: Exiting _recording_worker method')
-
-    # def _recording_worker(self):
-    #     """
-    #     The main worker method which constantly monitors the audio
-    #     input for voice activity and accordingly starts/stops the recording.
-    #     """
-
-    #     logging.debug('Starting recording worker')
-
-    #     try:
-    #         time_since_last_buffer_message = 0
-    #         was_recording = False
-    #         delay_was_passed = False
-    #         wakeword_detected_time = None
-    #         wakeword_samples_to_remove = None
-    #         self.allowed_to_early_transcribe = True
-
-    #         # Continuously monitor audio for voice activity
-    #         while self.is_running:
-
-    #             try:
-    #                 try:
-    #                     data = self.audio_queue.get(timeout=0.1)
-    #                 except queue.Empty:
-    #                     if not self.is_running:
-    #                         break
-    #                     continue
-
-    #                 if self.on_recorded_chunk:
-    #                     self.on_recorded_chunk(data)
-
-    #                 if self.handle_buffer_overflow:
-    #                     # Handle queue overflow
-    #                     if (self.audio_queue.qsize() >
-    #                             self.allowed_latency_limit):
-    #                         logging.warning("!!! ### !!! ### !!!")
-    #                         logging.warning("Audio queue size exceeds "
-    #                                         "latency limit. Current size: "
-    #                                         f"{self.audio_queue.qsize()}. "
-    #                                         "Discarding old audio chunks."
-    #                                         )
-    #                         logging.warning("!!! ### !!! ### !!!")
-
-    #                     while (self.audio_queue.qsize() >
-    #                             self.allowed_latency_limit):
-
-    #                         data = self.audio_queue.get()
-
-    #             except BrokenPipeError:
-    #                 logging.error("BrokenPipeError _recording_worker")
-    #                 self.is_running = False
-    #                 break
-
-    #             # Feed the extracted data to the audio_queue
-    #             if time_since_last_buffer_message:
-    #                 time_passed = time.time() - time_since_last_buffer_message
-    #                 if time_passed > 1:
-    #                     logging.debug("_recording_worker processing audio data")
-    #                     time_since_last_buffer_message = time.time()
-    #             else:
-    #                 time_since_last_buffer_message = time.time()
-
-    #             failed_stop_attempt = False
-
-    #             if not self.is_recording:
-    #                 # Handle not recording state
-    #                 time_since_listen_start = (time.time() - self.listen_start
-    #                                            if self.listen_start else 0)
-
-    #                 wake_word_activation_delay_passed = (
-    #                     time_since_listen_start >
-    #                     self.wake_word_activation_delay
-    #                 )
-
-    #                 # Handle wake-word timeout callback
-    #                 if wake_word_activation_delay_passed \
-    #                         and not delay_was_passed:
-
-    #                     if self.use_wake_words and self.wake_word_activation_delay:
-    #                         if self.on_wakeword_timeout:
-    #                             self.on_wakeword_timeout()
-    #                 delay_was_passed = wake_word_activation_delay_passed
-
-    #                 # Set state and spinner text
-    #                 if not self.recording_stop_time:
-    #                     if self.use_wake_words \
-    #                             and wake_word_activation_delay_passed \
-    #                             and not self.wakeword_detected:
-    #                         self._set_state("wakeword")
-    #                     else:
-    #                         if self.listen_start:
-    #                             self._set_state("listening")
-    #                         else:
-    #                             self._set_state("inactive")
-
-    #                 if self.use_wake_words and wake_word_activation_delay_passed:
-    #                     try:
-    #                         wakeword_index = self._process_wakeword(data)
-
-    #                     except struct.error:
-    #                         logging.error("Error unpacking audio data "
-    #                                       "for wake word processing.")
-    #                         continue
-
-    #                     except Exception as e:
-    #                         logging.error(f"Wake word processing error: {e}")
-    #                         continue
-
-    #                     # If a wake word is detected                        
-    #                     if wakeword_index >= 0:
-    #                         self.wake_word_detect_time = time.time()
-    #                         wakeword_detected_time = time.time()
-    #                         wakeword_samples_to_remove = int(self.sample_rate * self.wake_word_buffer_duration)
-    #                         self.wakeword_detected = True
-    #                         if self.on_wakeword_detected:
-    #                             self.on_wakeword_detected()
-
-    #                 # Check for voice activity to
-    #                 # trigger the start of recording
-    #                 if ((not self.use_wake_words
-    #                      or not wake_word_activation_delay_passed)
-    #                         and self.start_recording_on_voice_activity) \
-    #                         or self.wakeword_detected:
-
-    #                     if self._is_voice_active():
-    #                         logging.info("voice activity detected")
-
-    #                         self.start()
-
-    #                         self.start_recording_on_voice_activity = False
-
-    #                         # Add the buffered audio
-    #                         # to the recording frames
-    #                         self.frames.extend(list(self.audio_buffer))
-    #                         self.audio_buffer.clear()
-
-    #                         self.silero_vad_model.reset_states()
-    #                     else:
-    #                         data_copy = data[:]
-    #                         self._check_voice_activity(data_copy)
-
-    #                 self.speech_end_silence_start = 0
-
-    #             else:
-    #                 # If we are currently recording
-    #                 if wakeword_samples_to_remove and wakeword_samples_to_remove > 0:
-    #                     # Remove samples from the beginning of self.frames
-    #                     samples_removed = 0
-    #                     while wakeword_samples_to_remove > 0 and self.frames:
-    #                         frame = self.frames[0]
-    #                         frame_samples = len(frame) // 2  # Assuming 16-bit audio
-    #                         if wakeword_samples_to_remove >= frame_samples:
-    #                             self.frames.pop(0)
-    #                             samples_removed += frame_samples
-    #                             wakeword_samples_to_remove -= frame_samples
-    #                         else:
-    #                             self.frames[0] = frame[wakeword_samples_to_remove * 2:]
-    #                             samples_removed += wakeword_samples_to_remove
-    #                             samples_to_remove = 0
-                        
-    #                     wakeword_samples_to_remove = 0
-
-    #                 # Stop the recording if silence is detected after speech
-    #                 if self.stop_recording_on_voice_deactivity:
-    #                     is_speech = (
-    #                         self._is_silero_speech(data) if self.silero_deactivity_detection
-    #                         else self._is_webrtc_speech(data, True)
-    #                     )
-
-    #                     if not self.speech_end_silence_start:
-    #                         str_speech_end_silence_start = "0"
-    #                     else:
-    #                         str_speech_end_silence_start = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
-    #                     logging.debug(f"is_speech: {is_speech}, str_speech_end_silence_start: {str_speech_end_silence_start}")
-
-    #                     if not is_speech:
-    #                         # Voice deactivity was detected, so we start
-    #                         # measuring silence time before stopping recording
-    #                         if self.speech_end_silence_start == 0 and \
-    #                             (time.time() - self.recording_start_time > self.min_length_of_recording):
-
-    #                             self.speech_end_silence_start = time.time()
-
-    #                         if self.speech_end_silence_start and self.early_transcription_on_silence and len(self.frames) > 0 and \
-    #                             (time.time() - self.speech_end_silence_start > self.early_transcription_on_silence) and \
-    #                             self.allowed_to_early_transcribe:
-    #                                 logging.debug("Adding early transcription request")
-    #                                 self.transcribe_count += 1
-    #                                 audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
-    #                                 audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE
-    #                                 self.parent_transcription_pipe.send((audio, self.language))
-    #                                 self.allowed_to_early_transcribe = False
-
-    #                     else:
-    #                         if self.speech_end_silence_start:
-    #                             logging.info("Resetting self.speech_end_silence_start")
-    #                             self.speech_end_silence_start = 0
-    #                             self.allowed_to_early_transcribe = True
-
-
-    #                     # Wait for silence to stop recording after speech
-    #                     if self.speech_end_silence_start and time.time() - \
-    #                             self.speech_end_silence_start >= \
-    #                             self.post_speech_silence_duration:
-
-    #                         # Get time in desired format (HH:MM:SS.nnn)
-    #                         silence_start_time = datetime.datetime.fromtimestamp(self.speech_end_silence_start).strftime('%H:%M:%S.%f')[:-3]
-
-    #                         # Calculate time difference
-    #                         time_diff = time.time() - self.speech_end_silence_start
-
-    #                         logging.info(f"voice deactivity detected at {silence_start_time}, "
-    #                                     f"time since silence start: {time_diff:.3f} seconds")
-
-    #                         self.frames.append(data)
-    #                         self.stop()
-    #                         if not self.is_recording:
-    #                             self.speech_end_silence_start = 0
-
-    #                             if not self.use_wake_words:
-    #                                 self.listen_start = time.time()
-    #                                 self._set_state("listening")
-    #                                 self.start_recording_on_voice_activity = True
-    #                         else:
-    #                             failed_stop_attempt = True
-
-    #             if not self.is_recording and was_recording:
-    #                 # Reset after stopping recording to ensure clean state
-    #                 self.stop_recording_on_voice_deactivity = False
-
-    #             if time.time() - self.silero_check_time > 0.1:
-    #                 self.silero_check_time = 0
-
-    #             # Handle wake word timeout (waited to long initiating
-    #             # speech after wake word detection)
-    #             if self.wake_word_detect_time and time.time() - \
-    #                     self.wake_word_detect_time > self.wake_word_timeout:
-
-    #                 self.wake_word_detect_time = 0
-    #                 if self.wakeword_detected and self.on_wakeword_timeout:
-    #                     self.on_wakeword_timeout()
-    #                 self.wakeword_detected = False
-
-    #             was_recording = self.is_recording
-
-    #             if self.is_recording and not failed_stop_attempt:
-    #                 self.frames.append(data)
-
-    #             if not self.is_recording or self.speech_end_silence_start:
-    #                 self.audio_buffer.append(data)
-
-    #     except Exception as e:
-    #         if not self.interrupt_stop_event.is_set():
-    #             logging.error(f"Unhandled exeption in _recording_worker: {e}")
-    #             raise
+        if self.use_extended_logging:
+            logging.debug('Debug: Exiting _recording_worker method')
 
 
     def _realtime_worker(self):